Complete reference for github.com/gleicon/tldt/pkg/tldt.
Stateless, thread-safe functions for text summarization, security screening,
and PII detection. Zero global mutable state.
$ go get github.com/gleicon/tldt/pkg/tldt
```go
package main

import (
	"fmt"
	"log"

	"github.com/gleicon/tldt/pkg/tldt"
)

func main() {
	// Simple summarization with defaults
	result, err := tldt.Summarize("Your long text here...", tldt.SummarizeOptions{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(result.Summary) // Output: the most representative sentences from the text
}
```
```go
// Full pipeline for untrusted input before LLM context
result, err := tldt.Pipeline("untrusted text", tldt.PipelineOptions{
	Sanitize:    true, // strip invisible Unicode
	SanitizePII: true, // redact emails, API keys, JWTs
	DetectPII:   true, // report PII findings
	Detect: tldt.DetectOptions{
		OutlierThreshold: 0.85,
	},
	Summarize: tldt.SummarizeOptions{
		Algorithm: "ensemble",
		Sentences: 5,
	},
})
if err != nil {
	log.Fatal(err)
}

// Access findings
fmt.Printf("Redactions: %d\n", result.Redactions)
for _, f := range result.PIIFindings {
	fmt.Printf("Found %s on line %d\n", f.Pattern, f.Line)
}
```
Summarize runs extractive summarization on the input text and returns the summary along with token statistics.
Algorithms: "lexrank" (default), "textrank", "graph", "ensemble"
```go
result, _ := tldt.Summarize("text", tldt.SummarizeOptions{
	Algorithm: "ensemble",
	Sentences: 3,
})
// result.Summary, result.TokensIn, result.TokensOut, result.Reduction
```
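To see how the algorithms behave on a given input, here is a quick sketch (assuming `text` holds the document to summarize) that runs each one and compares the reported reduction:

```go
// Compare all four algorithms on the same input.
for _, algo := range []string{"lexrank", "textrank", "graph", "ensemble"} {
	result, err := tldt.Summarize(text, tldt.SummarizeOptions{Algorithm: algo, Sentences: 3})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%-9s reduction: %d%%\n", algo, result.Reduction)
}
```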
Pipeline runs the complete processing pipeline, executing the stages in order: Sanitize (Unicode) → SanitizePII → DetectPII → Detect (injection) → Summarize.
Returns full results including summary, warnings, PII findings, and redaction counts.
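As a sketch of reading those results (assuming `input` holds the text; the fields are documented under PipelineResult below):

```go
result, err := tldt.Pipeline(input, tldt.PipelineOptions{
	Sanitize:    true,
	SanitizePII: true,
})
if err != nil {
	log.Fatal(err)
}
fmt.Printf("tokens: %d -> %d (%d%% reduction), %d redactions\n",
	result.TokensIn, result.TokensOut, result.Reduction, result.Redactions)
```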
Fetch retrieves a URL and extracts the article text using a readability algorithm, with SSRF protection against private IPs and cloud metadata endpoints.
It returns HTTP metadata (status, content type, and the final URL after redirects) alongside the extracted text.
Sentinel errors: ErrSSRFBlocked, ErrRedirectLimit, ErrHTTPError, ErrNonHTML
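A minimal sketch combining Fetch with Summarize (zero-value FetchOptions assumed, as in the error-handling example later in this reference):

```go
// Fetch an article, inspect the HTTP metadata, then summarize the extracted text.
page, err := tldt.Fetch("https://example.com/article", tldt.FetchOptions{})
if err != nil {
	log.Fatal(err) // may match ErrSSRFBlocked, ErrRedirectLimit, etc. via errors.Is
}
fmt.Printf("%d %s (final URL: %s)\n", page.StatusCode, page.ContentType, page.FinalURL)

summary, err := tldt.Summarize(page.Text, tldt.SummarizeOptions{Sentences: 5})
if err != nil {
	log.Fatal(err)
}
fmt.Println(summary.Summary)
```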
Detect scans for prompt injection patterns without modifying the text. It returns findings plus warning strings intended for stderr output.
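This reference does not reproduce the standalone signature, but the same detector is reachable through Pipeline via the Detect field; the sketch below assumes the warning strings surface in result.Warnings, based on the stage order and PipelineResult documented here:

```go
// Run injection detection as part of the pipeline and route warnings to stderr
// (sketch; `untrusted` is assumed to hold the input text).
result, err := tldt.Pipeline(untrusted, tldt.PipelineOptions{
	Detect: tldt.DetectOptions{OutlierThreshold: 0.85},
})
if err != nil {
	log.Fatal(err)
}
for _, w := range result.Warnings {
	fmt.Fprintln(os.Stderr, w) // warning lines are preformatted for stderr
}
```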
Sanitize strips invisible Unicode characters, applies NFKC normalization, and reports what was removed.
DetectPII scans text for PII and secrets: emails, API keys (Bearer, sk-, AIza, AKIA), JWTs, and credit cards. It returns findings with the pattern type, an excerpt, and the line number.
SanitizePII redacts PII matches with [REDACTED:pattern] placeholders and returns the redacted text along with the findings.
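The exact standalone signature is not shown in this reference; the sketch below assumes a (redacted text, findings, error) return purely to illustrate the placeholder format:

```go
// Hypothetical signature: assumed to return (redacted text, findings, error).
redacted, findings, err := tldt.SanitizePII("contact me at alice@example.com")
if err != nil {
	log.Fatal(err)
}
fmt.Println(redacted) // e.g. "contact me at [REDACTED:email]"
for _, f := range findings {
	fmt.Printf("%s on line %d (%s)\n", f.Pattern, f.Line, f.Excerpt)
}
```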
ConvertHTML converts HTML to clean Markdown using readability content extraction and html-to-markdown.
It removes navigation, ads, and boilerplate, which makes it useful for processing HTML captured from curl commands or web scraping.
```go
type SummarizeOptions struct {
	Algorithm string // "lexrank" | "textrank" | "graph" | "ensemble" (default: "lexrank")
	Sentences int    // number of output sentences (default: 5)
}
```
```go
type PipelineOptions struct {
	Summarize   SummarizeOptions
	Detect      DetectOptions
	Sanitize    bool // run Unicode sanitizer before detection/summarization
	DetectPII   bool // run PII detection stage (text unchanged)
	SanitizePII bool // run PII redaction stage (text redacted; implies detection)
}
```
```go
type PipelineResult struct {
	Summary     string
	TokensIn    int
	TokensOut   int
	Reduction   int          // percentage
	Warnings    []string     // human-readable WARNING lines
	Redactions  int          // count of PII redactions
	PIIFindings []PIIFinding // nil when no PII flags enabled
}
```
```go
type PIIFinding struct {
	Pattern string // "email", "api-key", "jwt", "credit-card"
	Excerpt string // truncated to 12 chars + "..."
	Line    int    // 1-based line number
}
```
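A small sketch tallying findings by pattern, assuming `result` came from a Pipeline call with the PII flags enabled:

```go
// Count how many findings of each pattern type were reported.
counts := map[string]int{}
for _, f := range result.PIIFindings {
	counts[f.Pattern]++
}
fmt.Println(counts) // e.g. map[api-key:1 email:2]
```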
```go
type FetchResult struct {
	Text        string // extracted article text
	StatusCode  int    // HTTP status code (after redirects)
	ContentType string // response Content-Type header
	FinalURL    string // final URL after all redirects
}
```
```go
type HTMLConvertOptions struct {
	ExtractContent bool // use readability to extract main content (default: true)
	IncludeTitle   bool // include article title as H1 (default: true)
	MaxLength      int  // limit output length, 0 = unlimited (default: 0)
}
```
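MaxLength can cap the converted output for a fixed context budget. A sketch, assuming `htmlInput` holds the raw HTML and that MaxLength truncates the generated Markdown:

```go
// Cap the converted output at roughly 4000 characters (assumed truncation semantics).
markdown, err := tldt.ConvertHTML(htmlInput, tldt.HTMLConvertOptions{
	ExtractContent: true,
	MaxLength:      4000, // 0 would mean unlimited
})
if err != nil {
	log.Fatal(err)
}
fmt.Println(markdown)
```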
```go
var (
	ErrSSRFBlocked   = errors.New("tldt: SSRF blocked: private or reserved IP address")
	ErrRedirectLimit = errors.New("tldt: redirect limit exceeded")
	ErrHTTPError     = errors.New("tldt: HTTP error")
	ErrNonHTML       = errors.New("tldt: not HTML content")
)

// Usage with errors.Is
if errors.Is(err, tldt.ErrSSRFBlocked) {
	// Handle SSRF block
}
```
```go
result, err := tldt.Fetch("https://example.com/doc", tldt.FetchOptions{})
if err != nil {
	switch {
	case errors.Is(err, tldt.ErrSSRFBlocked):
		log.Fatal("SSRF protection triggered")
	case errors.Is(err, tldt.ErrRedirectLimit):
		log.Fatal("Too many redirects")
	default:
		log.Fatal(err)
	}
}
```
```go
// All tldt functions are safe for concurrent use.
var wg sync.WaitGroup
for _, doc := range documents {
	wg.Add(1)
	go func(text string) {
		defer wg.Done()
		result, _ := tldt.Summarize(text, tldt.SummarizeOptions{Sentences: 3})
		// Process result
	}(doc)
}
wg.Wait()
```
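The snippet above discards per-document errors; a variant using golang.org/x/sync/errgroup (an extra dependency, not part of tldt) propagates the first failure:

```go
// Summarize documents concurrently and stop on the first error.
var g errgroup.Group
summaries := make([]string, len(documents))
for i, doc := range documents {
	i, doc := i, doc // capture loop variables (needed before Go 1.22)
	g.Go(func() error {
		result, err := tldt.Summarize(doc, tldt.SummarizeOptions{Sentences: 3})
		if err != nil {
			return err
		}
		summaries[i] = result.Summary
		return nil
	})
}
if err := g.Wait(); err != nil {
	log.Fatal(err)
}
```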
```go
// Clean and summarize before sending to an LLM.
func prepareForLLM(input string) (string, error) {
	result, err := tldt.Pipeline(input, tldt.PipelineOptions{
		Sanitize:    true, // remove invisible characters
		SanitizePII: true, // redact secrets
		DetectPII:   true, // log what was found
		Summarize: tldt.SummarizeOptions{
			Algorithm: "ensemble",
			Sentences: 10, // keep more context for the LLM
		},
	})
	if err != nil {
		return "", err
	}
	// Log security findings
	for _, f := range result.PIIFindings {
		log.Printf("PII found: %s on line %d", f.Pattern, f.Line)
	}
	return result.Summary, nil
}
```
```go
// Process HTML from curl output or web scraping.
html, _ := os.ReadFile("article.html")
markdown, err := tldt.ConvertHTML(string(html), tldt.HTMLConvertOptions{
	ExtractContent: true, // remove nav, ads, boilerplate
	IncludeTitle:   true, // add the article title
})
if err != nil {
	log.Fatal(err)
}

// Now summarize the clean Markdown
result, _ := tldt.Summarize(markdown, tldt.SummarizeOptions{
	Sentences: 5,
})
```
Version 1.2.0 · MIT License · github.com/gleicon/tldt