Go Library API Reference

Complete reference for github.com/gleicon/tldt/pkg/tldt. Stateless, thread-safe functions for text summarization, security screening, and PII detection. Zero global mutable state.

go get
$ go get github.com/gleicon/tldt/pkg/tldt

Basic usage.

Basic Summarization
package main

import (
    "fmt"
    "log"

    "github.com/gleicon/tldt/pkg/tldt"
)

func main() {
    // Simple summarization with defaults
    result, err := tldt.Summarize("Your long text here...",
        tldt.SummarizeOptions{})
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println(result.Summary)
    // Output: The most representative sentences from the text
}
Security Pipeline
// Full pipeline for untrusted input before LLM context
result, err := tldt.Pipeline("untrusted text", tldt.PipelineOptions{
    Sanitize:    true,  // Strip invisible Unicode
    SanitizePII: true,  // Redact emails, API keys, JWTs
    DetectPII:   true,  // Report PII findings
    Detect: tldt.DetectOptions{
        OutlierThreshold: 0.85,
    },
    Summarize: tldt.SummarizeOptions{
        Algorithm: "ensemble",
        Sentences: 5,
    },
})

// Access findings
fmt.Printf("Redactions: %d\n", result.Redactions)
for _, f := range result.PIIFindings {
    fmt.Printf("Found %s on line %d\n", f.Pattern, f.Line)
}

Function reference.

func Summarize
func Summarize(text string, opts SummarizeOptions) (Result, error)

Runs extractive summarization on text. Returns the summary and token statistics.

Algorithms: "lexrank" (default), "textrank", "graph", "ensemble"

result, _ := tldt.Summarize("text", tldt.SummarizeOptions{
    Algorithm: "ensemble",
    Sentences: 3,
})
// result.Summary, result.TokensIn, result.TokensOut, result.Reduction
func Pipeline
func Pipeline(text string, opts PipelineOptions) (PipelineResult, error)

Complete processing pipeline. Runs stages in order: Sanitize (Unicode) → SanitizePII → DetectPII → Detect (injection) → Summarize.

Returns full results including summary, warnings, PII findings, and redaction counts.

func Fetch
func Fetch(url string, opts FetchOptions) (FetchResult, error)

Fetches a URL, extracts article text using a readability algorithm. Includes SSRF protection against private IPs and cloud metadata endpoints.

Returns HTTP metadata (status, content-type, final URL after redirects) alongside extracted text.

Sentinel errors: ErrSSRFBlocked, ErrRedirectLimit, ErrHTTPError, ErrNonHTML

func Detect
func Detect(text string, opts DetectOptions) (DetectResult, error)

Detects prompt injection patterns without modifying text. Returns findings and warning strings for stderr output.

func Sanitize
func Sanitize(text string) (string, SanitizeReport, error)

Strips invisible Unicode characters and applies NFKC normalization. Reports what was removed.

func DetectPII
func DetectPII(text string) []PIIFinding

Scans text for PII/secrets: emails, API keys (Bearer, sk-, AIza, AKIA), JWTs, credit cards. Returns findings with pattern type, excerpt, and line number.

func SanitizePII
func SanitizePII(text string) (string, []PIIFinding)

Redacts PII matches with [REDACTED:pattern] placeholders. Returns redacted text and findings.

func ConvertHTML
func ConvertHTML(html string, opts HTMLConvertOptions) (string, error)

Converts HTML content to clean Markdown text using readability content extraction and html-to-markdown.

Removes navigation, ads, and boilerplate. Useful for processing HTML from curl commands or web scraping.

Types and options.

SummarizeOptions
type SummarizeOptions struct {
    Algorithm string  // "lexrank"|"textrank"|"graph"|"ensemble" (default: "lexrank")
    Sentences int     // number of output sentences (default: 5)
}
PipelineOptions
type PipelineOptions struct {
    Summarize   SummarizeOptions
    Detect      DetectOptions
    Sanitize    bool  // run Unicode sanitizer before detection/summarization
    DetectPII   bool  // run PII detection stage (text unchanged)
    SanitizePII bool  // run PII redaction stage (text redacted; implies detection)
}
PipelineResult
type PipelineResult struct {
    Summary     string
    TokensIn    int
    TokensOut   int
    Reduction   int           // percentage
    Warnings    []string      // human-readable WARNING lines
    Redactions  int           // count of PII redactions
    PIIFindings []PIIFinding  // nil when no PII flags enabled
}
PIIFinding
type PIIFinding struct {
    Pattern string  // "email", "api-key", "jwt", "credit-card"
    Excerpt string  // truncated to 12 chars + "..."
    Line    int     // 1-based line number
}
FetchResult
type FetchResult struct {
    Text        string  // Extracted article text
    StatusCode  int     // HTTP status code (after redirects)
    ContentType string  // Response Content-Type header
    FinalURL    string  // Final URL after all redirects
}
HTMLConvertOptions
type HTMLConvertOptions struct {
    ExtractContent bool  // use readability to extract main content (default: true)
    IncludeTitle   bool  // include article title as H1 (default: true)
    MaxLength      int   // limit output length, 0 = unlimited (default: 0)
}
Sentinel Errors
var (
    ErrSSRFBlocked   = errors.New("tldt: SSRF blocked: private or reserved IP address")
    ErrRedirectLimit = errors.New("tldt: redirect limit exceeded")
    ErrHTTPError     = errors.New("tldt: HTTP error")
    ErrNonHTML       = errors.New("tldt: not HTML content")
)

// Usage with errors.Is
if errors.Is(err, tldt.ErrSSRFBlocked) {
    // Handle SSRF block
}

Usage patterns.

Error Handling
result, err := tldt.Fetch("https://example.com/doc", tldt.FetchOptions{})
if err != nil {
    switch {
    case errors.Is(err, tldt.ErrSSRFBlocked):
        log.Fatal("SSRF protection triggered")
    case errors.Is(err, tldt.ErrRedirectLimit):
        log.Fatal("Too many redirects")
    default:
        log.Fatal(err)
    }
}
Concurrency
// All tldt functions are safe for concurrent use
var wg sync.WaitGroup
for _, doc := range documents {
    wg.Add(1)
    go func(text string) {
        defer wg.Done()
        result, _ := tldt.Summarize(text,
            tldt.SummarizeOptions{Sentences: 3})
        // Process result
    }(doc)
}
wg.Wait()
Pre-LLM Processing
// Clean and summarize before sending to LLM
func prepareForLLM(input string) (string, error) {
    result, err := tldt.Pipeline(input, tldt.PipelineOptions{
        Sanitize:    true,   // Remove invisible characters
        SanitizePII: true,   // Redact secrets
        DetectPII:   true,   // Log what was found
        Summarize: tldt.SummarizeOptions{
            Algorithm: "ensemble",
            Sentences: 10,   // Keep more context for LLM
        },
    })
    if err != nil {
        return "", err
    }

    // Log security findings
    for _, f := range result.PIIFindings {
        log.Printf("PII found: %s on line %d", f.Pattern, f.Line)
    }

    return result.Summary, nil
}
HTML to Markdown Conversion
// Process HTML from curl or web scraping
html, _ := os.ReadFile("article.html")

markdown, err := tldt.ConvertHTML(string(html), tldt.HTMLConvertOptions{
    ExtractContent: true,  // Remove nav, ads, boilerplate
    IncludeTitle:   true,  // Add article title
})
if err != nil {
    log.Fatal(err)
}

// Now summarize the clean Markdown
result, _ := tldt.Summarize(markdown, tldt.SummarizeOptions{
    Sentences: 5,
})
Back to main site

Version 1.2.0 · MIT License · github.com/gleicon/tldt