Complete reference for github.com/gleicon/tldt/pkg/tldt.
Stateless, thread-safe functions for text summarization, security screening,
and PII detection. Zero global mutable state.
$ go get github.com/gleicon/tldt/pkg/tldt
```go
package main

import (
	"fmt"
	"log"

	"github.com/gleicon/tldt/pkg/tldt"
)

func main() {
	// Simple summarization with defaults
	result, err := tldt.Summarize("Your long text here...", tldt.SummarizeOptions{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(result.Summary) // Output: the most representative sentences from the text
}
```
```go
// Full pipeline for untrusted input before LLM context
result, err := tldt.Pipeline("untrusted text", tldt.PipelineOptions{
	Sanitize:    true, // strip invisible Unicode
	SanitizePII: true, // redact emails, API keys, JWTs
	DetectPII:   true, // report PII findings
	Detect: tldt.DetectOptions{
		OutlierThreshold: 0.85,
	},
	Summarize: tldt.SummarizeOptions{
		Algorithm: "ensemble",
		Sentences: 5,
	},
})
if err != nil {
	log.Fatal(err)
}

// Access findings
fmt.Printf("Redactions: %d\n", result.Redactions)
for _, f := range result.PIIFindings {
	fmt.Printf("Found %s on line %d\n", f.Pattern, f.Line)
}
```
Summarize runs extractive summarization on the input text and returns the summary along with token statistics.
Algorithms: "lexrank" (default), "textrank", "graph", "ensemble"
```go
result, _ := tldt.Summarize("text", tldt.SummarizeOptions{
	Algorithm: "ensemble",
	Sentences: 3,
})
// result.Summary, result.TokensIn, result.TokensOut, result.Reduction
```
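To see how the algorithms behave on a given input, here is a quick sketch (assuming `text` holds the document to summarize) that runs each one and compares the reported reduction:

```go
// Compare all four algorithms on the same input.
for _, algo := range []string{"lexrank", "textrank", "graph", "ensemble"} {
	result, err := tldt.Summarize(text, tldt.SummarizeOptions{Algorithm: algo, Sentences: 3})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%-9s reduction: %d%%\n", algo, result.Reduction)
}
```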
Pipeline runs the complete processing pipeline, executing the stages in order: Sanitize (Unicode) → SanitizePII → DetectPII → Detect (injection) → Summarize.
Returns full results including summary, warnings, PII findings, and redaction counts.
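As a sketch of reading those results (assuming `input` holds the text; the fields are documented under PipelineResult below):

```go
result, err := tldt.Pipeline(input, tldt.PipelineOptions{
	Sanitize:    true,
	SanitizePII: true,
})
if err != nil {
	log.Fatal(err)
}
fmt.Printf("tokens: %d -> %d (%d%% reduction), %d redactions\n",
	result.TokensIn, result.TokensOut, result.Reduction, result.Redactions)
```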
Fetch retrieves a URL and extracts the article text using a readability algorithm, with SSRF protection against private IPs and cloud metadata endpoints.
It returns HTTP metadata (status, content type, and the final URL after redirects) alongside the extracted text.
Sentinel errors: ErrSSRFBlocked, ErrRedirectLimit, ErrHTTPError, ErrNonHTML
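A minimal sketch combining Fetch with Summarize (zero-value FetchOptions assumed, as in the error-handling example later in this reference):

```go
// Fetch an article, inspect the HTTP metadata, then summarize the extracted text.
page, err := tldt.Fetch("https://example.com/article", tldt.FetchOptions{})
if err != nil {
	log.Fatal(err) // may match ErrSSRFBlocked, ErrRedirectLimit, etc. via errors.Is
}
fmt.Printf("%d %s (final URL: %s)\n", page.StatusCode, page.ContentType, page.FinalURL)

summary, err := tldt.Summarize(page.Text, tldt.SummarizeOptions{Sentences: 5})
if err != nil {
	log.Fatal(err)
}
fmt.Println(summary.Summary)
```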
Detect scans for prompt injection patterns without modifying the text. It returns findings plus warning strings intended for stderr output.
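This reference does not reproduce the standalone signature, but the same detector is reachable through Pipeline via the Detect field; the sketch below assumes the warning strings surface in result.Warnings, based on the stage order and PipelineResult documented here:

```go
// Run injection detection as part of the pipeline and route warnings to stderr
// (sketch; `untrusted` is assumed to hold the input text).
result, err := tldt.Pipeline(untrusted, tldt.PipelineOptions{
	Detect: tldt.DetectOptions{OutlierThreshold: 0.85},
})
if err != nil {
	log.Fatal(err)
}
for _, w := range result.Warnings {
	fmt.Fprintln(os.Stderr, w) // warning lines are preformatted for stderr
}
```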
Sanitize strips invisible Unicode characters, applies NFKC normalization, and reports what was removed.
DetectPII scans text for PII and secrets: emails, API keys (Bearer, sk-, AIza, AKIA), JWTs, and credit cards. It returns findings with the pattern type, an excerpt, and the line number.
SanitizePII redacts PII matches with [REDACTED:pattern] placeholders and returns the redacted text along with the findings.
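The exact standalone signature is not shown in this reference; the sketch below assumes a (redacted text, findings, error) return purely to illustrate the placeholder format:

```go
// Hypothetical signature: assumed to return (redacted text, findings, error).
redacted, findings, err := tldt.SanitizePII("contact me at alice@example.com")
if err != nil {
	log.Fatal(err)
}
fmt.Println(redacted) // e.g. "contact me at [REDACTED:email]"
for _, f := range findings {
	fmt.Printf("%s on line %d (%s)\n", f.Pattern, f.Line, f.Excerpt)
}
```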
ConvertHTML converts HTML to clean Markdown using readability content extraction and html-to-markdown.
It removes navigation, ads, and boilerplate, which makes it useful for processing HTML captured from curl commands or web scraping.
```go
type SummarizeOptions struct {
	Algorithm string // "lexrank" | "textrank" | "graph" | "ensemble" (default: "lexrank")
	Sentences int    // number of output sentences (default: 5)
}
```
```go
type PipelineOptions struct {
	Summarize   SummarizeOptions
	Detect      DetectOptions
	Sanitize    bool // run Unicode sanitizer before detection/summarization
	DetectPII   bool // run PII detection stage (text unchanged)
	SanitizePII bool // run PII redaction stage (text redacted; implies detection)
}
```
```go
type PipelineResult struct {
	Summary     string
	TokensIn    int
	TokensOut   int
	Reduction   int          // percentage
	Warnings    []string     // human-readable WARNING lines
	Redactions  int          // count of PII redactions
	PIIFindings []PIIFinding // nil when no PII flags enabled
}
```
```go
type PIIFinding struct {
	Pattern string // "email", "api-key", "jwt", "credit-card"
	Excerpt string // truncated to 12 chars + "..."
	Line    int    // 1-based line number
}
```
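A small sketch tallying findings by pattern, assuming `result` came from a Pipeline call with the PII flags enabled:

```go
// Count how many findings of each pattern type were reported.
counts := map[string]int{}
for _, f := range result.PIIFindings {
	counts[f.Pattern]++
}
fmt.Println(counts) // e.g. map[api-key:1 email:2]
```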
```go
type FetchResult struct {
	Text        string // extracted article text
	StatusCode  int    // HTTP status code (after redirects)
	ContentType string // response Content-Type header
	FinalURL    string // final URL after all redirects
}
```
```go
type HTMLConvertOptions struct {
	ExtractContent bool // use readability to extract main content (default: true)
	IncludeTitle   bool // include article title as H1 (default: true)
	MaxLength      int  // limit output length, 0 = unlimited (default: 0)
}
```
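MaxLength can cap the converted output for a fixed context budget. A sketch, assuming `htmlInput` holds the raw HTML and that MaxLength truncates the generated Markdown:

```go
// Cap the converted output at roughly 4000 characters (assumed truncation semantics).
markdown, err := tldt.ConvertHTML(htmlInput, tldt.HTMLConvertOptions{
	ExtractContent: true,
	MaxLength:      4000, // 0 would mean unlimited
})
if err != nil {
	log.Fatal(err)
}
fmt.Println(markdown)
```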
```go
var (
	ErrSSRFBlocked   = errors.New("tldt: SSRF blocked: private or reserved IP address")
	ErrRedirectLimit = errors.New("tldt: redirect limit exceeded")
	ErrHTTPError     = errors.New("tldt: HTTP error")
	ErrNonHTML       = errors.New("tldt: not HTML content")
)

// Usage with errors.Is
if errors.Is(err, tldt.ErrSSRFBlocked) {
	// Handle SSRF block
}
```
```go
result, err := tldt.Fetch("https://example.com/doc", tldt.FetchOptions{})
if err != nil {
	switch {
	case errors.Is(err, tldt.ErrSSRFBlocked):
		log.Fatal("SSRF protection triggered")
	case errors.Is(err, tldt.ErrRedirectLimit):
		log.Fatal("Too many redirects")
	default:
		log.Fatal(err)
	}
}
```
```go
// All tldt functions are safe for concurrent use.
var wg sync.WaitGroup
for _, doc := range documents {
	wg.Add(1)
	go func(text string) {
		defer wg.Done()
		result, _ := tldt.Summarize(text, tldt.SummarizeOptions{Sentences: 3})
		// Process result
	}(doc)
}
wg.Wait()
```
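The snippet above discards per-document errors; a variant using golang.org/x/sync/errgroup (an extra dependency, not part of tldt) propagates the first failure:

```go
// Summarize documents concurrently and stop on the first error.
var g errgroup.Group
summaries := make([]string, len(documents))
for i, doc := range documents {
	i, doc := i, doc // capture loop variables (needed before Go 1.22)
	g.Go(func() error {
		result, err := tldt.Summarize(doc, tldt.SummarizeOptions{Sentences: 3})
		if err != nil {
			return err
		}
		summaries[i] = result.Summary
		return nil
	})
}
if err := g.Wait(); err != nil {
	log.Fatal(err)
}
```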
```go
// Clean and summarize before sending to an LLM.
func prepareForLLM(input string) (string, error) {
	result, err := tldt.Pipeline(input, tldt.PipelineOptions{
		Sanitize:    true, // remove invisible characters
		SanitizePII: true, // redact secrets
		DetectPII:   true, // log what was found
		Summarize: tldt.SummarizeOptions{
			Algorithm: "ensemble",
			Sentences: 10, // keep more context for the LLM
		},
	})
	if err != nil {
		return "", err
	}
	// Log security findings
	for _, f := range result.PIIFindings {
		log.Printf("PII found: %s on line %d", f.Pattern, f.Line)
	}
	return result.Summary, nil
}
```
```go
// Process HTML from curl output or web scraping.
html, _ := os.ReadFile("article.html")
markdown, err := tldt.ConvertHTML(string(html), tldt.HTMLConvertOptions{
	ExtractContent: true, // remove nav, ads, boilerplate
	IncludeTitle:   true, // add the article title
})
if err != nil {
	log.Fatal(err)
}

// Now summarize the clean Markdown
result, _ := tldt.Summarize(markdown, tldt.SummarizeOptions{
	Sentences: 5,
})
```
Version 1.2.0 · MIT License · github.com/gleicon/tldt