# general_knowledge.yml
# Configuration optimized for general knowledge content
# Examples: Wikipedia articles, encyclopedias, general information
# Main path configuration
# Root-level settings; these keys are not nested under any section.
# NOTE(review): presumably the on-disk location of the knowledge-base index,
# relative to the working directory - confirm against the loader.
path: .txtai/kb-general
# NOTE(review): looks like this enables storing source content alongside the
# index - confirm against the consumer of this file.
content: true
# NOTE(review): presumably allows the index at `path` to be updated and saved
# rather than opened read-only - confirm.
writable: true
# Text extraction configuration
# All options below belong to the `textractor` mapping and must be indented
# beneath it; at column 0 they would be read as unrelated root-level keys.
textractor:
  # Segment extracted text by paragraph rather than by sentence.
  paragraphs: true
  sentences: false
  minlength: 75  # General content has medium-length passages
  cleantext: true
  # NOTE(review): extraction backend name - verify "text" is a value the
  # installed textractor version accepts.
  backend: "text"
# Embedding configuration
# Everything in this section (model, storage, scoring, graph) is nested under
# the single `embeddings` mapping. Nesting depth is significant: `path`,
# `content`, `writable`, `backend`, `normalize`, `terms`, `limit`, `minscore`
# and `search` also exist at other levels of this file with different meanings.
embeddings:
  # General-purpose model with good performance on diverse content
  path: sentence-transformers/all-mpnet-base-v2  # Strong general-purpose model
  content: true
  normalize: true
  # Combine the dense vector index with the sparse `scoring` index below.
  hybrid: true
  gpu: true

  # Storage configuration
  writable: true
  backend: faiss
  storagetype: sqlite

  # Scoring methods - balanced for general content
  scoring:
    method: bm25
    normalize: true
    # Term-index settings for the scoring method.
    # NOTE(review): `cachelimit` unit is presumably bytes - confirm.
    terms:
      cachelimit: 1000000000
      cutoff: 0.001

  # Graph configuration - optimized for conceptual networks
  graph:
    backend: "networkx"
    batchsize: 256
    limit: 12
    minscore: 0.35
    approximate: true
    # Topic modelling over the graph; `terms` here is the per-topic term
    # count, unrelated to `scoring.terms` above.
    topics:
      algorithm: "louvain"
      terms: 5
    centrality: "pagerank"  # Good for authority in general knowledge
    directed: true
    weight: "similarity"

    # Graph search options
    # NOTE(review): these keys use snake_case (`max_hops`, `min_score`)
    # unlike the rest of the file - presumably read by project-specific
    # code; confirm before renaming.
    search:
      max_hops: 2
      use_centrality: true
      min_score: 0.3
# Search configuration - balanced for general queries
# Root-level query defaults. `limit` and `minscore` here apply to searches and
# are distinct from the same-named keys in the graph configuration, so they
# must stay nested under `search`.
search:
  limit: 7
  minscore: 0.3
  rerank: true
  # Relative contribution of each ranking signal to the final score.
  weights:
    bm25: 0.5
    similarity: 0.5  # Equal weighting for general knowledge