Skip to main content
Glama
Southclaws

Storyden

by Southclaws
bleve_searcher.go14 kB
// Package bleve_search implements the search provider backed by a bleve index
// stored on local disk.
package bleve_search

import (
	"context"
	"errors"
	"fmt"
	"strings"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
	"github.com/blevesearch/bleve/v2/mapping"
	"github.com/blevesearch/bleve/v2/registry"
	"github.com/blevesearch/bleve/v2/search"
	"github.com/blevesearch/bleve/v2/search/query"
	"github.com/rs/xid"

	"github.com/Southclaws/dt"
	"github.com/Southclaws/fault"
	"github.com/Southclaws/fault/fctx"
	"github.com/Southclaws/fault/fmsg"
	"github.com/Southclaws/opt"

	"github.com/Southclaws/storyden/app/resources/datagraph"
	"github.com/Southclaws/storyden/app/resources/datagraph/hydrate"
	"github.com/Southclaws/storyden/app/resources/pagination"
	"github.com/Southclaws/storyden/app/services/search/searcher"
	"github.com/Southclaws/storyden/internal/config"
)

// Document is the flat record written to the bleve index for every item. The
// json tag names line up with the field names registered in
// createIndexMapping and referenced by the query builders in this file.
type Document struct {
	ID          string   `json:"id"`
	Kind        string   `json:"kind"`
	Name        string   `json:"name"`
	Slug        string   `json:"slug"`
	Description string   `json:"description"`
	Content     string   `json:"content"`
	CreatedAt   int64    `json:"created_at"`
	AuthorID    string   `json:"author_id"`
	CategoryID  string   `json:"category_id"`
	Tags        []string `json:"tags"`
}

// BleveSearcher runs searches against, and maintains documents in, a bleve
// index. Search hits are turned into full items through the hydrator.
type BleveSearcher struct {
	client   bleve.Index
	hydrator *hydrate.Hydrator
}

// New opens (or creates) the bleve index at cfg.BlevePath and returns a
// searcher that uses it.
//
// When cfg.SearchProvider is not "bleve" it returns (nil, nil): callers must
// check the returned pointer as well as the error. An empty cfg.BlevePath is
// an error when the bleve provider is selected.
func New(ctx context.Context, cfg config.Config, hydrator *hydrate.Hydrator) (*BleveSearcher, error) {
	if cfg.SearchProvider != "bleve" {
		return nil, nil
	}

	if cfg.BlevePath == "" {
		return nil, fault.New("BLEVE_PATH is required when SEARCH_PROVIDER is set to 'bleve'")
	}

	index, err := openOrCreateIndex(cfg.BlevePath)
	if err != nil {
		return nil, fault.Wrap(err, fctx.With(ctx), fmsg.With("failed to open or create bleve index"))
	}

	return &BleveSearcher{
		client:   index,
		hydrator: hydrator,
	}, nil
}

// Search runs a full-text query for q, restricted by opts, and returns the
// page of hydrated items described by p, ordered by descending score.
func (s *BleveSearcher) Search(ctx context.Context, q string, p pagination.Parameters, opts searcher.Options) (*pagination.Result[datagraph.Item], error) {
	searchQuery := s.buildSearchQuery(q, opts)

	// The offset is derived from the one-indexed page number.
	req := bleve.NewSearchRequestOptions(searchQuery, p.Size(), (p.PageOneIndexed()-1)*p.Size(), false)
	req.Fields = []string{"id", "kind", "name", "slug", "description", "created_at"}
	req.SortBy([]string{"-_score"})

	// Pass ctx through so cancellation and deadlines reach the index search.
	result, err := s.client.SearchInContext(ctx, req)
	if err != nil {
		return nil, fault.Wrap(err, fctx.With(ctx))
	}

	return s.processResults(ctx, result, p)
}

// MatchFast runs a prefix query for q against item names, restricted by opts,
// and returns up to limit lightweight matches built from stored fields only,
// without hydrating. Hits that cannot be converted into a match are dropped.
func (s *BleveSearcher) MatchFast(ctx context.Context, q string, limit int, opts searcher.Options) (datagraph.MatchList, error) {
	matchQuery := s.buildMatchQuery(q, opts)

	req := bleve.NewSearchRequestOptions(matchQuery, limit, 0, false)
	req.Fields = []string{"id", "kind", "name", "slug", "description", "created_at"}
	req.SortBy([]string{"-_score"})

	// Pass ctx through so cancellation and deadlines reach the index search.
	result, err := s.client.SearchInContext(ctx, req)
	if err != nil {
		return nil, fault.Wrap(err, fctx.With(ctx))
	}

	return dt.Reduce(result.Hits, func(acc datagraph.MatchList, hit *search.DocumentMatch) datagraph.MatchList {
		match, ok := s.matchFromHit(hit)
		if !ok {
			return acc
		}
		return append(acc, match)
	}, datagraph.MatchList{}), nil
}

// processResults converts the hits of result into references, hydrates them
// into items and computes the pagination metadata for p.
//
// Hits whose ID is not a valid xid, or whose stored "kind" field is missing or
// not a recognised kind, are skipped rather than failing the whole search.
func (s *BleveSearcher) processResults(ctx context.Context, result *bleve.SearchResult, p pagination.Parameters) (*pagination.Result[datagraph.Item], error) {
	refs := make([]*datagraph.Ref, 0, len(result.Hits))

	for _, hit := range result.Hits {
		id, err := xid.FromString(hit.ID)
		if err != nil {
			continue
		}

		kindStr, ok := hit.Fields["kind"].(string)
		if !ok {
			continue
		}

		kind, err := datagraph.NewKind(kindStr)
		if err != nil {
			continue
		}

		refs = append(refs, &datagraph.Ref{
			ID:        id,
			Kind:      kind,
			Relevance: hit.Score,
		})
	}

	items, err := s.hydrator.Hydrate(ctx, refs...)
	if err != nil {
		return nil, fault.Wrap(err, fctx.With(ctx))
	}

	size := p.Size()
	total := int(result.Total)

	// Ceiling division. The guard keeps a non-positive page size from causing
	// a divide-by-zero panic; for positive sizes the result is unchanged.
	totalPages := 0
	if size > 0 {
		totalPages = total / size
		if total%size > 0 {
			totalPages++
		}
	}

	nextPage := opt.NewEmpty[int]()
	if p.PageOneIndexed() < totalPages {
		nextPage = opt.New(p.PageOneIndexed() + 1)
	}

	return &pagination.Result[datagraph.Item]{
		Size:        size,
		Results:     total,
		TotalPages:  totalPages,
		CurrentPage: p.PageOneIndexed(),
		NextPage:    nextPage,
		Items:       items,
	}, nil
}

// buildSearchQuery builds the query used by Search.
//
// The text part matches q against name, slug, description or content; a blank
// q matches every document. Each populated filter in opts is then required in
// addition to the text part: kinds, authors and categories match any of the
// given values, while tags must all be present.
func (s *BleveSearcher) buildSearchQuery(q string, opts searcher.Options) query.Query {
	var textQuery query.Query

	if strings.TrimSpace(q) == "" {
		textQuery = bleve.NewMatchAllQuery()
	} else {
		matchName := bleve.NewMatchQuery(q)
		matchName.SetField("name")

		matchSlug := bleve.NewMatchQuery(q)
		matchSlug.SetField("slug")

		matchDescription := bleve.NewMatchQuery(q)
		matchDescription.SetField("description")

		matchContent := bleve.NewMatchQuery(q)
		matchContent.SetField("content")

		textQuery = bleve.NewDisjunctionQuery(
			matchName,
			matchSlug,
			matchDescription,
			matchContent,
		)
	}

	var filters []query.Query

	if kinds, ok := opts.Kinds.Get(); ok && len(kinds) > 0 {
		kindQueries := make([]query.Query, 0, len(kinds))
		for _, k := range kinds {
			kq := bleve.NewTermQuery(k.String())
			kq.SetField("kind")
			kindQueries = append(kindQueries, kq)
		}
		filters = append(filters, bleve.NewDisjunctionQuery(kindQueries...))
	}

	if authors, ok := opts.Authors.Get(); ok && len(authors) > 0 {
		authorQueries := make([]query.Query, 0, len(authors))
		for _, a := range authors {
			aq := bleve.NewTermQuery(a.String())
			aq.SetField("author_id")
			authorQueries = append(authorQueries, aq)
		}
		filters = append(filters, bleve.NewDisjunctionQuery(authorQueries...))
	}

	if categories, ok := opts.Categories.Get(); ok && len(categories) > 0 {
		categoryQueries := make([]query.Query, 0, len(categories))
		for _, c := range categories {
			cq := bleve.NewTermQuery(c.String())
			cq.SetField("category_id")
			categoryQueries = append(categoryQueries, cq)
		}
		filters = append(filters, bleve.NewDisjunctionQuery(categoryQueries...))
	}

	if tags, ok := opts.Tags.Get(); ok && len(tags) > 0 {
		tagQueries := make([]query.Query, 0, len(tags))
		for _, t := range tags {
			tq := bleve.NewTermQuery(t.String())
			tq.SetField("tags")
			tagQueries = append(tagQueries, tq)
		}
		// Tags are combined with AND, unlike the other filters.
		filters = append(filters, bleve.NewConjunctionQuery(tagQueries...))
	}

	if len(filters) > 0 {
		allQueries := append([]query.Query{textQuery}, filters...)
		return bleve.NewConjunctionQuery(allQueries...)
	}

	return textQuery
}

// buildMatchQuery builds the query used by MatchFast.
//
// The text part is a boolean query with one optional ("should") prefix clause
// on the name field per whitespace-separated token of q. A blank q therefore
// yields a boolean query with no clauses. Filters from opts are applied in the
// same any/all arrangement as buildSearchQuery, but using match queries rather
// than term queries.
func (s *BleveSearcher) buildMatchQuery(q string, opts searcher.Options) query.Query {
	// Tokens are lower-cased by hand; the "intl" analyser used for the name
	// field also lower-cases what it indexes. Presumably needed because prefix
	// queries are not run through the analyser.
	lowercaseQ := strings.ToLower(q)
	tokens := strings.Fields(lowercaseQ)

	textQuery := bleve.NewBooleanQuery()
	for _, tok := range tokens {
		pq := bleve.NewPrefixQuery(tok)
		pq.SetField("name")
		textQuery.AddShould(pq)
	}

	var filters []query.Query

	if kinds, ok := opts.Kinds.Get(); ok && len(kinds) > 0 {
		kindQueries := make([]query.Query, 0, len(kinds))
		for _, k := range kinds {
			kq := bleve.NewMatchQuery(k.String())
			kq.SetField("kind")
			kindQueries = append(kindQueries, kq)
		}
		filters = append(filters, bleve.NewDisjunctionQuery(kindQueries...))
	}

	if authors, ok := opts.Authors.Get(); ok && len(authors) > 0 {
		authorQueries := make([]query.Query, 0, len(authors))
		for _, a := range authors {
			aq := bleve.NewMatchQuery(a.String())
			aq.SetField("author_id")
			authorQueries = append(authorQueries, aq)
		}
		filters = append(filters, bleve.NewDisjunctionQuery(authorQueries...))
	}

	if categories, ok := opts.Categories.Get(); ok && len(categories) > 0 {
		categoryQueries := make([]query.Query, 0, len(categories))
		for _, c := range categories {
			cq := bleve.NewMatchQuery(c.String())
			cq.SetField("category_id")
			categoryQueries = append(categoryQueries, cq)
		}
		filters = append(filters, bleve.NewDisjunctionQuery(categoryQueries...))
	}

	if tags, ok := opts.Tags.Get(); ok && len(tags) > 0 {
		tagQueries := make([]query.Query, 0, len(tags))
		for _, t := range tags {
			tq := bleve.NewMatchQuery(t.String())
			tq.SetField("tags")
			tagQueries = append(tagQueries, tq)
		}
		// Tags are combined with AND, unlike the other filters.
		filters = append(filters, bleve.NewConjunctionQuery(tagQueries...))
	}

	if len(filters) > 0 {
		allQueries := append([]query.Query{textQuery}, filters...)
		return bleve.NewConjunctionQuery(allQueries...)
	}

	return textQuery
}

// matchFromHit builds a datagraph.Match from the ID and stored fields of hit.
// The boolean is false when the hit ID is not a valid xid or when the stored
// "kind" field is missing or not a recognised kind.
func (s *BleveSearcher) matchFromHit(hit *search.DocumentMatch) (datagraph.Match, bool) {
	id, err := xid.FromString(hit.ID)
	if err != nil {
		return datagraph.Match{}, false
	}

	kindStr, ok := hit.Fields["kind"].(string)
	if !ok {
		return datagraph.Match{}, false
	}

	kind, err := datagraph.NewKind(kindStr)
	if err != nil {
		return datagraph.Match{}, false
	}

	return datagraph.Match{
		ID:          id,
		Kind:        kind,
		Slug:        valueAsString(hit.Fields, "slug"),
		Name:        valueAsString(hit.Fields, "name"),
		Description: valueAsString(hit.Fields, "description"),
	}, true
}

// valueAsString returns fields[key] when it holds a string, and "" when the
// key is absent or holds a value of any other type.
func valueAsString(fields map[string]any, key string) string {
	// A missing key yields a nil interface, for which the assertion fails and
	// leaves str as the empty string.
	str, _ := fields[key].(string)
	return str
}

// Index writes item to the index under its ID.
//
// Author, category and tags are only recorded when item implements the
// corresponding optional interface. An item that does implement WithAuthor or
// WithCategory but reports a nil ID is rejected with an error.
func (s *BleveSearcher) Index(ctx context.Context, item datagraph.Item) error {
	doc := Document{
		ID:          item.GetID().String(),
		Kind:        item.GetKind().String(),
		Name:        item.GetName(),
		Slug:        item.GetSlug(),
		Description: item.GetDesc(),
		Content:     item.GetContent().Plaintext(), // We index plaintext only.
		CreatedAt:   item.GetCreated().Unix(),
	}

	if v, ok := item.(datagraph.WithAuthor); ok {
		if v.GetAuthor().IsNil() {
			return fault.New("index: item has zero-value author ID")
		}
		doc.AuthorID = v.GetAuthor().String()
	}

	if v, ok := item.(datagraph.WithCategory); ok {
		if v.GetCategory().IsNil() {
			return fault.New("index: item has zero-value category ID")
		}
		doc.CategoryID = v.GetCategory().String()
	}

	if v, ok := item.(datagraph.WithTagNames); ok {
		doc.Tags = v.GetTags()
	}

	err := s.client.Index(item.GetID().String(), doc)
	if err != nil {
		return fault.Wrap(err, fctx.With(ctx), fmsg.With(fmt.Sprintf("failed to index document in bleve %s", item.GetID())))
	}

	return nil
}

// Deindex removes the document identified by ir from the index.
func (s *BleveSearcher) Deindex(ctx context.Context, ir datagraph.ItemRef) error {
	err := s.client.Delete(ir.GetID().String())
	if err != nil {
		// Attach ctx metadata like every other error path in this file.
		return fault.Wrap(err, fctx.With(ctx), fmsg.With(fmt.Sprintf("failed to delete document from bleve %s", ir.GetID())))
	}

	return nil
}

// openOrCreateIndex registers the "intl" analyser and then opens the index at
// path, creating it with the mapping from createIndexMapping when no index
// exists there yet. An index that already exists is opened as-is;
// createIndexMapping is not applied to it here.
//
// NOTE(review): the analyser is registered on every call, so a second call in
// the same process may fail with a registration error - confirm New is only
// invoked once.
func openOrCreateIndex(path string) (bleve.Index, error) {
	err := registry.RegisterAnalyzer("intl", InternationalAnalyser)
	if err != nil {
		return nil, fault.Wrap(err, fmsg.With("failed to register international analyzer"))
	}

	index, err := bleve.Open(path)
	if errors.Is(err, bleve.ErrorIndexPathDoesNotExist) {
		indexMapping := createIndexMapping()
		index, err = bleve.New(path, indexMapping)
		if err != nil {
			return nil, fault.Wrap(err, fmsg.With("failed to create new bleve index"))
		}
		return index, nil
	}
	if err != nil {
		return nil, fault.Wrap(err, fmsg.With("failed to open existing bleve index"))
	}

	return index, nil
}

// createIndexMapping returns the mapping used when a new index is created.
//
// The free-text fields (name, slug, description, content) use the "intl"
// analyser, which is also the default analyser. The identifier-like fields
// (kind, author_id, category_id, tags) use the "keyword" analyser. Every field
// is stored except content, which is indexed only; id is stored only.
func createIndexMapping() mapping.IndexMapping {
	indexMapping := bleve.NewIndexMapping()
	indexMapping.DefaultAnalyzer = "intl"

	docMapping := bleve.NewDocumentMapping()

	idFieldMapping := bleve.NewTextFieldMapping()
	idFieldMapping.Store = true
	idFieldMapping.Index = false
	idFieldMapping.Analyzer = "keyword"
	docMapping.AddFieldMappingsAt("id", idFieldMapping)

	kindFieldMapping := bleve.NewTextFieldMapping()
	kindFieldMapping.Store = true
	kindFieldMapping.Index = true
	kindFieldMapping.Analyzer = "keyword"
	docMapping.AddFieldMappingsAt("kind", kindFieldMapping)

	nameFieldMapping := bleve.NewTextFieldMapping()
	nameFieldMapping.Store = true
	nameFieldMapping.Index = true
	nameFieldMapping.Analyzer = "intl"
	docMapping.AddFieldMappingsAt("name", nameFieldMapping)

	slugFieldMapping := bleve.NewTextFieldMapping()
	slugFieldMapping.Store = true
	slugFieldMapping.Index = true
	slugFieldMapping.Analyzer = "intl"
	docMapping.AddFieldMappingsAt("slug", slugFieldMapping)

	descFieldMapping := bleve.NewTextFieldMapping()
	descFieldMapping.Store = true
	descFieldMapping.Index = true
	descFieldMapping.Analyzer = "intl"
	docMapping.AddFieldMappingsAt("description", descFieldMapping)

	contentFieldMapping := bleve.NewTextFieldMapping()
	contentFieldMapping.Store = false
	contentFieldMapping.Index = true
	contentFieldMapping.Analyzer = "intl"
	docMapping.AddFieldMappingsAt("content", contentFieldMapping)

	createdAtFieldMapping := bleve.NewNumericFieldMapping()
	createdAtFieldMapping.Store = true
	createdAtFieldMapping.Index = true
	docMapping.AddFieldMappingsAt("created_at", createdAtFieldMapping)

	authorIDFieldMapping := bleve.NewTextFieldMapping()
	authorIDFieldMapping.Store = true
	authorIDFieldMapping.Index = true
	authorIDFieldMapping.Analyzer = "keyword"
	docMapping.AddFieldMappingsAt("author_id", authorIDFieldMapping)

	categoryIDFieldMapping := bleve.NewTextFieldMapping()
	categoryIDFieldMapping.Store = true
	categoryIDFieldMapping.Index = true
	categoryIDFieldMapping.Analyzer = "keyword"
	docMapping.AddFieldMappingsAt("category_id", categoryIDFieldMapping)

	tagsFieldMapping := bleve.NewTextFieldMapping()
	tagsFieldMapping.Store = true
	tagsFieldMapping.Index = true
	tagsFieldMapping.Analyzer = "keyword"
	docMapping.AddFieldMappingsAt("tags", tagsFieldMapping)

	indexMapping.DefaultMapping = docMapping

	return indexMapping
}

// InternationalAnalyser is a copy of the "standard" analyser but without the
// use of English stopwords. This makes it suitable for any language.
//
// It combines the unicode tokenizer with a single lower-casing token filter.
// The config argument is not used.
func InternationalAnalyser(config map[string]any, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	rv := analysis.DefaultAnalyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
		},
	}

	return &rv, nil
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Southclaws/storyden'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.