Skip to main content
Glama

Storyden

by Southclaws
Mozilla Public License 2.0
229
extract_test.go17 kB
package asker import ( "strings" "testing" "github.com/stretchr/testify/assert" "github.com/Southclaws/storyden/app/services/semdex" ) func Test_streamExtractor(t *testing.T) { t.Run("basic", func(t *testing.T) { a := assert.New(t) stream := []string{ "For", " finance data", ", several", " datasets can", " be useful", " depending on", " the specific", " needs of", " your project", ". Here", " are some", " recommendations:\n\n", "1.", " **Credit", " Analytics Dataset", " by S", "\u0026P Global", "**: This", " dataset provides", " a comprehensive", " range of", " credit risk", " indicators,", " including market", "-based and", " fundamental-based", " scores.", " It covers", " over ", "881,", "000 public", " and private", " companies across", " 160", " industries,", " 183", " countries,", " and ", "241 S", "\u0026P Dow", " Jones Indices", ". It", " includes pre", "-calculated", " probability of", " default and", " credit scores", ", early", " warning indicators", " for credit", " deterioration,", " and systematic", " risk factors", " like country", " and industry", " risk scores", ".\n\n2", ". **", "Prosper", " Loan Dataset", " on Kag", "gle**:", " This dataset", " contains information", " on loan", " origination", " dates,", " original amounts", ", payments", ", returns", ", and", " loss rates", ". It", " is useful", " for analyzing", " loan performance", " and can", " be used", " to create", " a portfolio", " project in", " finance analytics", ". The", " dataset includes", " metrics viable", " from ", "2009", ", and", " it can", " be filtered", " to exclude", " missing values", " and focus", " on specific", " time periods", ".\n\n3", ". **", "Finance and", " Risk Analytics", " Dataset on", " GitHub**:", " This dataset", " contains ", "6 years", " of weekly", " stock information", " for ", "10 different", " Indian stocks", ". It", " can be", " used for", " market risk", " analysis and", " includes data", " on stock", " prices,", " returns,", " and share", " insights.", " This dataset", " is particularly", " useful for", " those interested", " in stock", " market data", " and risk", " analysis.\n\n", "4.", " **Data", "hub.io", "**: This", " platform offers", " a variety", " of business", " and finance", " datasets,", " including stock", " market data", ", property", " prices,", " inflation,", " and logistics", ". The", " data is", " often updated", " monthly or", " daily,", " providing a", " constant flow", " of real", "-time information", ".\n\nThese", " datasets can", " be used", " to create", " a portfolio", " project in", " finance analytics", ", covering", " various aspects", " of finance", " such as", " credit risk", ", loan", " performance,", " and market", " risk.\n\n", "### Sources", "\n-", " https://", "www.market", "place.s", "pglobal", ".com/en", "/datasets", "/credit", "-analytics-(", "146)\n", "- https", "://www", ".youtube.com", "/watch?v", "=rS", "0NU", "ngQ", "cbU", "\n-", " https://", "github.com", "/Honey", "28Git", "/Finance", "-and-R", "isk-An", "alytics\n", "- https", "://career", "foundry", ".com/en", "/blog/data", "-analytics/", "where-to", "-find-free", "-datasets", "/", } iter := func(yield func(string, error) bool) { for _, v := range stream { if !yield(v, nil) { return } } } acc := []semdex.AskResponseChunk{} buf := "" meta := semdex.AskResponseChunkMeta{} for i := range streamExtractor(iter) { switch v := i.(type) { case *semdex.AskResponseChunkText: buf += v.Chunk case *semdex.AskResponseChunkMeta: meta = *v } acc = append(acc, i) } a.Equal(strings.Join(stream, ""), buf) a.Len(meta.URLs, 4) }) t.Run("basic_sdrs", func(t *testing.T) { a := assert.New(t) stream := []string{ "For", " finance data", ", several", " datasets can", " be useful", " depending on", " the specific", " needs of", " your project", ". Here", " are some", " recommendations:\n\n", "1.", " **Credit", " Analytics Dataset", " by S", "\u0026P Global", "**: This", " dataset provides", " a comprehensive", " range of", " credit risk", " indicators,", " including market", "-based and", " fundamental-based", " scores.", " It covers", " over ", "881,", "000 public", " and private", " companies across", " 160", " industries,", " 183", " countries,", " and ", "241 S", "\u0026P Dow", " Jones Indices", ". It", " includes pre", "-calculated", " probability of", " default and", " credit scores", ", early", " warning indicators", " for credit", " deterioration,", " and systematic", " risk factors", " like country", " and industry", " risk scores", ".\n\n2", ". **", "Prosper", " Loan Dataset", " on Kag", "gle**:", " This dataset", " contains information", " on loan", " origination", " dates,", " original amounts", ", payments", ", returns", ", and", " loss rates", ". It", " is useful", " for analyzing", " loan performance", " and can", " be used", " to create", " a portfolio", " project in", " finance analytics", ". The", " dataset includes", " metrics viable", " from ", "2009", ", and", " it can", " be filtered", " to exclude", " missing values", " and focus", " on specific", " time periods", ".\n\n3", ". **", "Finance and", " Risk Analytics", " Dataset on", " GitHub**:", " This dataset", " contains ", "6 years", " of weekly", " stock information", " for ", "10 different", " Indian stocks", ". It", " can be", " used for", " market risk", " analysis and", " includes data", " on stock", " prices,", " returns,", " and share", " insights.", " This dataset", " is particularly", " useful for", " those interested", " in stock", " market data", " and risk", " analysis.\n\n", "4.", " **Data", "hub.io", "**: This", " platform offers", " a variety", " of business", " and finance", " datasets,", " including stock", " market data", ", property", " prices,", " inflation,", " and logistics", ". The", " data is", " often updated", " monthly or", " daily,", " providing a", " constant flow", " of real", "-time information", ".\n\nThese", " datasets can", " be used", " to create", " a portfolio", " project in", " finance analytics", ", covering", " various aspects", " of finance", " such as", " credit risk", ", loan", " performance,", " and market", " risk.\n\n", "### Sources", "\n-", " sdr:", "thread", "/cpvf89ifunp0qr2aqp2g\n", "- sdr", ":node/", "cpvf89ifunp0qr2aqp8g", "\n-", " sdr:profile", "/cpvf89ifunp0qr2aqp8g", } iter := func(yield func(string, error) bool) { for _, v := range stream { if !yield(v, nil) { return } } } acc := []semdex.AskResponseChunk{} buf := "" meta := semdex.AskResponseChunkMeta{} for i := range streamExtractor(iter) { switch v := i.(type) { case *semdex.AskResponseChunkText: buf += v.Chunk case *semdex.AskResponseChunkMeta: meta = *v } acc = append(acc, i) } a.Equal(strings.Join(stream, ""), buf) a.Len(meta.URLs, 0) a.Len(meta.Refs, 3) }) t.Run("edge_case_full_url_in_single_chunk", func(t *testing.T) { a := assert.New(t) stream := []string{ "this", "is", "a", "chunk", "with", "a", "full", "url", "https://www.example.com", "in", "it", } iter := func(yield func(string, error) bool) { for _, v := range stream { if !yield(v, nil) { return } } } acc := []semdex.AskResponseChunk{} buf := "" meta := semdex.AskResponseChunkMeta{} for i := range streamExtractor(iter) { switch v := i.(type) { case *semdex.AskResponseChunkText: buf += v.Chunk case *semdex.AskResponseChunkMeta: meta = *v } acc = append(acc, i) } a.Equal(strings.Join(stream, ""), buf) a.Len(meta.URLs, 1) a.Len(meta.Refs, 0) }) t.Run("edge_case_full_url_plus_more_in_single_chunk", func(t *testing.T) { a := assert.New(t) stream := []string{ "this ", "is", " a", " chunk", " with", " a", " full", " url ", "https://www.example.com\nin", " it", } iter := func(yield func(string, error) bool) { for _, v := range stream { if !yield(v, nil) { return } } } acc := []semdex.AskResponseChunk{} buf := "" meta := semdex.AskResponseChunkMeta{} for i := range streamExtractor(iter) { switch v := i.(type) { case *semdex.AskResponseChunkText: buf += v.Chunk case *semdex.AskResponseChunkMeta: meta = *v } acc = append(acc, i) } a.Equal(strings.Join(stream, ""), buf) a.Len(meta.URLs, 1) a.Len(meta.Refs, 0) }) t.Run("supports_break", func(t *testing.T) { a := assert.New(t) stream := []string{ "For", " finance data", ", several", " datasets can", " be useful", " depending on", " the specific", " needs of", " your project", ". Here", " are some", " recommendations:\n\n", "1.", " **Credit", " Analytics Dataset", " by S", "\u0026P Global", "**: This", " dataset provides", " a comprehensive", " range of", " credit risk", " indicators,", " including market", "-based and", " fundamental-based", " scores.", " It covers", " over ", "881,", "000 public", " and private", " companies across", " 160", " industries,", " 183", " countries,", " and ", "241 S", "\u0026P Dow", " Jones Indices", ". It", " includes pre", "-calculated", " probability of", " default and", " credit scores", ", early", " warning indicators", " for credit", " deterioration,", " and systematic", " risk factors", " like country", " and industry", " risk scores", ".\n\n2", ". **", "Prosper", " Loan Dataset", " on Kag", "gle**:", " This dataset", " contains information", " on loan", " origination", " dates,", " original amounts", ", payments", ", returns", ", and", " loss rates", ". It", " is useful", " for analyzing", " loan performance", " and can", " be used", " to create", " a portfolio", " project in", " finance analytics", ". The", " dataset includes", " metrics viable", " from ", "2009", ", and", " it can", " be filtered", " to exclude", " missing values", " and focus", " on specific", " time periods", ".\n\n3", ". **", "Finance and", " Risk Analytics", " Dataset on", " GitHub**:", " This dataset", " contains ", "6 years", " of weekly", " stock information", " for ", "10 different", " Indian stocks", ". It", " can be", " used for", " market risk", " analysis and", " includes data", " on stock", " prices,", " returns,", " and share", " insights.", " This dataset", " is particularly", " useful for", " those interested", " in stock", " market data", " and risk", " analysis.\n\n", "4.", " **Data", "hub.io", "**: This", " platform offers", " a variety", " of business", " and finance", " datasets,", " including stock", " market data", ", property", " prices,", " inflation,", " and logistics", ". The", " data is", " often updated", " monthly or", " daily,", " providing a", " constant flow", " of real", "-time information", ".\n\nThese", " datasets can", " be used", " to create", " a portfolio", " project in", " finance analytics", ", covering", " various aspects", " of finance", " such as", " credit risk", ", loan", " performance,", " and market", " risk.\n\n", "### Sources", "\n-", " https://", "www.market", "place.s", "pglobal", ".com/en", "/datasets", "/credit", "-analytics-(", "146)\n", "- https", "://www", ".youtube.com", "/watch?v", "=rS", "0NU", "ngQ", "cbU", "\n-", " https://", "github.com", "/Honey", "28Git", "/Finance", "-and-R", "isk-An", "alytics\n", "- https", "://career", "foundry", ".com/en", "/blog/data", "-analytics/", "where-to", "-find-free", "-datasets", "/", } iter := func(yield func(string, error) bool) { for _, v := range stream { if !yield(v, nil) { return } } } acc := []semdex.AskResponseChunk{} buf := "" meta := semdex.AskResponseChunkMeta{} count := 0 for i := range streamExtractor(iter) { switch v := i.(type) { case *semdex.AskResponseChunkText: buf += v.Chunk case *semdex.AskResponseChunkMeta: meta = *v } acc = append(acc, i) // break at the 173rd token. This token is the beginning of a URL. // This means the source tokens are advanced until the end of the // URL and the actual consumed tokens are 179 because the range from // 173 to 179 is the full URL: // "https://www.marketplace.spglobal.com/en/datasets/credit-analytics-(146)" count++ if count == 173 { break } } // 179 is the last index of the stream before the break // remove the trailing newline because while this char is actually part // of the 179th token, it's not returned by yieldPeekedReference because // it doesn't yield actual tokens, it yields the entire parsed URL. wantBuf := strings.TrimRight(strings.Join(stream[:179], ""), "\n") a.Equal(wantBuf, buf) a.Len(meta.URLs, 1) }) t.Run("supports_break_on_input_stream", func(t *testing.T) { a := assert.New(t) stream := []string{ "For", " finance data", ", several", " datasets can", " be useful", " depending on", " the specific", " needs of", " your project", ". Here", " are some", " recommendations:\n\n", "1.", " **Credit", " Analytics Dataset", " by S", "\u0026P Global", "**: This", " dataset provides", " a comprehensive", " range of", " credit risk", " indicators,", " including market", "-based and", " fundamental-based", " scores.", " It covers", " over ", "881,", "000 public", " and private", " companies across", " 160", " industries,", " 183", " countries,", " and ", "241 S", "\u0026P Dow", " Jones Indices", ". It", " includes pre", "-calculated", " probability of", " default and", " credit scores", ", early", " warning indicators", " for credit", " deterioration,", " and systematic", " risk factors", " like country", " and industry", " risk scores", ".\n\n2", ". **", "Prosper", " Loan Dataset", " on Kag", "gle**:", " This dataset", " contains information", " on loan", " origination", " dates,", " original amounts", ", payments", ", returns", ", and", " loss rates", ". It", " is useful", " for analyzing", " loan performance", " and can", " be used", " to create", " a portfolio", " project in", " finance analytics", ". The", " dataset includes", " metrics viable", " from ", "2009", ", and", " it can", " be filtered", " to exclude", " missing values", " and focus", " on specific", " time periods", ".\n\n3", ". **", "Finance and", " Risk Analytics", " Dataset on", " GitHub**:", " This dataset", " contains ", "6 years", " of weekly", " stock information", " for ", "10 different", " Indian stocks", ". It", " can be", " used for", " market risk", " analysis and", " includes data", " on stock", " prices,", " returns,", " and share", " insights.", " This dataset", " is particularly", " useful for", " those interested", " in stock", " market data", " and risk", " analysis.\n\n", "4.", " **Data", "hub.io", "**: This", " platform offers", " a variety", " of business", " and finance", " datasets,", " including stock", " market data", ", property", " prices,", " inflation,", " and logistics", ". The", " data is", " often updated", " monthly or", " daily,", " providing a", " constant flow", " of real", "-time information", ".\n\nThese", " datasets can", " be used", " to create", " a portfolio", " project in", " finance analytics", ", covering", " various aspects", " of finance", " such as", " credit risk", ", loan", " performance,", " and market", " risk.\n\n", "### Sources", "\n-", " https://", "www.market", "place.s", "pglobal", ".com/en", "/datasets", "/credit", "-analytics-(", "146)\n", "- https", "://www", ".youtube.com", "/watch?v", "=rS", "0NU", "ngQ", "cbU", "\n-", " https://", "github.com", "/Honey", "28Git", "/Finance", "-and-R", "isk-An", "alytics\n", "- https", "://career", "foundry", ".com/en", "/blog/data", "-analytics/", "where-to", "-find-free", "-datasets", "/", } iter := func(yield func(string, error) bool) { for i, v := range stream { // Break the input stream at the 173rd token. This tests that // the streamExtractor break handling functionality works. if i == 173 { break } if !yield(v, nil) { return } } } acc := []semdex.AskResponseChunk{} buf := "" meta := semdex.AskResponseChunkMeta{} count := 0 for i := range streamExtractor(iter) { switch v := i.(type) { case *semdex.AskResponseChunkText: buf += v.Chunk case *semdex.AskResponseChunkMeta: meta = *v } acc = append(acc, i) // break at the 173rd token. This token is the beginning of a URL. // This means the source tokens are advanced until the end of the // URL and the actual consumed tokens are 179 because the range from // 173 to 179 is the full URL: // "https://www.marketplace.spglobal.com/en/datasets/credit-analytics-(146)" count++ if count == 173 { break } } // 179 is the last index of the stream before the break // remove the trailing newline because while this char is actually part // of the 179th token, it's not returned by yieldPeekedReference because // it doesn't yield actual tokens, it yields the entire parsed URL. wantBuf := strings.TrimRight(strings.Join(stream[:173], ""), "\n") a.Equal(wantBuf, buf) a.Len(meta.URLs, 1) }) }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Southclaws/storyden'

If you have feedback or need assistance with the MCP directory API, please join our Discord server