<?xml version="1.0" encoding="UTF-8" ?>
<!-- Solr MCP, by allenday (Verified) -->
<schema name="unified" version="1.6">
<!--
  Primitive field types.

  The numeric and date types are point-based. Point fields cannot be un-inverted from the
  index, so sorting, faceting, and function queries on them need docValues; it is therefore
  enabled on each point type below. Turning docValues on for an existing collection requires
  a full reindex.
-->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="int" class="solr.IntPointField" omitNorms="true" positionIncrementGap="0" docValues="true"/>
<fieldType name="float" class="solr.FloatPointField" omitNorms="true" positionIncrementGap="0" docValues="true"/>
<fieldType name="long" class="solr.LongPointField" omitNorms="true" positionIncrementGap="0" docValues="true"/>
<fieldType name="double" class="solr.DoublePointField" omitNorms="true" positionIncrementGap="0" docValues="true"/>
<fieldType name="date" class="solr.DatePointField" omitNorms="true" positionIncrementGap="0" docValues="true"/>
<!--
  General-purpose English text type.

  Both analyzers tokenize with StandardTokenizer, drop stop words, split tokens on case
  changes and letter/number boundaries, lowercase, apply minimal English stemming, and
  remove duplicate tokens that land on the same position.

  Index time additionally catenates the split parts (catenateWords/catenateNumbers), so the
  joined form of a split token is indexed alongside its parts.
  Query time does not catenate, and expands synonyms from synonyms.txt.
-->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <!-- The indexer cannot consume a token graph, so the graph produced by the word
         delimiter filter is flattened before the remaining filters run. -->
    <filter class="solr.FlattenGraphFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <!-- Synonyms are applied at query time only; ignoreCase lets them match before lowercasing. -->
    <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!--
  Vector field type for embeddings: 768-dimensional dense vectors compared with cosine
  similarity.

  Field type settings are read from XML attributes, so a <vectorEncoding> child element has
  no effect and is not used here. FLOAT32 is the default encoding. To state it explicitly,
  add the attribute vectorEncoding="FLOAT32" (NOTE(review): confirm the deployed Solr
  release accepts this attribute before adding it).
-->
<fieldType name="knn_vector" class="solr.DenseVectorField"
           vectorDimension="768" similarityFunction="cosine"/>
<!-- Explicit document fields -->

<!-- Document identifier; single-valued and mandatory -->
<field name="id"             type="string"       indexed="true" stored="true" required="true" multiValued="false"/>

<!-- Analyzed full-text fields -->
<field name="title"          type="text_general" indexed="true" stored="true"/>
<field name="content"        type="text_general" indexed="true" stored="true"/>
<field name="text"           type="text_general" indexed="true" stored="true"/>

<!-- Exact-match metadata (string values are not analyzed) -->
<field name="section"        type="string"       indexed="true" stored="true"/>
<field name="section_number" type="int"          indexed="true" stored="true"/>
<field name="source"         type="string"       indexed="true" stored="true"/>
<field name="url"            type="string"       indexed="true" stored="true"/>
<field name="published_date" type="date"         indexed="true" stored="true"/>

<!-- Multi-valued metadata -->
<field name="author"         type="string"       indexed="true" stored="true" multiValued="true"/>
<field name="tags"           type="string"       indexed="true" stored="true" multiValued="true"/>

<!-- Dense embedding of the document, using the knn_vector type -->
<field name="content_vector" type="knn_vector"   indexed="true" stored="true"/>
<!-- Dynamic fields: the field name suffix selects the type -->

<!-- Numeric suffixes -->
<dynamicField name="*_i"      type="int"          indexed="true" stored="true"/>
<dynamicField name="*_s"      type="string"       indexed="true" stored="true"/>
<dynamicField name="*_l"      type="long"         indexed="true" stored="true"/>
<dynamicField name="*_t"      type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_b"      type="boolean"      indexed="true" stored="true"/>
<dynamicField name="*_f"      type="float"        indexed="true" stored="true"/>
<dynamicField name="*_d"      type="double"       indexed="true" stored="true"/>
<dynamicField name="*_dt"     type="date"         indexed="true" stored="true"/>

<!-- Multi-valued strings -->
<dynamicField name="*_ss"     type="string"       indexed="true" stored="true" multiValued="true"/>

<!-- Additional embedding fields -->
<dynamicField name="*_vector" type="knn_vector"   indexed="true" stored="true"/>
<!-- Unique key: the "id" field identifies each document -->
<uniqueKey>id</uniqueKey>
<!-- Catch-all search: every rule below copies its source into the _text_ field -->

<!-- Analyzed text fields -->
<copyField source="title"   dest="_text_"/>
<copyField source="content" dest="_text_"/>
<copyField source="text"    dest="_text_"/>

<!-- String metadata fields -->
<copyField source="section" dest="_text_"/>
<copyField source="source"  dest="_text_"/>
<copyField source="author"  dest="_text_"/>
<copyField source="tags"    dest="_text_"/>

<!-- Dynamic text (*_t) and string (*_s) fields -->
<copyField source="*_t"     dest="_text_"/>
<copyField source="*_s"     dest="_text_"/>
<!-- Special fields -->

<!-- Destination of the copyField rules: indexed for search, not stored -->
<field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/>

<!-- Document version field. Its "long" type is point-based, which cannot be un-inverted
     from the index, so docValues is set on the field itself to keep version values
     readable regardless of the type-level default. -->
<field name="_version_" type="long" indexed="true" stored="true" docValues="true"/>
</schema>