Skip to content

Commit

Permalink
Remove campos não clean_* e garante que a pesquisa com caracteres especiais do Lucene seja encontrada
Browse files Browse the repository at this point in the history

Mais detalhes sobre essa atividade podem ser encontrados no seguinte tíquete:
scieloorg/search-journals-proc#26 (comment)
  • Loading branch information
gitnnolabs committed Feb 5, 2025
1 parent 868f02e commit deca6de
Showing 1 changed file with 12 additions and 25 deletions.
37 changes: 12 additions & 25 deletions solr/8.5.1/cores/solrdata/data/articles/conf/managed-schema
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,6 @@
<!-- Dynamic Field to Article Keywords in any languages -->
<dynamicField name="keyword_*" type="text" indexed="false" stored="true" multiValued="true"/>

<!-- Dynamic Field to Abstract in any languages **from:ab_* to:abstract_in_* ** -->
<dynamicField name="cleaned_ab_*" type="text" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="cleaned_ti_*" type="text_ws" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="cleaned_keyword_*" type="text" indexed="false" stored="true" multiValued="true"/>

<!-- Allow search for keywords in different languages -->
<field name="kw" type="text" indexed="true" stored="false" multiValued="true"/>

Expand Down Expand Up @@ -283,18 +278,15 @@
<copyField source="id" dest="tw"/>
<copyField source="ti" dest="tw"/>
<copyField source="ti_*" dest="tw"/>
<copyField source="cleaned_ti_*" dest="tw"/>
<copyField source="au" dest="tw"/>
<copyField source="ab" dest="tw"/>
<copyField source="ab_*" dest="tw"/>
<copyField source="cleaned_ab_*" dest="tw"/>
<copyField source="ta" dest="tw"/>
<copyField source="doi" dest="tw"/>
<copyField source="orcid" dest="tw"/>
<copyField source="sponsor" dest="tw"/>
<copyField source="journal_title" dest="tw"/>
<copyField source="keyword_*" dest="tw"/>
<copyField source="cleaned_keyword_*" dest="tw"/>
<copyField source="document_fk_au" dest="tw"/>
<copyField source="document_fk_ta" dest="tw"/>
<copyField source="citation_fk_au" dest="tw"/>
Expand All @@ -308,18 +300,12 @@
<copyField source="ab" dest="subject"/>
<copyField source="ab_*" dest="subject"/>
<copyField source="keyword_*" dest="subject"/>
<copyField source="cleaned_ti_*" dest="subject"/>
<copyField source="cleaned_ab_*" dest="subject"/>
<copyField source="cleaned_keyword_*" dest="subject"/>
<!-- /Subject index -->

<!-- Language fields -->
<copyField source="ab_*" dest="ab"/>
<copyField source="ti_*" dest="ti"/>
<copyField source="keyword_*" dest="kw"/>
<copyField source="cleaned_ti_*" dest="ti"/>
<copyField source="cleaned_ab_*" dest="ab"/>
<copyField source="cleaned_keyword_*" dest="kw"/>
<!-- /Language fields -->

<!-- Cluster indexes -->
Expand Down Expand Up @@ -392,13 +378,13 @@
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> -->
<filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> -->
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false" />
<filter class="solr.LowerCaseFilterFactory"/>
Expand All @@ -408,16 +394,17 @@
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false" />
<filter class="solr.LowerCaseFilterFactory"/>
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern='[+\-|!(){}\[\]^"~*?:\/\\“”]' replacement=""/>
<!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> -->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false" />
<filter class="solr.LowerCaseFilterFactory"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern='[+\-|!(){}\[\]^"~*?:\/\\“”]' replacement=""/>
<!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> -->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
</analyzer>
</fieldType>

Expand Down Expand Up @@ -447,7 +434,7 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> -->
</analyzer>
</fieldType>

Expand Down

0 comments on commit deca6de

Please sign in to comment.