diff --git a/fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java b/fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
index 4950a386..969688ae 100644
--- a/fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
+++ b/fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
@@ -28,4 +28,15 @@ public interface Extractor {
 
     ExtractData getText(InputStream in, Map params);
 
+    /**
+     * Returns the weight of this extractor. When an extractor is already
+     * registered under a key, {@code ExtractorFactory#addExtractor} keeps it
+     * only if its weight is strictly greater than the new extractor's weight.
+     *
+     * @return the weight of this extractor; the default implementation returns 1
+     */
+    default int getWeight() {
+        return 1;
+    }
+
 }
diff --git a/fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java b/fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java
index 29fa17a5..ac3a2856 100644
--- a/fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java
+++ b/fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java
@@ -48,6 +48,14 @@ public void addExtractor(final String key, final Extractor extractor) {
         if (extractor == null) {
             throw new CrawlerSystemException("The extractor is null.");
         }
+        // A registered extractor survives only when it strictly outweighs the new one; on a tie the newcomer replaces it.
+        final Extractor oldExtractor = extractorMap.get(key);
+        if (oldExtractor != null && oldExtractor.getWeight() > extractor.getWeight()) {
+            if (logger.isDebugEnabled()) {
+                logger.debug("Ignored {} on {}. Use {}.", extractor.getClass().getName(), key, oldExtractor.getClass().getName());
+            }
+            return;
+        }
         extractorMap.put(key, extractor);
         if (logger.isDebugEnabled()) {
             logger.debug("Loaded {} : {}", key, extractor.getClass().getName());
diff --git a/fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/ExtractorFactoryTest.java b/fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/ExtractorFactoryTest.java
index b29894df..4f72c4fa 100644
--- a/fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/ExtractorFactoryTest.java
+++ b/fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/ExtractorFactoryTest.java
@@ -123,4 +123,40 @@ public void test_builder() {
         assertEquals("test",
                 extractorFactory.builder(new ByteArrayInputStream("test".getBytes()), null).filename("test.txt").extract().getContent());
     }
+
+    public void test_addExtractor_weight() {
+        final String key = "application/test";
+        assertNull(extractorFactory.getExtractor(key));
+        extractorFactory.addExtractor(key, new Extractor() {
+            @Override
+            public ExtractData getText(InputStream in, Map params) {
+                return null;
+            }
+        });
+        assertEquals(1, extractorFactory.getExtractor(key).getWeight());
+        extractorFactory.addExtractor(key, new Extractor() {
+            @Override
+            public ExtractData getText(InputStream in, Map params) {
+                return null;
+            }
+
+            @Override
+            public int getWeight() {
+                return 10;
+            }
+        });
+        assertEquals(10, extractorFactory.getExtractor(key).getWeight());
+        extractorFactory.addExtractor(key, new Extractor() {
+            @Override
+            public ExtractData getText(InputStream in, Map params) {
+                return null;
+            }
+
+            @Override
+            public int getWeight() {
+                return 5;
+            }
+        });
+        assertEquals(10, extractorFactory.getExtractor(key).getWeight());
+    }
 }