Skip to content

Commit

Permalink
fix string input in csv source2dicts
Browse files Browse the repository at this point in the history
  • Loading branch information
lukavdplas committed Apr 11, 2024
1 parent 0372a6e commit d35e888
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 5 deletions.
2 changes: 1 addition & 1 deletion ianalyzer_readers/readers/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def source2dicts(self, source: Source) -> Iterable[Document]:
if isinstance(source, str):
filename = source
metadata = {}
if isinstance(source, bytes):
elif isinstance(source, bytes):
raise NotImplementedError()
else:
filename, metadata = source
Expand Down
4 changes: 1 addition & 3 deletions tests/mock_csv_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ class TestCSVReader(CSVReader):
def sources(self, **kwargs):
for filename in os.listdir(self.data_directory):
full_path = os.path.join(self.data_directory, filename)
yield full_path, {
'filename': filename
}
yield full_path

fields = [
Field(
Expand Down
14 changes: 13 additions & 1 deletion tests/test_csvcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,20 @@ def test_csv():
corpus = TestCSVReader()

sources = list(corpus.sources())
assert len(sources) == 1 and sources[0][1] == {'filename': 'example.csv'}
assert len(sources) == 1

docs = corpus.source2dicts(sources[0])
for doc, target in zip(docs, target_documents):
assert doc == target

def test_csv_supported_source_types():
    """Check that ``source2dicts`` accepts both supported source shapes.

    The mock reader's ``sources()`` is expected to yield plain string
    paths. ``source2dicts`` must run without raising when given either
    that bare path or a ``(path, metadata)`` tuple.
    """
    reader = TestCSVReader()
    path = next(reader.sources())
    assert isinstance(path, str)

    # First the bare path, then the same path paired with (empty) metadata.
    # The result is wrapped in ``list`` so the returned iterable is fully
    # consumed and any error during extraction surfaces here.
    for candidate in (path, (path, {})):
        list(reader.source2dicts(candidate))

0 comments on commit d35e888

Please sign in to comment.