Skip to content

Commit

Permalink
CSV importer
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasgautheron committed Jul 14, 2021
1 parent 8f91219 commit b488a4f
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ All notable changes to this project will be documented in this file.

### Added

- CSV importer to register pre-existing CSV annotations into the index without performing any conversion

### Fixed

## [0.0.1] - 2021-07-14
Expand Down
8 changes: 8 additions & 0 deletions ChildProject/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ def __init_subclass__(cls, **kwargs):
super().__init_subclass__(**kwargs)
converters[cls.FORMAT] = cls


class CsvConverter(AnnotationConverter):
    """Converter for annotations that are already stored as CSV.

    The input file is loaded with pandas and returned as-is; no columns are
    renamed, added or transformed.
    """

    FORMAT = 'csv'

    @staticmethod
    def convert(filename: str) -> pd.DataFrame:
        """Load a CSV annotation file.

        :param filename: path to the CSV file to read
        :return: the file's content as parsed by ``pandas.read_csv`` with
            default options
        """
        dataframe = pd.read_csv(filename)
        return dataframe

class VtcConverter(AnnotationConverter):
FORMAT = 'vtc_rttm'

Expand Down
16 changes: 16 additions & 0 deletions tests/data/csv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
segment_onset,segment_offset,speaker_type
1982193,1982492,NA
1983496,1988992,NA
1984136,1984993,CHI
1984168,1986512,OCH
1985492,1988951,FEM
28278092,28278784,NA
28282768,28284052,MAL
28283492,28289116,NA
28284010,28287945,OCH
28285421,28285575,MAL
28288492,28289007,OCH
28294206,28294692,MAL
28300492,28300769,NA
28310511,28312511,MAL
28310992,28312491,NA
9 changes: 9 additions & 0 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ def project(request):
for raw_annotation in glob.glob("output/annotations/annotations/*.*/converted"):
shutil.rmtree(raw_annotation)

def test_csv():
    """Check that CsvConverter returns the content of a CSV file unchanged.

    The input fixture ``tests/data/csv.csv`` is run through the converter and
    compared against the expected output stored in ``tests/truth/csv.csv``.
    """
    # Convert the *input* fixture rather than the truth file itself; reading
    # the truth file on both sides would make the comparison trivially true
    # and leave tests/data/csv.csv unused.
    # pandas parses the literal string "NA" as a missing value by default,
    # so fillna('NA') restores it on both sides before comparing.
    converted = CsvConverter().convert('tests/data/csv.csv').fillna('NA')
    truth = pd.read_csv('tests/truth/csv.csv').fillna('NA')

    # Both frames go through standardize_dataframe with the same column list
    # before comparison (presumably to normalize column/row order -- helper
    # is defined outside this view, confirm).
    pd.testing.assert_frame_equal(
        standardize_dataframe(converted, converted.columns),
        standardize_dataframe(truth, converted.columns)
    )

def test_vtc():
converted = VtcConverter().convert('tests/data/vtc.rttm')
truth = pd.read_csv('tests/truth/vtc.csv').fillna('NA')
Expand Down
16 changes: 16 additions & 0 deletions tests/truth/csv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
segment_onset,segment_offset,speaker_type
1982193,1982492,NA
1983496,1988992,NA
1984136,1984993,CHI
1984168,1986512,OCH
1985492,1988951,FEM
28278092,28278784,NA
28282768,28284052,MAL
28283492,28289116,NA
28284010,28287945,OCH
28285421,28285575,MAL
28288492,28289007,OCH
28294206,28294692,MAL
28300492,28300769,NA
28310511,28312511,MAL
28310992,28312491,NA

0 comments on commit b488a4f

Please sign in to comment.