From 10ba608f324d08dd17158ebeb670170ba23f15e5 Mon Sep 17 00:00:00 2001 From: jakoble <37188634+jakoble@users.noreply.github.com> Date: Thu, 7 Nov 2024 15:37:00 +0100 Subject: [PATCH] Add files via upload Added 1 PORTULAN corpus --- .../cintil-corpus-internacional.json | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 corpora/reference-corpora/cintil-corpus-internacional.json diff --git a/corpora/reference-corpora/cintil-corpus-internacional.json b/corpora/reference-corpora/cintil-corpus-internacional.json new file mode 100644 index 0000000..040956b --- /dev/null +++ b/corpora/reference-corpora/cintil-corpus-internacional.json @@ -0,0 +1,17 @@ +{ + "Name": "CINTIL-Corpus Internacional do Português", + "URL": "https://hdl.handle.net/21.11129/0000-000B-D33B-5", + "Family": "Manually annotated corpora", + "Description": "This is a linguistically annotated corpus of both written and spoken Portuguese, whose annotations were manually verified.\nThe written texts consist of fictional, newspaper, and technical discourse (689,124 tokens) while the spoken texts correspond to both informal and formal speech (502,622 tokens).\nThe corpus is available from PORTULAN.", + "Language": ["por"], + "Licence": "ELRA END USER", + "Size": ["1 million tokens"], + "Annotation": ["tokenised", "PoS-tagged", "lemmatised"], + "Infrastructure": "CLARIN", + "Group": ["PoS MSD tagging"], + "Access": { + "Concordancer": "http://cintil.ul.pt/", + "Download": "https://hdl.handle.net/21.11129/0000-000B-D33B-5" + }, + "Publication": "" +} \ No newline at end of file