From 0e776e39eeeb5ec6a7435ea840b0cd8d7b470053 Mon Sep 17 00:00:00 2001 From: Robert Forkel Date: Wed, 15 May 2024 20:59:23 +0200 Subject: [PATCH] work on errata fixing --- cldfbench_languageatlasofthepacificarea.py | 194 ++++++++++++++++++--- etc/errata.csv | 6 + etc/languages_with_comment.csv | 72 ++++---- 3 files changed, 209 insertions(+), 63 deletions(-) create mode 100644 etc/errata.csv diff --git a/cldfbench_languageatlasofthepacificarea.py b/cldfbench_languageatlasofthepacificarea.py index 4a12482..278c4ec 100644 --- a/cldfbench_languageatlasofthepacificarea.py +++ b/cldfbench_languageatlasofthepacificarea.py @@ -2,34 +2,74 @@ import functools import itertools import collections +import typing +import urllib.request import geopandas from pycldf import Sources -from clldutils.jsonlib import dump +from clldutils.jsonlib import dump, load from clldutils.markup import add_markdown_text from cldfbench import Dataset as BaseDataset from shapely.geometry import Point, shape +import pycountry +from lxml.etree import HTMLParser, fromstring from cldfgeojson import MEDIA_TYPE, aggregate, feature_collection, merged_geometry, fixed_geometry +DC_RIGHTS = "© ECAI Digital Language Atlas of the Pacific Area" +# License for scanned leaves New Guinea (https://ecaidata.org/dataset/language_atlas_of_the_pacific_scanned_atlas_leaves_-_new_guinea): +# Leaves 1-12 +# CC-BY +# License for scanned leaves Taiwan (https://ecaidata.org/dataset/pacific-language-atlas-leaves-taiwan): +# Leave 30 +# http://creativecommons.org/licenses/by-nc/2.0/ -def norm(d): +# https://ecaidata.org/dataset/language_atlas_of_the_pacific_scanned_atlas_leaves_-_new_guinea + +COLS = ['LANGUAGE', 'COUNTRY_NAME', 'ISLAND_NAME', 'SOVEREIGN'] + + +def norm_metadata(d) -> typing.Union[typing.Dict[str, str], None]: + """ + Normalize field names and field content for country and island names. + + Return `None` if the record does not contain metadata about a language polygon. + """ for k in ['ISLAND_NAM', 'ISLAND_NA_', 'ISL_NAM']: if k in d: - d['ISLAND_NAME'] = d.pop(k) + v = d.pop(k) + d['ISLAND_NAME'] = { + 'apua New Guinea': 'Papua New Guinea', + 'Papua New Gu': 'Papua New Guinea', + }.get(v, v) if 'CNTRY_NAME' in d: d['COUNTRY_NAME'] = d.pop('CNTRY_NAME') - if d['COUNTRY_NAME'] == 'Tailand': - d['COUNTRY_NAME'] = 'Thailand' + ncountries = [] + for name in d['COUNTRY_NAME'].split('/'): + name = { + 'Tailand': 'Thailand', + 'Burma': 'Myanmar', + 'Christmas I.': 'Christmas Island', + 'East Tiimor': 'Timor-Leste', + 'East Timor': 'Timor-Leste', + 'Kampuchea': 'Cambodia', + 'Laos': "Lao People's Democratic Republic", + }.get(name, name) + assert pycountry.countries.lookup(name) + ncountries.append(name) + d['COUNTRY_NAME'] = '/'.join(ncountries) if 'SOVEREIGN' in d and 'COUNTRY_NAME' not in d: if d['SOVEREIGN'] == 'Australia': d['COUNTRY_NAME'] = 'Australia' if d.get('LANGUAGE', '').startswith('Uninhabite'): - del d['LANGUAGE'] + return None if d.get('LANGUAGE', '').startswith('Unclassified'): - del d['LANGUAGE'] + return None for v in d.values(): assert ';' not in v + for col in COLS: + d.setdefault(col, '') + assert set(COLS).issubset(set(d.keys())) return d @@ -47,7 +87,7 @@ def move(feature, references): if pshape.contains(point): global MOVED MOVED += 1 - print(MOVED) + #print(MOVED) lon, lat = _lon, _lat if _lon is None and _lat is None: delete = True @@ -98,11 +138,96 @@ def cmd_readme(self, args) -> str: 'Description') def cmd_download(self, args): + import requests + for item in load(self.raw_dir / 'atlas_leaves.json'): + html = fromstring(urllib.request.urlopen(item['url']).read(), HTMLParser()) + for link in html.xpath('//a'): + if 'href' in link.attrib: + href = link.attrib['href'] + if href.split('/')[-1].startswith('L0'): + o = self.raw_dir / 'atlas' / href.split('/')[-1] + href = href.replace('http:', 'https:').replace('edu/a', 'edu//a') + if href.endswith('jgw'): + continue + if not o.exists(): + try: + print(href) + o.write_bytes(requests.get(href, verify=False).content) + except: + raise + pass + return + + res = [] + u = "https://ecai.org//austronesiaweb/PacificAtlasContents-Alpha.htm" + html = fromstring(urllib.request.urlopen(u).read(), HTMLParser()) + for tr in html.xpath('//table[@width="589"]/tr'): + tds = list(tr.xpath('td')) + if len(tds) == 3 and tds[-1].xpath('a'): + res.append(dict( + name=tds[0].text, + id=tds[1].text, + url='https://ecai.org/austronesiaweb/{}'.format(tds[-1].xpath('a')[0].attrib['href']) + )) + dump(res, self.raw_dir / 'atlas_leaves.json', indent=2) + return + + dl = self.raw_dir / 'atlas' / 'new_guinea' + dl.mkdir() + for item in self.raw_dir.joinpath('atlas').read_json('new_guinea.json')['@graph']: + if item['@type'] == 'schema:DataDownload': + urllib.request.urlretrieve(item['schema:url'], dl / item['schema:url'].split('/')[-1]) + return + url = "https://ecaidata.org/dataset/language_atlas_of_the_pacific_scanned_atlas_leaves_-_new_guinea" + html = fromstring(urllib.request.urlopen(url).read(), HTMLParser()) + json = html.xpath('//script[@type="application/ld+json"]')[0] + self.raw_dir.joinpath('atlas', 'new_guinea.json').write_text(json.text, encoding='utf8') + return + + import os + from csvw.dsv import UnicodeWriter + + cols = ['LANGUAGE', 'COUNTRY_NAME', 'ISLAND_NAME', 'SOVEREIGN'] + with UnicodeWriter('shapes_norm.csv') as w: + w.writerow(cols) + for i, feature in enumerate(geopandas.read_file( + str(self.raw_dir / 'languagemap_040102.shp')).__geo_interface__['features']): + props = norm_metadata({k: v for k, v in feature['properties'].items() if v}) + if props: + w.writerow([props.get(col, '') for col in cols]) + return + + u = "https://ecai.berkeley.edu//austronesiaweb/maps/pacificatlas/Pacific_leaves/{}.jpg" + for row in self.raw_dir.read_csv('atlas_leaves.csv', dicts=True): + os.system('curl -k {} -o {}'.format(u.format(row['File']), str(self.raw_dir / '{}.jpg'.format(row['File'])))) + return + #from csvw.dsv import UnicodeWriter + #md = [] + #for i, feature in enumerate(geopandas.read_file( + # str(self.raw_dir / 'languagemap_040102.shp')).__geo_interface__['features']): + # md.append(feature['properties']) + #cols = set() + #for p in md: + # cols = cols.union(p.keys()) + #cols = sorted(cols) + + #print(len(md)) + #with UnicodeWriter('shapes.csv') as w: + # w.writerow(cols) + # for p in md: + # w.writerow([p.get(c, '') for c in cols]) + + #return self.raw_dir.download_and_unpack( 'https://ecaidata.org/dataset/209cb079-2270-4016-bc8d-f6c7835779c5/' 'resource/b5095d0f-7429-445d-a507-916aae5398ba/download/languagemap040429.zip') def iter_geojson_features(self): + errata = collections.defaultdict(list) + for row in self.etc_dir.read_csv('errata.csv', dicts=True): + errata[row['LANGUAGE']].append(( + Point(float(row['lon']), float(row['lat'])), + dict(s.split('=') for s in row['fix'].split(';')))) features = {} properties = [] lname2index = {} @@ -110,52 +235,63 @@ def iter_geojson_features(self): for i, feature in enumerate(geopandas.read_file( str(self.raw_dir / 'languagemap_040102.shp')).__geo_interface__['features']): _all.append(feature) - props = norm({k: v for k, v in feature['properties'].items() if v}) - if 'LANGUAGE' in props: # Ignore uninhabited areas, unclassified languages etc. - props.setdefault('COUNTRY_NAME', '') - lid = (props['LANGUAGE'], props['COUNTRY_NAME']) - properties.append(props) + props = norm_metadata({k: v for k, v in feature['properties'].items() if v}) + if props: # Ignore uninhabited areas, unclassified languages etc. + geom = fixed_geometry(feature) + # Sometimes polygons erroneously share the same metadata. This must be fixed before + # we can merge based on metadata and then lookup language mappings. + if props['LANGUAGE'] in errata: + obj = shape(geom['geometry']) + for point, fix in errata[props['LANGUAGE']]: + if obj.contains(point): + props.update(fix) + break + + lid = tuple(props[col] for col in COLS) + properties.append((lid, props)) if lid in features: - features[lid]['geometry'] = merged_geometry( - [features[lid], fixed_geometry(feature)], buffer=0) + features[lid]['geometry'] = merged_geometry([features[lid], geom], buffer=0) else: lname2index[lid] = i + 1 features[lid] = { 'id': str(i), 'type': 'Feature', 'properties': {}, - 'geometry': fixed_geometry(feature)['geometry'], + 'geometry': geom['geometry'], } + # pass dump(feature_collection(_all), self.raw_dir / 'all.geojson') - for (lname, cname), props in itertools.groupby( - sorted(properties, key=lambda f: (f['LANGUAGE'], f['COUNTRY_NAME'])), - lambda f: (f['LANGUAGE'], f['COUNTRY_NAME'])): - f = features[(lname, cname)] + for lid, props in itertools.groupby(sorted(properties, key=lambda f: f[0]), lambda f: f[0]): + f = features[lid] props = list(props) for attr in ['COUNTRY_NAME', 'SOVEREIGN', 'ISLAND_NAME']: f['properties'][attr] = sorted(set(p[attr] for p in props if attr in p)) - fid = lname2index[(lname, cname)] + fid = lname2index[lid] if fid in self.vectors: move(f, self.vectors[fid]) - yield fid, lname, cname, f + yield fid, lid, f def cmd_makecldf(self, args): self.schema(args.writer.cldf) args.writer.cldf.add_sources(*Sources.from_file(self.etc_dir / "sources.bib")) - coded_langs = {k: v for k, v in self.languages.items() if v.get('Glottocode')} + coded_langs = { + tuple(v[col] for col in COLS): v + for v in self.etc_dir.read_csv('languages_with_comment.csv', dicts=True) + if v.get('Glottocode')} coded_names = collections.defaultdict(list) for k, v in self.languages.items(): if v.get('Glottocode'): coded_names[k[0]].append(v) polys = [] - for lid, lname, cname, feature in sorted(self.iter_geojson_features(), key=lambda i: i[0]): + for lid, lidt, feature in sorted(self.iter_geojson_features(), key=lambda i: i[0]): + lname, cname, iname, sov = lidt args.writer.objects['ContributionTable'].append(dict( ID=lid, Name=lname, @@ -164,11 +300,11 @@ def cmd_makecldf(self, args): Islands=feature['properties']['ISLAND_NAME'], Source=['ecai', 'wurm_and_hattori'] )) - if (not cname) and lname in coded_names and len(coded_names[lname]) == 1: - # No country specified, but we only have one entry for the name anyway. - cname = coded_names[lname][0]['Countries'] - if (lname, cname) in coded_langs: - for gc in coded_langs[(lname, cname)]['Glottocode'].split(): + #if (not cname) and lname in coded_names and len(coded_names[lname]) == 1: + # # No country specified, but we only have one entry for the name anyway. + # cname = coded_names[lname][0]['Countries'] + if lidt in coded_langs: + for gc in coded_langs[lidt]['Glottocode'].split(): polys.append((str(lid), feature, gc)) lids = None diff --git a/etc/errata.csv b/etc/errata.csv new file mode 100644 index 0000000..f2deab2 --- /dev/null +++ b/etc/errata.csv @@ -0,0 +1,6 @@ +LANGUAGE,lat,lon,fix +Djeebbanan Family-Level Isolate,-12.469,131.288,LANGUAGE=Laragyan +Djeebbanan Family-Level Isolate,-12.574,130.733,LANGUAGE=Laragyan +Oba,-6.459,139.430,LANGUAGE=Oba YAQAY +YAU,-3.530,142.015,LANGUAGE=YAU Sandaun +Ono,-6.004,145.121,LANGUAGE=Ono SIANE \ No newline at end of file diff --git a/etc/languages_with_comment.csv b/etc/languages_with_comment.csv index cb6004a..6fa3887 100644 --- a/etc/languages_with_comment.csv +++ b/etc/languages_with_comment.csv @@ -418,7 +418,7 @@ LAMPUNG/Southern,Indonesia,,Indonesia,lamp1243,, LAMPUNG/S-W,Indonesia,,Indonesia,lamp1243,, LAMPUNG/C-W,Indonesia,,Indonesia,lamp1243,, SUMATRA MALAY/Semendo,Indonesia,,,seme1248,Glottolog dialect with matching name and matching country, -SUMATRA MALAY,Indonesia,,Indonesia,,, +SUMATRA MALAY,Indonesia,,Indonesia,nucl1806,Somewhat unspecific; matched to Glottolog subgroup, SUMATRA MALAY/Ranau,Indonesia,,Indonesia,high1292,, NIAS/Sichule,Indonesia,,Indonesia,siku1242,, NIAS/Eastern,Indonesia,,Indonesia,,Label for for multiple polygons with conflicting labels in the Atlas, @@ -1048,7 +1048,7 @@ KONDA,Indonesia,,,kond1303,, PURAGI,Indonesia,,,pura1253,Glottolog languoid with matching name and matching country, KAMPONG BARU,Indonesia,,,kais1235,, Mugin,Indonesia,,,yaha1248,Typo transcribing 'Mugim' from the Atlas; dialect of YAHADIAN, -INANWATAN,Indonesia,,,inan1242,, +INANWATAN,Indonesia,,,suab1238,Glottolog language with matching Ruhlen name in family Inanwatan, YARADIAN,Indonesia,,,yaha1248,, Kasuweri,Indonesia,,,kasu1243,, TAROF,Indonesia,,,taro1259,, @@ -1071,7 +1071,7 @@ MADKI,Indonesia,,,abun1252,, Kebar,Indonesia,,,mpur1239,, Amberbaken,Indonesia,,,mpur1239,Matches Ruhlen namem of Glottolog language Mpur at same location, KARON DORI,Indonesia,,,karo1303,Glottolog languoid with matching name and matching country, -MEAX,Indonesia,,,meax1236,, +MEAX,Indonesia,,,meya1236,Glottolog language with matching Ruhlen name and matching location, Tuf,Indonesia,,,maib1239,Dialect of BRAT, BRAT,Indonesia,,,maib1239,, BRAT Yak,Indonesia,,,maib1239,Dialect of BRAT, @@ -1104,15 +1104,15 @@ Southeastern IRAHUTU,Indonesia,,,irar1238,, South IRAHUTU,Indonesia,,,irar1238,, MAIRASI,Indonesia,,,nucl1594,Glottolog languoid with matching name and coordinate within polygons, NORTH-EASTERN MAIRASI ?,Indonesia,,,nort2908,, -MOI,Indonesia,,Indonesia,moii1254,, +MOI,Indonesia,,Indonesia,moii1235,Glottolog language with matching name and location, URIANKERE,Indonesia,,Indonesia,duri1243,, DAI,Indonesia,,Indonesia,,Multiple polygons with different labels in various locations in Halmahera, -Mosana,Indonesia,,Indonesia,moii1235,, +Mosana,Indonesia,,Indonesia,moii1235,Dialect of MOI, SEGET,Indonesia,,,sege1235,Glottolog languoid with matching name and matching country, -MOI,Indonesia,,,moii1254,, +MOI,Indonesia,,,moii1235,Glottolog language with matching name and location, MOS,Indonesia,,,morb1239,Typo transcribing MOR from the Atlas, North IRAHUTU,Indonesia,,,irar1238,, -TANAH MERAH,Indonesia,,,tana1288,, +TANAH MERAH,Indonesia,,,tana1288,The 'other' Tanah Merah also called Sumuri, BARAU,Indonesia,,,bara1347,, ERCKWANAS,Indonesia,,,erok1237,, BAHAM,Indonesia,,,baha1258,Glottolog languoid with matching name and matching country, @@ -1121,7 +1121,8 @@ IHA Kapaur,Indonesia,,,ihaa1241,, ERCKWANAS Iha,Indonesia,,,ihaa1241,, Kaipuri,Indonesia,,,kuru1305,, YAVA,Indonesia,,,cent2052,, -Oba,Indonesia,,,obam1239,, +Oba,Indonesia,,,kamo1255,Oba dialect of KAMORO, +Oba YAQAY,Indonesia,,,obam1239,Oba dialect of YAQAY, IRESIM,Indonesia,,,ires1239,, KOSARE,Indonesia,,,kosa1251,, KOSAREK(WANAM),Indonesia,,,kosa1249,Glottolog language with matching dialect 'Kosarek' and MultiTree name 'Wanam', @@ -1138,11 +1139,11 @@ Paniai,Indonesia,,,ekar1243,Area matches the Glottolog language Ekari in the Pan Tigi,Indonesia,,,ekar1243,, Western KAMORO,Indonesia,,,kamo1255,, Tarya,Indonesia,,,tary1239,Glottolog languoid with matching name and coordinate within polygons, -Central Kamora,Indonesia,,,asma1256,, -Kamora,Indonesia,,,kamo1255,, -Upper Wania,Indonesia,,,kamo1255,, +Central Kamora,Indonesia,,,kamo1255,Typo transcribing 'Central' dialect of KAMORO - not of the neighbour dialect 'Kamora', +Kamora,Indonesia,,,kamo1255,The 'Kamora' dialect of KAMORO, +Upper Wania,Indonesia,,,kamo1255,Dialect of KAMORO, Wania KAMORO,Indonesia,,,kamo1255,, -Mukumuga,Indonesia,,,kamo1255,, +Mukumuga,Indonesia,,,kamo1255,Dialect of KAMORO, South MONI,Indonesia,,,moni1261,Dialect labeled 'South' of MONI, Amung UHUNDUNI,Indonesia,,,dama1272,Matches ELCAT names 'Amung' and 'Uhunduni' for Glottoog language Damal, Enggipiloe,Indonesia,,,engg1246,, @@ -1153,9 +1154,9 @@ NDUGA,Indonesia,,,ndug1245,Glottolog languoid with matching name and matching co NORTH NGALIK,Indonesia,,,angg1239,, WANO,Indonesia,,,wano1243,Glottolog languoid with matching name and matching country, DEM,,,,demm1245,'Dem Stock-Level Isolate', -IIaga,Indonesia,,,ilag1236,, -Sinak,Indonesia,,,west2594,, -North Balim,Indonesia,,,gran1246,, +IIaga,Indonesia,,,ilag1236,Dialect of WESTERN DANI, +Sinak,Indonesia,,,west2594,Dialect of WESTERN DANI, +North Balim,Indonesia,,,nort3397,Dialect of WESTERN DANI, WESTERN BANI Yamo,Indonesia,,,west2594,, Swart Valley,Indonesia,,,swar1236,, Bokondini,Indonesia,,,west2596,, @@ -1225,7 +1226,7 @@ CITAK ASMAT,Indonesia,,,cita1246,Glottolog languoid with matching name and coord UPPER KAEME R?,Indonesia,,,awyu1263,'[Unclassified] Awyu-Dumut Family languages', South AIRO-SUMAGHAGHE,Indonesia,,,asue1235,, Wideman R ?,Indonesia,,,asue1235,Dialect of PISA in the Atlas; mapped to Glottolog language with WALS and Ruhlen name Pisa, -Kayagar Stock-Level Family,Indonesia,,,kaya1327,, +Kayagar Stock-Level Family,Indonesia,,,tama1336,Polygon is labeled as Yogo dialect of TAMAGARIO, Tamario,Indonesia,,,tama1336,, South SAWUY,Indonesia,,,sawi1257,Matched to Glottolog language with matching MultiTree name at matching location, North SAWUY,Indonesia,,,sawi1257,, @@ -1242,7 +1243,7 @@ SIAGHA-YENIMU,Indonesia,,,sout2941,, AGHU,Indonesia,,,aghu1255,Glottolog languoid with matching name and matching country, KIA R?,Indonesia,,,jair1235,, EDERAH R?,Indonesia,,,eder1237,, -MAPI R?,Indonesia,,,yaqa1246,, +MAPI R?,Indonesia,,,yaqa1246,Glottolog language with matching location and WALS and MultiTree alternative name Mapi, UPPER DIGUL R,Indonesia,,,awyu1263,'[Unclassified] Awyu-Dumut Family languages', WANGGOM,Indonesia,,,wang1299,Glottolog languoid with matching name and matching country, WAMBON,Indonesia,,,wamb1259,Glottolog languoid with matching name and matching country, @@ -1398,7 +1399,7 @@ Etoro,,Papua New Guinea,,edol1239,, Komiofi,,Papua New Guinea,,komo1262,, BEAMI Bedamini,,Papua New Guinea,,beam1240,, N Beami?,,Papua New Guinea,,nort2903,, -KWARE,,Papua New Guinea,,uare1241,, +KWARE,,Papua New Guinea,,aime1238,Glottolog language with matching location and ELCAT name Kware, TOMU,,Papua New Guinea,,odoo1238,Glottolog language with matching ELCAT name, NOMAD Samo,,Papua New Guinea,,samo1303,In the Atlas listed as dialect of NOMAD; matched to the Glottolog language because the sister dialects in the area are languages in Glottolog as well, Kubo,,Papua New Guinea,,kubo1242,Glottolog languoid with matching name and matching country (infered from island name), @@ -1471,13 +1472,13 @@ YIMAS,,Papua New Guinea,,yima1243,Glottolog languoid with matching name and matc ALAMBLAK Karawari,,Papua New Guinea,,kara1494,Matches Glottolog parent language + dialect name, BAHINEMO,,Papua New Guinea,,bahi1254,Glottolog languoid with matching name and matching country (infered from island name), BISIS,,Papua New Guinea,,bisi1244,Glottolog languoid with matching name and matching country (infered from island name), -MARI,,Papua New Guinea,,mari1429,, +MARI,,Papua New Guinea,,mari1432,Polygon is in East Sepik province; thus mapped to Mari (East Sepik), KAPRIMAN,,Papua New Guinea,,kapr1245,Glottolog languoid with matching name and matching country (infered from island name), Karambit,,Papua New Guinea,,kara1493,, WATAKATAUT,,Papua New Guinea,,wata1251,, North Mansap BUNA,,Papua New Guinea,,kasm1239,, South MANSAP buna,,Papua New Guinea,,masa1310,, -MAND,,Papua New Guinea,,atem1241,Glottolog languoid with matching name and matching country (infered from island name), +MAND,,Papua New Guinea,,wiar1238,Typo transcribing MANDI fro the Atlas; Glottolog language with matching ELCAT name and matching location, URIMO,,Papua New Guinea,,urim1251,Glottolog languoid with matching name and matching country (infered from island name), MUNIWARA,,Papua New Guinea,,juwa1238,, Yibab,,Papua New Guinea,,kama1367,'Yibab-Wandomi d of Kamasau W&H 1981',Carrington[476] @@ -1585,7 +1586,8 @@ ROINDJI,,Papua New Guinea,,ronj1237,, MALASANGA,,Papua New Guinea,,mala1487,Glottolog languoid with matching name and matching country (infered from island name), SIO,,Papua New Guinea,,sioo1240,Glottolog languoid with matching name and matching country (infered from island name), GITUA,,Papua New Guinea,,gitu1237,Glottolog languoid with matching name and matching country (infered from island name), -YAU,,Papua New Guinea,,yaum1237,, +YAU,,Papua New Guinea,,yaum1237,Mapped to Yau-Nungon based on location and name, +YAU Sandaun,,Papua New Guinea,,yaus1235,There are two languages YAU in PNG; this one is in Sandaun Province, South Central TIMBE,,Papua New Guinea,,cent2113,, East KOMBA,,Papua New Guinea,,komb1273,, WELLKI,,Papua New Guinea,,weli1239,, @@ -1747,7 +1749,7 @@ Averi,,Papua New Guinea,,aver1239,, Zuwadze,,Papua New Guinea,,zuwa1238,Glottolog languoid with closely matching name and matching location, Asapa,,Papua New Guinea,,asap1239,Glottolog languoid with matching name and coordinate within polygons, AOMIE(OMIE),,Papua New Guinea,,omie1241,, -Gora,,Papua New Guinea,,gora1264,, +Gora,,Papua New Guinea,,gora1266,Dialect of AOMIE, Nami,,Papua New Guinea,,nami1257,, Mesan,,Papua New Guinea,,mesa1247,, MANAGALASI,,Papua New Guinea,,esee1247,, @@ -1763,8 +1765,8 @@ Manoa,,Papua New Guinea,,nucl1630,, SIRIO,,Papua New Guinea,,nawa1258,, BARAI,,Papua New Guinea,,nucl1630,Glottolog languoid with matching name and matching country (infered from island name), Barai,,Papua New Guinea,,nucl1630,Glottolog languoid with matching name and matching country (infered from island name), -Garia,,Papua New Guinea,,suma1270,, -KWALE,,Papua New Guinea,,uare1241,, +Garia,,Papua New Guinea,,uare1241,Dialect of KWALE, +KWALE,,Papua New Guinea,,uare1241,Glottolog language with matching location and MultiTree name Kwale in Humene-Kwale group, S-E KOIARI,,Papua New Guinea,,gras1249,, Lagume,,Papua New Guinea,,lagu1249,, baiba,,Papua New Guinea,,nucl1630,Unspecified dialect of BARAI; label 'Nigubaiba' overlaps polygon partially, @@ -1877,7 +1879,7 @@ SARAM,,Papua New Guinea,,saka1292,, IRUMU,,Papua New Guinea,,tuma1250,, Wapu,,Papua New Guinea,,wapu1240,, YAGAWAK,,Papua New Guinea,,yaga1259,, -BAM,,Papua New Guinea,,biem1237,, +BAM,,Papua New Guinea,,bamm1240,BAM language neighbouring WANTOAT Central, EAST NEK,,Papua New Guinea,,east2506,, SOUTH NUK,,Papua New Guinea,,sout2935,Glottolog languoid with matching name and coordinate within polygons, NORTH NUK,,Papua New Guinea,,nort2911,, @@ -1941,7 +1943,8 @@ Kami-Kulaka,,Papua New Guinea,,kami1259,, Kamate,,Papua New Guinea,,kama1369,, NOMANE,,Papua New Guinea,,noma1262,Glottolog languoid with matching name and matching country (infered from island name), Kiari,,Papua New Guinea,,kiar1239,, -Ono,,Papua New Guinea,,onoo1247,, +Ono SIANE,,Papua New Guinea,,onaa1244,Ono dialect of SIANE, +Ono,,Papua New Guinea,,onoo1247,Ono dialect of WERI, Koreipa,,Papua New Guinea,,kole1238,, Komunku,,Papua New Guinea,,komo1263,, Lambau,,Papua New Guinea,,lamb1275,Glottolog languoid with matching name and coordinate within polygons, @@ -1960,7 +1963,7 @@ Kai,,Papua New Guinea,,kiaa1241,Typo transcribing 'Kai' dialect of MARIGL; in Gl MARIGL,,Papua New Guinea,,mari1436,Glottolog languoid with matching name and coordinate within polygons, Yuri,,Papua New Guinea,,yuri1250,Matching dialect name in matching location with matching neighbor Marigl, Eia,,Papua New Guinea,,goli1247,Typo transcribing 'Era' dialect of MARIGL; in Glottolog matched to Golin with dialects matching the neighboring dialects, -MARI,,,,mari1429,, +MARI,,,,mari1429,Mari in Madang province, BINUMARIEN,,,,binu1245,, GAHUKU,,,,gahu1246,, KENATI,,,,kena1250,, @@ -2070,7 +2073,7 @@ YANINGARA,,,,kani1285,Typo transcribing KANINGARA from Atlas which we map to Kan LAEKO-LIBUAT,,Papua New Guinea,,laek1243,Glottolog languoid with matching name and matching country (infered from island name), BELI,,Papua New Guinea,,beli1258,Glottolog languoid with closely matching name and coordinate within polygons, PAHI,,Papua New Guinea,,pahi1246,Glottolog languoid with matching name and matching country (infered from island name), -AUQU,,Papua New Guinea,,augu1243,, +AUQU,,Papua New Guinea,,kalo1262,Polygon is labeled KALOU in the Atlas, PASI,,,,pasi1259,Glottolog languoid with matching name and coordinate within polygons, Tau,,Papua New Guinea,,tauu1239,, Apos,,Papua New Guinea,,apos1239,, @@ -2146,10 +2149,10 @@ NAFRI,Indonesia,,,nafr1241,Glottolog languoid with matching name and matching co East SENTANI,Indonesia,,,east2518,, MORWAP,,,,else1239,, TARPIA,Indonesia,,,tarp1240,Glottolog languoid with matching name and matching country, -Central TANAHMERAH,Indonesia,,,tana1288,, -Eastern TANAHMERAH,Indonesia,,,tana1288,, +Central TANAHMERAH,Indonesia,,,tabl1243,Tabla has Ruhlen name Tanahmerah and is spoken in the area, +Eastern TANAHMERAH,Indonesia,,,tabl1243,Tabla has Ruhlen name Tanahmerah and is spoken in the area, DEMTA,Indonesia,,,demt1241,Glottolog languoid with matching name and matching country, -Western TANAHMERAH,Indonesia,,,tana1288,, +Western TANAHMERAH,Indonesia,,,tabl1243,Tabla has Ruhlen name Tanahmerah and is spoken in the area, Makwei,Indonesia,,,mekw1241,, SOBEI,Indonesia,,,sobe1238,Glottolog languoid with matching name and matching country, BONGO,Indonesia,,,bong1287,Glottolog languoid with closely matching name and coordinate within polygons, @@ -2283,6 +2286,7 @@ BARUYA,,Papua New Guinea,,baru1267,Glottolog languoid with matching name and mat KEURU Aheave,,Papua New Guinea,,ahea1235,, DYUGUN,Australia,,Australia,dyug1238,Glottolog languoid with matching name and matching country, Djeebbanan Family-Level Isolate,Australia,,Australia,djee1236,, +Laragyan,Australia,,Australia,lara1258,matched to Glottolog language with matching location and AIATSIS name, Burarran Family,Australia,,Australia,bure1238,, Nakkaran Family-Level Isolate,Australia,,Australia,naka1260,, Gunwinyguan Family,Australia,,Australia,gunw1250,, @@ -2620,7 +2624,7 @@ Karore,,Papua New Guinea,,karo1298,Glottolog languoid with matching name and mat Sengseng,,Papua New Guinea,,seng1281,Glottolog languoid with matching name and matching country (infered from island name), Kaulong,,Papua New Guinea,,kaul1240,Glottolog languoid with matching name and matching country (infered from island name), Miu,,Papua New Guinea,,miuu1237,Glottolog languoid with matching name and matching country (infered from island name), -Gimi,,Papua New Guinea,,gimi1243,, +Gimi,,Papua New Guinea,,gimi1242,Listed as dialect of MOEWEHAFEN; matched to Glottolog language with name Gimi and matching location, RAUTO,,Papua New Guinea,,puli1237,, LAMOGAI,,Papua New Guinea,,lamo1244,Glottolog languoid with matching name and matching country (infered from island name), Babagarupu,,Papua New Guinea,,baba1269,, @@ -2673,7 +2677,7 @@ LUKEP,,,,loke1237,, MANGAP,,,,mbul1263,, WERI Biaru-Waria,,Papua New Guinea,,biar1240,, Sepoe,,Papua New Guinea,,sepo1239,Glottolog languoid with matching name and matching country (infered from island name), -Morigi-lokea,,Papua New Guinea,,mori1271,, +Morigi-lokea,,Papua New Guinea,,sepo1239,Glottolog dialect with matching MultiTree name; dialect of TOARIPI, Moreave Toaripi,,Papua New Guinea,,nucl1581,, DOBU,,Papua New Guinea,,dobu1241,Glottolog languoid with matching name and matching country (infered from island name), SEWA BAY,,Papua New Guinea,,sewa1251,Glottolog languoid with matching name and matching country (infered from island name), @@ -2697,7 +2701,7 @@ Asiaoro,,,,asia1264,Glottolog languoid with matching name and coordinate within Pauia,,,,uppe1428,"A dialect of HEWA in the Atlas, at the upper Lagaip", Yoliapi,,,,hewa1241,Glottolog language with matching ELCAT name, WOGEO,,,,woge1237,, -BAM,,,,biem1237,, +BAM,,,,biem1237,Glottolog language with matching location and MultiTree name Bam, BOIKEN Island,,,,boik1241,, BUNGAIN,,Papua New Guinea,,bung1270,Glottolog languoid with matching name and matching country (infered from island name), Kaiep,,Papua New Guinea,,kaie1237,Glottolog languoid with matching name and matching country (infered from island name),