closes #175

cldf · Apr 26, 2024 · b3f24f9 · b3f24f9
1 parent c49fdca
commit b3f24f9
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,8 +4,10 @@ The `pycldf` package adheres to [Semantic Versioning](http://semver.org/spec/v2.
 
 ## Unreleased
 
-Fixed bug whereby `dict` returned by `orm.Language.as_geojson_feature` could not be serialized
-by `json.dumps`.
+- Fixed bug whereby `dict` returned by `orm.Language.as_geojson_feature` could not be serialized
+  by `json.dumps`.
+- Fixed bug whereby SQLite conversion would fail when the name of a renamed column clashed with an
+  existing column name in the table.
 
 
 ## [1.37.1] - 2024-03-18

diff --git a/src/pycldf/db.py b/src/pycldf/db.py
@@ -181,14 +181,24 @@ def __init__(self, dataset: Dataset, **kw):
             except (KeyError, ValueError):
                 # If no table type can be determined, there's nothing to translate.
                 pass
+            new_col_names = []
             for col in table.tableSchema.columns:
                 if col.propertyUrl and col.propertyUrl.uri in TERMS.by_uri:
                     # Translate local column names to local names of CLDF Ontology terms, prefixed
                     # with `cldf_`:
                     col_name = 'cldf_{0.name}'.format(TERMS.by_uri[col.propertyUrl.uri])
+                    new_col_names.append(col_name.lower())
                     translations[table.local_name].columns[col.header] = col_name
                     self._retranslate[table.local_name][col_name] = col.header
 
+            for col in table.tableSchema.columns:
+                if not (col.propertyUrl and col.propertyUrl.uri in TERMS.by_uri):
+                    if col.header.lower() in new_col_names:
+                        # A name clash! Rename the untranslated column by prefixing it with `_`.
+                        col_name = '_{}'.format(col.header)
+                        translations[table.local_name].columns[col.header] = col_name
+                        self._retranslate[table.local_name][col_name] = col.header
+
         # Add source table:
         for src in self.dataset.sources:
             for key in src:

diff --git a/tests/test_db.py b/tests/test_db.py
@@ -90,6 +90,22 @@ def test_db_write_extra_columns(md):
         assert len(db.query("""select * from "extra.csv" """)[0]) == 1
 
 
+def test_db_write_clashing_columns(md):
+    ds = Generic.in_dir(md.parent)
+    # `ID` is translated to `cldf_id`, which clashes case-insensitively with column `cldf_Id`.
+    ds.add_table(
+        'extra.csv',
+        dict(name='ID', propertyUrl='http://cldf.clld.org/v1.0/terms.rdf#id'),
+        'cldf_Id')
+    ds.write(md, **{'extra.csv': [dict(ID=1, cldf_Id='Name')]})
+    ds.write_metadata(md)
+
+    db = Database(ds, fname=md.parent / 'db.sqlite')
+    db.write_from_tg()  # Must not fail despite the name clash.
+    res = db.query("""select _cldf_Id from "extra.csv" """)
+    assert res[0][0] == 'Name', res  # The clashing column is readable as `_cldf_Id`.
+
+
 def test_db_write_tables_with_fks(md):
     ds = Generic.in_dir(md.parent)
     t1 = ds.add_table('t1.csv', 'ID', 'Name')