From 141ba60b9407b130f02a29fc751c6b71c1d33ac5 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 22:46:11 +0200 Subject: [PATCH 01/25] fix: syntax --- .github/workflows/data-upgrade.yml | 8 ++++---- .github/workflows/release-new-data.yml | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index bf20a9f..b415833 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -41,7 +41,7 @@ jobs: print("Conversion complete.") - name: Upload JSON files as artifacts - uses: actions/upload-artifact@v4 - with: - name: json-files - path: db/json/ \ No newline at end of file + uses: actions/upload-artifact@v4 + with: + name: json-files + path: db/json/ \ No newline at end of file diff --git a/.github/workflows/release-new-data.yml b/.github/workflows/release-new-data.yml index 26a5f3d..b5898f9 100644 --- a/.github/workflows/release-new-data.yml +++ b/.github/workflows/release-new-data.yml @@ -7,6 +7,8 @@ on: branches: - main pull_request: + branches: + - main paths: - 'db/**' @@ -25,11 +27,11 @@ jobs: python-version: '3.x' - name: Install pandas - run: pip install pandas + run: pip install pandas - name: Convert CSV to JSON - run: | + run: | import os import pandas as pd From 9bd0fd98b8148309ab874fdfee9dc1707c86ff5b Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 22:47:45 +0200 Subject: [PATCH 02/25] fix: test data --- db/csv/quotes.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 5295bd6..22db7e7 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From c7b14062922c6b9fa2ddf01ffa06093cb50d406f Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 22:51:36 +0200 Subject: [PATCH 03/25] fix: try running in python interpreter --- .github/workflows/data-upgrade.yml | 12 +----------- db/csv/quotes.csv | 2 +- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index b415833..4ee0d54 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -25,20 +25,10 @@ jobs: - name: Install pandas run: pip install pandas - - name: Convert CSV to JSON run: | - import os - import pandas as pd - - os.makedirs('db/json', exist_ok=True) - csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')] + python -c "import os; import pandas as pd; os.makedirs('db/json', exist_ok=True); csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')]; [pd.read_csv(f'db/csv/{file}').to_json(f'db/json/{file.replace('.csv', '.json')}', orient='records', lines=True) for file in csv_files]" - for file in csv_files: - df = pd.read_csv(f'db/csv/{file}') - json_path = f'db/json/{file.replace(".csv", ".json")}' - df.to_json(json_path, orient='records', lines=True) - print("Conversion complete.") - name: Upload JSON files as artifacts uses: actions/upload-artifact@v4 diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 22db7e7..5295bd6 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From 235705c15ba8a03fa18d73da7e99424b445f60bc Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 23:01:10 +0200 Subject: [PATCH 04/25] fix: update json handling --- .github/workflows/data-upgrade.yml | 15 ++++++++++++++- db/csv/quotes.csv | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index 4ee0d54..c785400 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -27,7 +27,20 @@ jobs: - name: Convert CSV to JSON run: | - python -c "import os; import pandas as pd; os.makedirs('db/json', exist_ok=True); csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')]; [pd.read_csv(f'db/csv/{file}').to_json(f'db/json/{file.replace('.csv', '.json')}', orient='records', lines=True) for file in csv_files]" + python -c "\ + import os; \ + import pandas as pd; \ + import regex as re; \ + def transform_objectid(text): \ + return re.sub(r'ObjectId\\((.*?)\\)', r'{\"$oid\": \"\\1\"}', text); \ + os.makedirs('db/json', exist_ok=True); \ + csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')]; \ + for file in csv_files: \ + df = pd.read_csv(f'db/csv/{file}'); \ + for column in df.select_dtypes(include=['object']): \ + df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x); \ + json_path = f'db/json/{file.replace('.csv', '.json')}; \ + df.to_json(json_path, orient='records', lines=True);" - name: Upload JSON files as artifacts diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 5295bd6..22db7e7 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From 81e57210f6bfe3885e545ffeb67221668af1056e Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 23:03:43 +0200 Subject: [PATCH 05/25] fix: syntax? --- .github/workflows/data-upgrade.yml | 23 ++++++++++------------- db/csv/quotes.csv | 2 +- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index c785400..6dfcc2b 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -27,19 +27,16 @@ jobs: - name: Convert CSV to JSON run: | - python -c "\ - import os; \ - import pandas as pd; \ - import regex as re; \ - def transform_objectid(text): \ - return re.sub(r'ObjectId\\((.*?)\\)', r'{\"$oid\": \"\\1\"}', text); \ - os.makedirs('db/json', exist_ok=True); \ - csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')]; \ - for file in csv_files: \ - df = pd.read_csv(f'db/csv/{file}'); \ - for column in df.select_dtypes(include=['object']): \ - df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x); \ - json_path = f'db/json/{file.replace('.csv', '.json')}; \ + python -c "import os; import pandas as pd; import regex as re;\ + def transform_objectid(text):\ + return re.sub(r'ObjectId\\((.*?)\\)', r'{\"$oid\": \"\\1\"}', text);\ + os.makedirs('db/json', exist_ok=True);\ + csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')];\ + for file in csv_files:\ + df = pd.read_csv(f'db/csv/{file}');\ + for column in df.select_dtypes(include=['object']):\ + df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x);\ + json_path = 'db/json/' + file.replace('.csv', '.json');\ df.to_json(json_path, orient='records', lines=True);" diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 22db7e7..5295bd6 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From f023892c1e9f9a314d94c6f2f9ff091cb030d965 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 23:09:39 +0200 Subject: [PATCH 06/25] chore: exclude python script --- .github/workflows/data-upgrade.yml | 17 ++++------------- db/convert-csv-to-json.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 13 deletions(-) create mode 100644 db/convert-csv-to-json.py diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index 6dfcc2b..a4f1615 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -25,20 +25,11 @@ jobs: - name: Install pandas run: pip install pandas - - name: Convert CSV to JSON - run: | - python -c "import os; import pandas as pd; import regex as re;\ - def transform_objectid(text):\ - return re.sub(r'ObjectId\\((.*?)\\)', r'{\"$oid\": \"\\1\"}', text);\ - os.makedirs('db/json', exist_ok=True);\ - csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')];\ - for file in csv_files:\ - df = pd.read_csv(f'db/csv/{file}');\ - for column in df.select_dtypes(include=['object']):\ - df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x);\ - json_path = 'db/json/' + file.replace('.csv', '.json');\ - df.to_json(json_path, orient='records', lines=True);" + - name: Install regex + run: pip install regex + - name: Convert CSV to JSON + run: python db/convert_csv_to_json.py - name: Upload JSON files as artifacts uses: actions/upload-artifact@v4 diff --git a/db/convert-csv-to-json.py b/db/convert-csv-to-json.py new file mode 100644 index 0000000..0e88425 --- /dev/null +++ b/db/convert-csv-to-json.py @@ -0,0 +1,21 @@ +import os +import pandas as pd +import regex as re + +def transform_objectid(text): + """Replace MongoDB ObjectId references to proper JSON format.""" + return re.sub(r'ObjectId\((.*?)\)', r'{"$oid": "\1"}', text) + +def main(): + os.makedirs('db/json', exist_ok=True) + csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')] + + for file in csv_files: + df = pd.read_csv(f'db/csv/{file}') + for column in df.select_dtypes(include=['object']): + df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x) + json_path = f'db/json/{file.replace(".csv", ".json")}' + df.to_json(json_path, orient='records', lines=True) + +if __name__ == "__main__": + main() \ No newline at end of file From 8208b0ade3e0eae78b94675ff45dd75517efc703 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 23:10:59 +0200 Subject: [PATCH 07/25] fix: file path --- .github/workflows/data-upgrade.yml | 2 +- db/csv/quotes.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index a4f1615..e32994f 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -29,7 +29,7 @@ jobs: run: pip install regex - name: Convert CSV to JSON - run: python db/convert_csv_to_json.py + run: python db/convert-csv-to-json.py - name: Upload JSON files as artifacts uses: actions/upload-artifact@v4 diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 5295bd6..22db7e7 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From bf2dc6261c4aa4db78f6d94ee81406105bfcd33e Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 23:15:41 +0200 Subject: [PATCH 08/25] fix: remove backlashes --- db/convert-csv-to-json.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/db/convert-csv-to-json.py b/db/convert-csv-to-json.py index 0e88425..e7fa5dc 100644 --- a/db/convert-csv-to-json.py +++ b/db/convert-csv-to-json.py @@ -4,17 +4,25 @@ def transform_objectid(text): """Replace MongoDB ObjectId references to proper JSON format.""" - return re.sub(r'ObjectId\((.*?)\)', r'{"$oid": "\1"}', text) + # This pattern captures the contents inside ObjectId() + pattern = re.compile(r'ObjectId\((.*?)\)') + # Replace using a function to format as a dictionary object + return pattern.sub(lambda match: f'{{"$oid": "{match.group(1)}"}}', text) def main(): - os.makedirs('db/json', exist_ok=True) - csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')] + csv_folder = 'db/csv/' + json_folder = 'db/json/' + os.makedirs(json_folder, exist_ok=True) + csv_files = [f for f in os.listdir(csv_folder) if f.endswith('.csv')] for file in csv_files: - df = pd.read_csv(f'db/csv/{file}') + file_path = os.path.join(csv_folder, file) + df = pd.read_csv(file_path) + # Apply transformation to each string column for column in df.select_dtypes(include=['object']): - df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x) - json_path = f'db/json/{file.replace(".csv", ".json")}' + df[column] = df[column].apply(transform_objectid) + json_path = os.path.join(json_folder, file.replace(".csv", ".json")) + # Use pandas to_json method to ensure proper JSON formatting df.to_json(json_path, orient='records', lines=True) if __name__ == "__main__": From cd5be2976ecb05a3e53c54cf07d03b4cf8eabfe7 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 23:20:04 +0200 Subject: [PATCH 09/25] fix: array problem --- .github/workflows/data-upgrade.yml | 3 +++ db/convert-csv-to-json.py | 30 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index e32994f..685d63e 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -28,6 +28,9 @@ jobs: - name: Install regex run: pip install regex + - name: Install json + run: pip install json + - name: Convert CSV to JSON run: python db/convert-csv-to-json.py diff --git a/db/convert-csv-to-json.py b/db/convert-csv-to-json.py index e7fa5dc..31f8beb 100644 --- a/db/convert-csv-to-json.py +++ b/db/convert-csv-to-json.py @@ -1,29 +1,29 @@ import os import pandas as pd import regex as re +import json # Needed to write JSON files def transform_objectid(text): """Replace MongoDB ObjectId references to proper JSON format.""" - # This pattern captures the contents inside ObjectId() - pattern = re.compile(r'ObjectId\((.*?)\)') - # Replace using a function to format as a dictionary object - return pattern.sub(lambda match: f'{{"$oid": "{match.group(1)}"}}', text) + return re.sub(r'ObjectId\((.*?)\)', r'{"$oid": "\1"}', text) def main(): - csv_folder = 'db/csv/' - json_folder = 'db/json/' - os.makedirs(json_folder, exist_ok=True) - csv_files = [f for f in os.listdir(csv_folder) if f.endswith('.csv')] + os.makedirs('db/json', exist_ok=True) + csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')] + all_data = [] # List to collect all data + for file in csv_files: - file_path = os.path.join(csv_folder, file) - df = pd.read_csv(file_path) - # Apply transformation to each string column + df = pd.read_csv(f'db/csv/{file}') for column in df.select_dtypes(include=['object']): - df[column] = df[column].apply(transform_objectid) - json_path = os.path.join(json_folder, file.replace(".csv", ".json")) - # Use pandas to_json method to ensure proper JSON formatting - df.to_json(json_path, orient='records', lines=True) + df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x) + # Convert DataFrame to a list of dictionaries and extend all_data with these records + all_data.extend(df.to_dict(orient='records')) + + # Write all collected data to a single JSON file + json_path = 'db/json/all_data.json' + with open(json_path, 'w') as f: + json.dump(all_data, f, indent=4) if __name__ == "__main__": main() \ No newline at end of file From f3a0a056126e11722d6316e869c2ad6698ec84fe Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 23:21:14 +0200 Subject: [PATCH 10/25] fix: json issue --- .github/workflows/data-upgrade.yml | 3 --- db/csv/quotes.csv | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index 685d63e..e32994f 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -28,9 +28,6 @@ jobs: - name: Install regex run: pip install regex - - name: Install json - run: pip install json - - name: Convert CSV to JSON run: python db/convert-csv-to-json.py diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 22db7e7..5295bd6 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From ef6ae8f83303ace4d886e2c135e5c869bfd8616e Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 16 Apr 2024 23:23:32 +0200 Subject: [PATCH 11/25] wip --- db/convert-csv-to-json.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/db/convert-csv-to-json.py b/db/convert-csv-to-json.py index 31f8beb..8fc4382 100644 --- a/db/convert-csv-to-json.py +++ b/db/convert-csv-to-json.py @@ -1,29 +1,23 @@ import os import pandas as pd import regex as re -import json # Needed to write JSON files def transform_objectid(text): """Replace MongoDB ObjectId references to proper JSON format.""" return re.sub(r'ObjectId\((.*?)\)', r'{"$oid": "\1"}', text) def main(): - os.makedirs('db/json', exist_ok=True) + os.makedirs('db/json', exist_ok=True) # Ensure the directory for JSON files exists csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')] - all_data = [] # List to collect all data - for file in csv_files: df = pd.read_csv(f'db/csv/{file}') + # Transform all string columns that may contain ObjectId references for column in df.select_dtypes(include=['object']): df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x) - # Convert DataFrame to a list of dictionaries and extend all_data with these records - all_data.extend(df.to_dict(orient='records')) - - # Write all collected data to a single JSON file - json_path = 'db/json/all_data.json' - with open(json_path, 'w') as f: - json.dump(all_data, f, indent=4) + # Save each dataframe as a JSON file with all objects in a single array + json_path = f'db/json/{file.replace(".csv", ".json")}' + df.to_json(json_path, orient='records', indent=4) if __name__ == "__main__": main() \ No newline at end of file From 86c6505288648744bcc3368d8d6b66ee4ec353eb Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 13:58:30 +0200 Subject: [PATCH 12/25] fix: remove '\' --- db/convert-csv-to-json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/convert-csv-to-json.py b/db/convert-csv-to-json.py index 8fc4382..e522204 100644 --- a/db/convert-csv-to-json.py +++ b/db/convert-csv-to-json.py @@ -17,7 +17,7 @@ def main(): df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x) # Save each dataframe as a JSON file with all objects in a single array json_path = f'db/json/{file.replace(".csv", ".json")}' - df.to_json(json_path, orient='records', indent=4) + df.to_json(json_path, orient='records', indent=4, ensure_ascii=False) if __name__ == "__main__": main() \ No newline at end of file From 1345913ceb1ed6beb45656064113f8a2ebd74cea Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 14:05:28 +0200 Subject: [PATCH 13/25] fix: \ --- db/convert-csv-to-json.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/db/convert-csv-to-json.py b/db/convert-csv-to-json.py index e522204..9be3b81 100644 --- a/db/convert-csv-to-json.py +++ b/db/convert-csv-to-json.py @@ -4,7 +4,8 @@ def transform_objectid(text): """Replace MongoDB ObjectId references to proper JSON format.""" - return re.sub(r'ObjectId\((.*?)\)', r'{"$oid": "\1"}', text) + # Use non-capturing group and directly format the string with $oid. + return re.sub(r'ObjectId\(([^)]+)\)', r'{"$oid": "\1"}', text) def main(): os.makedirs('db/json', exist_ok=True) # Ensure the directory for JSON files exists @@ -14,10 +15,10 @@ def main(): df = pd.read_csv(f'db/csv/{file}') # Transform all string columns that may contain ObjectId references for column in df.select_dtypes(include=['object']): - df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x) + df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if pd.notna(x) else x) # Save each dataframe as a JSON file with all objects in a single array json_path = f'db/json/{file.replace(".csv", ".json")}' - df.to_json(json_path, orient='records', indent=4, ensure_ascii=False) + df.to_json(json_path, orient='records', indent=4) if __name__ == "__main__": main() \ No newline at end of file From 07f03a778f061e00972eec073f05436c4aae63be Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 14:35:33 +0200 Subject: [PATCH 14/25] try fix --- db/convert-csv-to-json.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/db/convert-csv-to-json.py b/db/convert-csv-to-json.py index 9be3b81..327e6dd 100644 --- a/db/convert-csv-to-json.py +++ b/db/convert-csv-to-json.py @@ -1,11 +1,16 @@ import os import pandas as pd +import json import regex as re def transform_objectid(text): """Replace MongoDB ObjectId references to proper JSON format.""" # Use non-capturing group and directly format the string with $oid. - return re.sub(r'ObjectId\(([^)]+)\)', r'{"$oid": "\1"}', text) + pattern = r'ObjectId\(([^)]+)\)' + replacements = re.findall(pattern, text) + for r in replacements: + text = text.replace(f'ObjectId({r})', f'{{"$oid": "{r}"}}') + return text def main(): os.makedirs('db/json', exist_ok=True) # Ensure the directory for JSON files exists @@ -16,6 +21,9 @@ def main(): # Transform all string columns that may contain ObjectId references for column in df.select_dtypes(include=['object']): df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if pd.notna(x) else x) + # Convert transformed string JSON to actual JSON objects + for column in df.select_dtypes(include=['object']): + df[column] = df[column].apply(lambda x: json.loads(x) if pd.notna(x) and x.startswith('{') else x) # Save each dataframe as a JSON file with all objects in a single array json_path = f'db/json/{file.replace(".csv", ".json")}' df.to_json(json_path, orient='records', indent=4) From f6350d18569e9a9a35f3efb3f99309de4d66106d Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 15:17:48 +0200 Subject: [PATCH 15/25] feat: upload updated data --- .github/workflows/data-upgrade.yml | 26 +++++++++++++++++++++++++- db/csv/quotes.csv | 2 +- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index e32994f..515872d 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -35,4 +35,28 @@ jobs: uses: actions/upload-artifact@v4 with: name: json-files - path: db/json/ \ No newline at end of file + path: db/json/ + + #remove after testing + mongoimport: + needs: convert + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Download JSON files as artifacts + uses: actions/download-artifact@v4 + with: + name: json-files + path: db/json/ + + - name: Import to MongoDB + run: | + for file in db/json/*.json; do + collection=$(basename "$file" .json) + echo "Importing $file to collection $collection" + mongoimport --type json --uri ${{ secrets.MONGODB_URI }} --collection $collection --file "$file" --drop --maintainInsertionOrder --jsonArray + done + env: + MONGODB_URI: ${{ secrets.MONGODB_URI }} \ No newline at end of file diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 5295bd6..22db7e7 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From 4d5029bd98c9d90157a2132b3e880c3641e4bec6 Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 15:22:37 +0200 Subject: [PATCH 16/25] install mongodb tools --- .github/workflows/data-upgrade.yml | 7 +++++++ db/csv/quotes.csv | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index 515872d..2fa839f 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -51,6 +51,13 @@ jobs: name: json-files path: db/json/ + + - name: Install MongoDB Tools + run: | + sudo apt-get update + sudo apt-get install -y mongodb-database-tools + + - name: Import to MongoDB run: | for file in db/json/*.json; do diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 22db7e7..5295bd6 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From 4b8d3bb486d1423a35e28a2f3c85818ba56272b1 Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 15:30:42 +0200 Subject: [PATCH 17/25] mongo install? --- .github/workflows/data-upgrade.yml | 7 +++++-- db/csv/quotes.csv | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index 2fa839f..fc2e8ef 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -55,8 +55,11 @@ jobs: - name: Install MongoDB Tools run: | sudo apt-get update - sudo apt-get install -y mongodb-database-tools - + sudo apt-get install -y wget gnupg + wget -qO - https://www.mongodb.org/static/pgp/server-6.0.asc | sudo apt-key add - + echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/6.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-6.0.list + sudo apt-get update + sudo apt-get install -y mongodb-mongosh - name: Import to MongoDB run: | diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 5295bd6..22db7e7 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From 7fc41d2821330602200793e74562fa2808aee2fb Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 15:37:42 +0200 Subject: [PATCH 18/25] try mongo --- .github/workflows/data-upgrade.yml | 16 ++++++++-------- db/csv/quotes.csv | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index fc2e8ef..ee16b80 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -41,6 +41,10 @@ jobs: mongoimport: needs: convert runs-on: ubuntu-latest + # added for testing upload + strategy: + matrix: + mongodb-version: ['6.0'] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -52,14 +56,10 @@ jobs: path: db/json/ - - name: Install MongoDB Tools - run: | - sudo apt-get update - sudo apt-get install -y wget gnupg - wget -qO - https://www.mongodb.org/static/pgp/server-6.0.asc | sudo apt-key add - - echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/6.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-6.0.list - sudo apt-get update - sudo apt-get install -y mongodb-mongosh + - name: Start MongoDB + uses: supercharge/mongodb-github-action@1.10.0 + with: + mongodb-version: ${{ matrix.mongodb-version }} - name: Import to MongoDB run: | diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 22db7e7..5295bd6 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From cc832197d0fdd0c2fd95ac90444d48929645acd4 Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 15:42:15 +0200 Subject: [PATCH 19/25] try again --- .github/workflows/data-upgrade.yml | 5 +---- db/csv/quotes.csv | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index ee16b80..cb54a0e 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -55,13 +55,10 @@ jobs: name: json-files path: db/json/ - - - name: Start MongoDB + - name: Import to MongoDB uses: supercharge/mongodb-github-action@1.10.0 with: mongodb-version: ${{ matrix.mongodb-version }} - - - name: Import to MongoDB run: | for file in db/json/*.json; do collection=$(basename "$file" .json) diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 5295bd6..22db7e7 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From e8ceb07938c1b2aaf39aec8760dfd1c282153713 Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 15:47:05 +0200 Subject: [PATCH 20/25] ... --- .github/workflows/data-upgrade.yml | 10 +++++++--- db/csv/quotes.csv | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index cb54a0e..dbc5a0a 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -55,10 +55,14 @@ jobs: name: json-files path: db/json/ + - name: Install MongoDB Tools + run: | + wget -qO - https://www.mongodb.org/static/pgp/server-5.0.asc | sudo apt-key add - + echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu $(lsb_release -sc)/mongodb-org/5.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-5.0.list + sudo apt-get update + sudo apt-get install -y mongodb-database-tools + - name: Import to MongoDB - uses: supercharge/mongodb-github-action@1.10.0 - with: - mongodb-version: ${{ matrix.mongodb-version }} run: | for file in db/json/*.json; do collection=$(basename "$file" .json) diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 22db7e7..5295bd6 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From 074b129fbc6d812dfd7d9086326da6341235c48c Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 15:50:53 +0200 Subject: [PATCH 21/25] ... --- .github/workflows/data-upgrade.yml | 2 +- db/csv/quotes.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index dbc5a0a..cf0c692 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -67,7 +67,7 @@ jobs: for file in db/json/*.json; do collection=$(basename "$file" .json) echo "Importing $file to collection $collection" - mongoimport --type json --uri ${{ secrets.MONGODB_URI }} --collection $collection --file "$file" --drop --maintainInsertionOrder --jsonArray + mongoimport --type json --uri ${{ secrets.MONGODB_URI }} --collection "$collection" --file "$file" --drop --maintainInsertionOrder --jsonArray done env: MONGODB_URI: ${{ secrets.MONGODB_URI }} \ No newline at end of file diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 5295bd6..22db7e7 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From d22e237e53e86171fa18f0df01a625bea734410d Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 15:58:32 +0200 Subject: [PATCH 22/25] ... --- .github/workflows/data-upgrade.yml | 7 +------ db/csv/quotes.csv | 2 +- db/import_json_to_mongo.sh | 10 ++++++++++ 3 files changed, 12 insertions(+), 7 deletions(-) create mode 100644 db/import_json_to_mongo.sh diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index cf0c692..c34d195 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -63,11 +63,6 @@ jobs: sudo apt-get install -y mongodb-database-tools - name: Import to MongoDB - run: | - for file in db/json/*.json; do - collection=$(basename "$file" .json) - echo "Importing $file to collection $collection" - mongoimport --type json --uri ${{ secrets.MONGODB_URI }} --collection "$collection" --file "$file" --drop --maintainInsertionOrder --jsonArray - done + run: ./import_json_to_mongo.sh env: MONGODB_URI: ${{ secrets.MONGODB_URI }} \ No newline at end of file diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 22db7e7..5295bd6 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) diff --git a/db/import_json_to_mongo.sh b/db/import_json_to_mongo.sh new file mode 100644 index 0000000..204bc51 --- /dev/null +++ b/db/import_json_to_mongo.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Script to import JSON files to MongoDB collections +for file in db/json/*.json; do + # Extract the collection name from the filename + collection=$(basename "$file" .json) + echo "Importing $file to collection $collection" + # Run mongoimport command + mongoimport --type json --uri "$MONGODB_URI" --collection $collection --file "$file" --drop --maintainInsertionOrder --jsonArray +done \ No newline at end of file From f60a8d2cd33594f08c8695ce5aab38c67fc9c399 Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 16:00:12 +0200 Subject: [PATCH 23/25] fix path --- .github/workflows/data-upgrade.yml | 2 +- db/csv/quotes.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index c34d195..967097a 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -63,6 +63,6 @@ jobs: sudo apt-get install -y mongodb-database-tools - name: Import to MongoDB - run: ./import_json_to_mongo.sh + run: db/import_json_to_mongo.sh env: MONGODB_URI: ${{ secrets.MONGODB_URI }} \ No newline at end of file diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 5295bd6..22db7e7 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From 7d13e3de0bc1c5a349336092ce9d7544ffde0f19 Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 16:05:49 +0200 Subject: [PATCH 24/25] meh --- .github/workflows/data-upgrade.yml | 7 +++++-- db/csv/quotes.csv | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index 967097a..7c9b6d1 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -63,6 +63,9 @@ jobs: sudo apt-get install -y mongodb-database-tools - name: Import to MongoDB - run: db/import_json_to_mongo.sh + run: | + chmod +x ./db/import_json_to_mongo.sh + ./db/import_json_to_mongo.sh + shell: bash env: - MONGODB_URI: ${{ secrets.MONGODB_URI }} \ No newline at end of file + MONGODB_URI: ${{ secrets.MONGODB_URI }} \ No newline at end of file diff --git a/db/csv/quotes.csv b/db/csv/quotes.csv index 22db7e7..5295bd6 100644 --- a/db/csv/quotes.csv +++ b/db/csv/quotes.csv @@ -1,5 +1,5 @@ dialog,movie,character,_id -Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) +Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea) Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb) Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec) From d06748faee406d1f5db44355f76f4a90c2057a79 Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 27 Apr 2024 16:12:34 +0200 Subject: [PATCH 25/25] fix ymls --- .github/workflows/data-upgrade.yml | 35 +------------------- .github/workflows/release-new-data.yml | 46 ++++++++++++++++++-------- 2 files changed, 34 insertions(+), 47 deletions(-) diff --git a/.github/workflows/data-upgrade.yml b/.github/workflows/data-upgrade.yml index 7c9b6d1..e32994f 100644 --- a/.github/workflows/data-upgrade.yml +++ b/.github/workflows/data-upgrade.yml @@ -35,37 +35,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: json-files - path: db/json/ - - #remove after testing - mongoimport: - needs: convert - runs-on: ubuntu-latest - # added for testing upload - strategy: - matrix: - mongodb-version: ['6.0'] - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Download JSON files as artifacts - uses: actions/download-artifact@v4 - with: - name: json-files - path: db/json/ - - - name: Install MongoDB Tools - run: | - wget -qO - https://www.mongodb.org/static/pgp/server-5.0.asc | sudo apt-key add - - echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu $(lsb_release -sc)/mongodb-org/5.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-5.0.list - sudo apt-get update - sudo apt-get install -y mongodb-database-tools - - - name: Import to MongoDB - run: | - chmod +x ./db/import_json_to_mongo.sh - ./db/import_json_to_mongo.sh - shell: bash - env: - MONGODB_URI: ${{ secrets.MONGODB_URI }} \ No newline at end of file + path: db/json/ \ No newline at end of file diff --git a/.github/workflows/release-new-data.yml b/.github/workflows/release-new-data.yml index b5898f9..0a35c9a 100644 --- a/.github/workflows/release-new-data.yml +++ b/.github/workflows/release-new-data.yml @@ -31,21 +31,41 @@ jobs: - name: Convert CSV to JSON - run: | - import os - import pandas as pd - - os.makedirs('db/json', exist_ok=True) - csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')] - - for file in csv_files: - df = pd.read_csv(f'db/csv/{file}') - json_path = f'db/json/{file.replace(".csv", ".json")}' - df.to_json(json_path, orient='records', lines=True) - print("Conversion complete.") + run: python db/convert-csv-to-json.py - name: Upload JSON files as artifacts uses: actions/upload-artifact@v4 with: name: json-files - path: db/json/ \ No newline at end of file + path: db/json/ + + mongoimport: + needs: convert + runs-on: ubuntu-latest + strategy: + matrix: + mongodb-version: ['6.0'] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Download JSON files as artifacts + uses: actions/download-artifact@v4 + with: + name: json-files + path: db/json/ + + - name: Install MongoDB Tools + run: | + wget -qO - https://www.mongodb.org/static/pgp/server-5.0.asc | sudo apt-key add - + echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu $(lsb_release -sc)/mongodb-org/5.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-5.0.list + sudo apt-get update + sudo apt-get install -y mongodb-database-tools + + - name: Import to MongoDB + run: | + chmod +x ./db/import_json_to_mongo.sh + ./db/import_json_to_mongo.sh + shell: bash + env: + MONGODB_URI: ${{ secrets.MONGODB_URI }} \ No newline at end of file