Skip to content

Commit

Permalink
Merge pull request #140 from gitfrosh/beta
Browse files Browse the repository at this point in the history
v2.5.0
  • Loading branch information
gitfrosh authored Apr 29, 2024
2 parents 617e1eb + bb1bc34 commit 35e064d
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 31 deletions.
23 changes: 7 additions & 16 deletions .github/workflows/data-upgrade.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,14 @@ jobs:
- name: Install pandas
run: pip install pandas

- name: Install regex
run: pip install regex

- name: Convert CSV to JSON
run: |
import os
import pandas as pd
os.makedirs('db/json', exist_ok=True)
csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')]
for file in csv_files:
df = pd.read_csv(f'db/csv/{file}')
json_path = f'db/json/{file.replace(".csv", ".json")}'
df.to_json(json_path, orient='records', lines=True)
print("Conversion complete.")
run: python db/convert-csv-to-json.py

- name: Upload JSON files as artifacts
uses: actions/upload-artifact@v4
with:
name: json-files
path: db/json/
uses: actions/upload-artifact@v4
with:
name: json-files
path: db/json/
52 changes: 38 additions & 14 deletions .github/workflows/release-new-data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ on:
branches:
- main
pull_request:
branches:
- main
paths:
- 'db/**'

Expand All @@ -25,25 +27,47 @@ jobs:
python-version: '3.x'

- name: Install pandas
run: pip install pandas
run: pip install pandas

- name: Install regex
run: pip install regex

- name: Convert CSV to JSON
run: |
import os
import pandas as pd
os.makedirs('db/json', exist_ok=True)
csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')]
for file in csv_files:
df = pd.read_csv(f'db/csv/{file}')
json_path = f'db/json/{file.replace(".csv", ".json")}'
df.to_json(json_path, orient='records', lines=True)
print("Conversion complete.")
run: python db/convert-csv-to-json.py

- name: Upload JSON files as artifacts
uses: actions/upload-artifact@v4
with:
name: json-files
path: db/json/
path: db/json/

mongoimport:
  # Imports the JSON files produced by the `convert` job into MongoDB.
  needs: convert
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # Quoted so the release series stays a string ('6.0', not the float 6).
      mongodb-version: ['6.0']
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Download JSON files as artifacts
      uses: actions/download-artifact@v4
      with:
        name: json-files
        path: db/json/

    - name: Install MongoDB Tools
      env:
        # Single source of truth for the repo series: previously the matrix
        # declared 6.0 while the key and repo URLs were hard-coded to 5.0.
        MONGODB_VERSION: ${{ matrix.mongodb-version }}
      run: |
        # apt-key is deprecated; store the key in a dedicated keyring and
        # reference it from the source entry via signed-by instead.
        keyring="/usr/share/keyrings/mongodb-server-${MONGODB_VERSION}.gpg"
        wget -qO - "https://www.mongodb.org/static/pgp/server-${MONGODB_VERSION}.asc" | sudo gpg --dearmor -o "$keyring"
        echo "deb [ arch=amd64,arm64 signed-by=${keyring} ] https://repo.mongodb.org/apt/ubuntu $(lsb_release -sc)/mongodb-org/${MONGODB_VERSION} multiverse" | sudo tee "/etc/apt/sources.list.d/mongodb-org-${MONGODB_VERSION}.list"
        sudo apt-get update
        sudo apt-get install -y mongodb-database-tools

    - name: Import to MongoDB
      run: |
        chmod +x ./db/import_json_to_mongo.sh
        ./db/import_json_to_mongo.sh
      shell: bash
      env:
        # Connection string is read by the import script from the environment.
        MONGODB_URI: ${{ secrets.MONGODB_URI }}
32 changes: 32 additions & 0 deletions db/convert-csv-to-json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import pandas as pd
import json
import regex as re

def transform_objectid(text):
    """Rewrite MongoDB ``ObjectId(...)`` references as Extended JSON.

    Every ``ObjectId(<id>)`` occurrence in ``text`` is replaced with the JSON
    object literal ``{"$oid": "<id>"}``. Whitespace around the id and one
    matching pair of single or double quotes (as in ``ObjectId("abc")``) are
    removed, so the emitted literal is always valid JSON.

    Args:
        text: String that may contain zero or more ObjectId references.

    Returns:
        The string with all ObjectId references replaced. Text without any
        reference is returned unchanged.
    """
    def _to_oid(match):
        oid = match.group(1).strip()
        # Drop one pair of matching quotes: ObjectId("abc") / ObjectId('abc').
        if len(oid) >= 2 and oid[0] == oid[-1] and oid[0] in '"\'':
            oid = oid[1:-1]
        # json.dumps escapes any character that would break the JSON literal.
        return json.dumps({"$oid": oid}, ensure_ascii=False)

    # The capturing group holds the id; one substitution pass replaces the
    # previous findall + str.replace loop.
    return re.sub(r'ObjectId\(([^)]+)\)', _to_oid, text)

def _convert_cell(value):
    """Normalise one CSV cell: rewrite ObjectId references, then parse JSON objects."""
    if not pd.notna(value):
        # Missing values pass through untouched.
        return value
    text = transform_objectid(str(value))
    if not text.startswith('{'):
        return text
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Ordinary text that merely begins with '{' stays a plain string
        # instead of aborting the whole conversion.
        return text


def main():
    """Convert every ``.csv`` file in ``db/csv`` to a JSON file in ``db/json``.

    For each CSV file, cells in object-dtype columns have their
    ``ObjectId(...)`` references rewritten by ``transform_objectid``; cells
    that then start with ``{`` and parse as JSON become real objects. Each
    dataframe is written as a single JSON array of records, indented by four
    spaces, to ``db/json/<name>.json``.
    """
    os.makedirs('db/json', exist_ok=True)  # Ensure the output directory exists
    # Sorted because os.listdir returns entries in arbitrary order.
    csv_files = sorted(f for f in os.listdir('db/csv') if f.endswith('.csv'))

    for file in csv_files:
        df = pd.read_csv(f'db/csv/{file}')
        for column in df.select_dtypes(include=['object']):
            df[column] = df[column].apply(_convert_cell)
        # Swap only the extension; str.replace would also rewrite a ".csv"
        # occurring earlier in the file name.
        stem = os.path.splitext(file)[0]
        json_path = f'db/json/{stem}.json'
        df.to_json(json_path, orient='records', indent=4)


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion db/csv/quotes.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dialog,movie,character,_id
Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9)
Deagol!!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7e9)
Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ea)
Deagol!,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7eb)
Give us that! Deagol my love,ObjectId(5cd95395de30eff6ebccde5d),ObjectId(5cd99d4bde30eff6ebccfe9e),ObjectId(5cd96e05de30eff6ebcce7ec)
Expand Down
10 changes: 10 additions & 0 deletions db/import_json_to_mongo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Import every JSON array file in db/json/ into MongoDB.
#
# Each file db/json/<name>.json is loaded into the collection <name>; the
# collection is dropped first (--drop), so the import replaces its contents.
# Requires MONGODB_URI in the environment.

# Abort on the first failing command, unset variable, or failed pipeline
# stage, so one broken import is not masked by later successful ones.
set -euo pipefail

: "${MONGODB_URI:?MONGODB_URI must be set}"

# With nullglob an unmatched pattern expands to nothing instead of being
# handed to mongoimport as the literal string "db/json/*.json".
shopt -s nullglob
files=(db/json/*.json)

if (( ${#files[@]} == 0 )); then
  echo "No JSON files found in db/json/" >&2
  exit 1
fi

for file in "${files[@]}"; do
  # Extract the collection name from the filename
  collection=$(basename "$file" .json)
  echo "Importing $file to collection $collection"
  # Quote the collection name so names with spaces or glob characters are
  # passed as a single argument.
  mongoimport --type json --uri "$MONGODB_URI" --collection "$collection" --file "$file" --drop --maintainInsertionOrder --jsonArray
done

0 comments on commit 35e064d

Please sign in to comment.