-
-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #140 from gitfrosh/beta
v2.5.0
- Loading branch information
Showing
5 changed files
with
88 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import os | ||
import pandas as pd | ||
import json | ||
import regex as re | ||
|
||
def transform_objectid(text): | ||
"""Replace MongoDB ObjectId references to proper JSON format.""" | ||
# Use non-capturing group and directly format the string with $oid. | ||
pattern = r'ObjectId\(([^)]+)\)' | ||
replacements = re.findall(pattern, text) | ||
for r in replacements: | ||
text = text.replace(f'ObjectId({r})', f'{{"$oid": "{r}"}}') | ||
return text | ||
|
||
def main(): | ||
os.makedirs('db/json', exist_ok=True) # Ensure the directory for JSON files exists | ||
csv_files = [f for f in os.listdir('db/csv') if f.endswith('.csv')] | ||
|
||
for file in csv_files: | ||
df = pd.read_csv(f'db/csv/{file}') | ||
# Transform all string columns that may contain ObjectId references | ||
for column in df.select_dtypes(include=['object']): | ||
df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if pd.notna(x) else x) | ||
# Convert transformed string JSON to actual JSON objects | ||
for column in df.select_dtypes(include=['object']): | ||
df[column] = df[column].apply(lambda x: json.loads(x) if pd.notna(x) and x.startswith('{') else x) | ||
# Save each dataframe as a JSON file with all objects in a single array | ||
json_path = f'db/json/{file.replace(".csv", ".json")}' | ||
df.to_json(json_path, orient='records', indent=4) | ||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash | ||
|
||
# Script to import JSON files to MongoDB collections | ||
for file in db/json/*.json; do | ||
# Extract the collection name from the filename | ||
collection=$(basename "$file" .json) | ||
echo "Importing $file to collection $collection" | ||
# Run mongoimport command | ||
mongoimport --type json --uri "$MONGODB_URI" --collection $collection --file "$file" --drop --maintainInsertionOrder --jsonArray | ||
done |