Task_5 - Automate curation #48

Open · wants to merge 18 commits into base: master
Binary file added .dvc.yaml.swp
Binary file not shown.
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
4 changes: 4 additions & 0 deletions .dvc/config
@@ -0,0 +1,4 @@
[core]
    remote = mygoogledrive
[remote "mygoogledrive"]
    url = ../gdrive:1mNe5F-CMQBm8E8Ah13WhWyDBMV_5vQky
3 changes: 3 additions & 0 deletions .dvcignore
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
43 changes: 43 additions & 0 deletions .github/workflows/process-json-file.yml
@@ -0,0 +1,43 @@
name: process-json-file

on:
  # Run the workflow every day at 6:00am UTC
  schedule:
    - cron: "0 6 * * *"

jobs:
  process-json:
    runs-on: ubuntu-latest

    steps:
      # Check out the code from the repository
      - name: Checkout code
        uses: actions/checkout@v2

      # Install Python
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.8

      # Install dependencies (requirements.txt lives under .github/workflows in this PR)
      - name: Install dependencies
        run: pip install -r .github/workflows/requirements.txt

      # Run the Python script to process the JSON file (the script is added at Data/data.py)
      - name: Process JSON file
        run: python Data/data.py

      # Commit changes to the repository
      - name: Commit changes
        run: |
          git config --global user.name "Your Name"
          git config --global user.email "[email protected]"
          git add processed_data.json
          git commit -m "Process data"

      # Push changes to the remote repository
      - name: Push changes
        uses: ad-m/[email protected]
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
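One caveat with the workflow file above: `git commit` exits non-zero when `processed_data.json` is unchanged, which would mark the scheduled run as failed even though nothing is wrong. A hedged sketch of a guarded commit step (same names and placeholder identity as the workflow; the `git diff --cached --quiet` guard is the addition):

```yaml
      # Commit only when the processed file actually changed
      - name: Commit changes
        run: |
          git config --global user.name "Your Name"
          git config --global user.email "[email protected]"
          git add processed_data.json
          git diff --cached --quiet || git commit -m "Process data"
```

`git diff --cached --quiet` exits 0 when nothing is staged, so the `|| git commit` only fires when there is a real change to record.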
3 changes: 3 additions & 0 deletions .github/workflows/requirements.txt
@@ -0,0 +1,3 @@
geopandas
matplotlib
pandas
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
*.tif
81 changes: 0 additions & 81 deletions Data/Administrative/GlobalRoadsOpenAccess_gROADS.ipynb

This file was deleted.

1 change: 1 addition & 0 deletions Data/EcoRegion/.gitignore
@@ -0,0 +1 @@
/HoldridgeLifeZones.json
4 changes: 4 additions & 0 deletions Data/EcoRegion/HoldridgeLifeZones.json.dvc
@@ -0,0 +1,4 @@
outs:
- md5: 6f5c2d925d682513d2ec64e706659492
  size: 1914744
  path: HoldridgeLifeZones.json
6 changes: 6 additions & 0 deletions Data/EcoRegion/HoldridgeLifeZones.yaml
@@ -0,0 +1,6 @@
path: <Land_Sector_Datasets/Data/EcoRegion>
meta:
  description: <LevelI,LevelII,LevelIII>
  author: <joyakinyi>
  email: <[email protected]>

141 changes: 0 additions & 141 deletions Data/LandCover/Hansen v1.7 Global Forest Change.ipynb

This file was deleted.

Binary file added Data/Soil/Transformed/transformed_file.tif
Binary file not shown.
19 changes: 19 additions & 0 deletions Data/data.py
@@ -0,0 +1,19 @@
import json

# Read in the JSON data
with open('KEN_AL2_Kenya_GEZ.json', 'r') as f:
    data = json.load(f)

# Process the data
processed_data = []
for item in data:
    # Perform some transformation on the data
    processed_item = {
        'name': item['name'],
        'age': item['gez_code'] * 2
    }
    processed_data.append(processed_item)

# Write the processed data to a new JSON file
with open('processed_data.json', 'w') as f:
    json.dump(processed_data, f)
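The loop in `data.py` assumes each record carries `name` and `gez_code` fields. A minimal, self-contained sketch of the same transformation on made-up sample records (the values here are illustrative, not taken from the real `KEN_AL2_Kenya_GEZ.json`):

```python
import json

# Illustrative records mimicking the expected input shape
sample = [
    {'name': 'Zone A', 'gez_code': 11},
    {'name': 'Zone B', 'gez_code': 12},
]

# Same per-item transformation as data.py, written as a comprehension
processed = [
    {'name': item['name'], 'age': item['gez_code'] * 2}
    for item in sample
]

print(json.dumps(processed))
```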
5 changes: 5 additions & 0 deletions Data/forest-management-data-2015/.gitignore
@@ -0,0 +1,5 @@
/reference_data_set_updated.csv
/metafile.txt
*.csv


4 changes: 4 additions & 0 deletions Data/forest-management-data-2015/metafile.txt.dvc
@@ -0,0 +1,4 @@
outs:
- md5: e698f3972b8775c503e9a9fae56ad50b
  size: 903
  path: metafile.txt
@@ -0,0 +1,4 @@
outs:
- md5: b6cc94f41fff3d6cd09f9a386090bcc6
  size: 10776130
  path: reference_data_set_updated.csv
Binary file added Data/loaded_data.pkl
Binary file not shown.
Empty file added conda
Empty file.
17 changes: 17 additions & 0 deletions dvc.lock
@@ -0,0 +1,17 @@
schema: '2.0'
stages:
  extract:
    cmd: echo "No extraction needed for tif file in Data/Soil"
  transform:
    cmd:
    - gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -to SRC_METHOD=NO_GEOTRANSFORM
      -tr 0.5 0.5 -r near -te -180.0 -90.0 180.0 90.0 -te_srs EPSG:4326 -of GTiff
      Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif Data/Soil/transformed_file.tif
    deps:
    - path: Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif
      md5: cf9794c1d61bb6eeacaa10dfa5954931
      size: 1038378
    outs:
    - path: Data/Soil/transformed_file.tif
      md5: 2ec4f2db772d40135fb4abdc92e534dc
      size: 1038378
12 changes: 12 additions & 0 deletions dvc.yaml
@@ -0,0 +1,12 @@
stages:
  load_data:
    cmd: python load.py
    outs:
    - Data/loaded_data.pkl

  process_data:
    cmd: python process.py
    deps:
    - Data/loaded_data.pkl
    outs:
    - Data/processed_data.csv
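`process.py` itself is not included in this diff, but the `deps`/`outs` declarations pin down its contract: read `Data/loaded_data.pkl`, write `Data/processed_data.csv`. A hypothetical minimal sketch of such a script (the paths come from the pipeline; the `process` function name and the no-op cleaning step are assumptions, not the real logic):

```python
import pandas as pd


def process(in_path='Data/loaded_data.pkl', out_path='Data/processed_data.csv'):
    # Read the pickled DataFrame produced by the load_data stage
    df = pd.read_pickle(in_path)
    # Placeholder processing step (assumption: the real logic is project-specific)
    df = df.reset_index(drop=True)
    # Write the CSV that DVC tracks as this stage's output
    df.to_csv(out_path, index=False)
    return df
```

With a script shaped like this in place, `dvc repro` can rerun the stage only when `Data/loaded_data.pkl` changes.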
16 changes: 16 additions & 0 deletions load.py
@@ -0,0 +1,16 @@
import pandas as pd

data_path = 'Data/forest-management-data-2015/reference_data_set_updated.csv'
metafile_path = 'Data/forest-management-data-2015/metafile.txt'

# Load the dataset
df = pd.read_csv(data_path)

# Load the metafile
with open(metafile_path, 'r', encoding='utf-8') as f:
    metafile_contents = f.read()

# Process the data and save the result
df = df.dropna()
# Save the whole DataFrame to pickle
df.to_pickle('Data/loaded_data.pkl')
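The round trip `load.py` relies on (drop incomplete rows, pickle, read back intact downstream) can be sanity-checked on a toy frame. A small sketch with made-up column names and values, not the real reference CSV:

```python
import os
import tempfile

import pandas as pd

# Toy frame with one incomplete row, mimicking the reference dataset
df = pd.DataFrame({'plot': ['p1', 'p2', 'p3'], 'cover': [0.4, None, 0.9]})

# Same cleaning step as load.py
clean = df.dropna()

# Pickle and read back, as the load_data / process_data handoff does
path = os.path.join(tempfile.mkdtemp(), 'loaded_data.pkl')
clean.to_pickle(path)
restored = pd.read_pickle(path)

print(len(restored))
```

Pickling preserves dtypes and the (now non-contiguous) index exactly, which is why the pipeline passes a pickle between stages rather than re-reading a CSV.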