diff --git a/.dvc.yaml.swp b/.dvc.yaml.swp new file mode 100644 index 000000000..f8462fc4e Binary files /dev/null and b/.dvc.yaml.swp differ diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 000000000..69f937ac5 --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 000000000..bc6eecd93 --- /dev/null +++ b/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = mygoogledrive +['remote "mygoogledrive"'] + url = ../gdrive:1mNe5F-CMQBm8E8Ah13WhWyDBMV_5vQky diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 000000000..2ccdbc99b --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.github/workflows/process-json-file.yml b/.github/workflows/process-json-file.yml new file mode 100644 index 000000000..375b73ab6 --- /dev/null +++ b/.github/workflows/process-json-file.yml @@ -0,0 +1,43 @@ +name: process-json-file + +on: + # Run the workflow every day at 6:00am UTC + schedule: + - cron: "0 6 * * *" + +jobs: + process-json: + runs-on: ubuntu-latest + + steps: + # Checkout the code from the repository + - name: Checkout code + uses: actions/checkout@v2 + + # Install Python + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + # Install dependencies (the requirements file lives in .github/workflows/) + - name: Install dependencies + run: pip install -r .github/workflows/requirements.txt + + # Run the Python script to process the JSON file + - name: Process JSON file + run: python Data/data.py + + # Commit changes to the repository + - name: Commit changes + run: | + git config --global user.name "Your Name" + git config --global user.email "youremail@example.com" + git add Data/processed_data.json + git commit -m "Process data" + + # Push changes to the remote repository + - name: Push changes + uses: ad-m/github-push-action@v0.5.0 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/requirements.txt b/.github/workflows/requirements.txt new file mode 100644 index 000000000..48989c65e --- /dev/null +++ b/.github/workflows/requirements.txt @@ -0,0 +1,3 @@ +geopandas +matplotlib +pandas \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..7d5f2acc6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.tif diff --git a/Data/Administrative/GlobalRoadsOpenAccess_gROADS.ipynb b/Data/Administrative/GlobalRoadsOpenAccess_gROADS.ipynb deleted file mode 100644 index 325395f88..000000000 --- a/Data/Administrative/GlobalRoadsOpenAccess_gROADS.ipynb +++ /dev/null @@ -1,81 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Title: Global Roads Open Access Data Set (gROADS), v1 (1980 – 2010)\n", - "\n", - "### Description\n", - "The Global Roads Open Access Data Set, Version 1 (gROADSv1) was developed under the auspices of the CODATA Global Roads Data Development Task Group. The data set combines the best available roads data by country into a global roads coverage, using the UN Spatial Data Infrastructure Transport (UNSDI-T) version 2 as a common data model. All country road networks have been joined topologically at the borders, and many countries have been edited for internal topology. Source data for each country are provided in the documentation, and users are encouraged to refer to the readme file for use constraints that apply to a small number of countries.
Because the data are compiled from multiple sources, the road network representations date from the 1980s to 2010 depending on the country (most countries have no confirmed date), and spatial accuracy varies. The baseline global data set was compiled by the Information Technology Outreach Services (ITOS) of the University of Georgia. Updated data for 27 countries and 6 smaller geographic entities were assembled by Columbia University's Center for International Earth Science Information Network (CIESIN), with a focus largely on developing countries with the poorest data coverage.\n", - "\n", - "### FLINT\n", - "This dataset has been pre-processed/checked and is suitable for use in FLINT. Please adhere to individual dataset licence conditions and citations. Processed data can be accessed here: https://datasets.mojaglobal.workers.dev/\n", - "\n", - "### Format\n", - "Extent: Global coverage<br>
\n", - "Format: vector polyline geojson .json
\n", - "Cordinate system: EPSG:4326 (WGS84)
\n", - " Year: 1980-2010
\n", - "Size: 1GB\n", - "\n", - "### Original source\n", - "Original Source: Downloaded from: https://sedac.ciesin.columbia.edu/data/set/groads-global-roads-open-access-v1/docs
\n", - "Vector - polyline (Feature Class, ESRI Geodatabase)\n", - "\n", - "### Licence\n", - "Users may use and redistribute these data without explicit written permission from CIESIN or Information Technology Outreach Services (ITOS)/University of Georgia, with the exception of roads data associated with countries that are listed in the data documentation as requiring additional credits or holding special restrictions. Users are advised to consult the data documentation for further information and to obtain necessary permissions or adhere to relevant restrictions that apply to each of those data sets.\n", - "\n", - "### Citation\n", - "Center for International Earth Science Information Network - CIESIN - Columbia University, and Information Technology Outreach Services - ITOS - University of Georgia (2013). Global Roads Open Access Data Set, Version 1 (gROADSv1). Accessed 1st December 2020 from https://doi.org/10.7927/H4VD6WCT\n", - "\n", - "### Metadata\n", - "\n", - "\n", - "### Notes\n", - "Known issues: Overlapping road segments, potentially fixed by dissolving some attributes such as ROADID (both json types are provided in the library). Please note, this layer will take a long time to draw, with over 1.6 million segments.\n", - "\n", - "### Processing\n", - "Transform coordinate system to EPSG:4326 (WGS84) and raster to geotiff with cell alignment of 0.01degree ~1km." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - " #Code to transform coodinate system \n", - " gdalwarp -t_srs EPSG:4326 -tr 0.01 0.01 -r near -te -180.0 -90.0 180.0 90.0 -te_srs EPSG:4326 -of GTiff C:/Users/LennyJenny/Documents/ArcGIS/world/UNFCCC/downloads/GlobalScreeningHabiat/01_Data/crhab_lpu/hdr.adf C:/Users/LennyJenny/Documents/ArcGIS/world/UNFCCC/data_process/tiff/CriticalHabitatGlobalScreening.tif" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/Data/EcoRegion/.gitignore b/Data/EcoRegion/.gitignore new file mode 100644 index 000000000..36be213dc --- /dev/null +++ b/Data/EcoRegion/.gitignore @@ -0,0 +1 @@ +/HoldridgeLifeZones.json diff --git a/Data/EcoRegion/HoldridgeLifeZones.json.dvc b/Data/EcoRegion/HoldridgeLifeZones.json.dvc new file mode 100644 index 000000000..c0b653eeb --- /dev/null +++ b/Data/EcoRegion/HoldridgeLifeZones.json.dvc @@ -0,0 +1,4 @@ +outs: +- md5: 6f5c2d925d682513d2ec64e706659492 + size: 1914744 + path: HoldridgeLifeZones.json diff --git a/Data/EcoRegion/HoldridgeLifeZones.yaml b/Data/EcoRegion/HoldridgeLifeZones.yaml new file mode 100644 index 000000000..1f2be64e3 --- /dev/null +++ b/Data/EcoRegion/HoldridgeLifeZones.yaml @@ -0,0 +1,6 @@ +path: +meta: + description: + author: + email: + diff --git a/Data/LandCover/Hansen v1.7 Global Forest Change.ipynb b/Data/LandCover/Hansen v1.7 Global Forest Change.ipynb deleted file mode 100644 index b0a77f301..000000000 --- a/Data/LandCover/Hansen v1.7 Global Forest Change.ipynb +++ /dev/null @@ -1,141 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 
- "\n", - "### Description\n", - "Results from time-series analysis of Landsat images in characterizing global forest extent and change from 2000 through 2019. For additional information about these results, please see the associated journal article (Hansen et al., Science 2013).\n", - "\n", - "### FLINT\n", - "This dataset has been checked and is suitable for use in FLINT. Please adhere to individual dataset licence conditions and citations. Processed data can be accessed here: [Land Sector Datasets](https://datasets.mojaglobal.workers.dev/)<br>
\n", - "\n", - "### Format\n", - "Extent: Global coverage (excludes Greenland and oceans)
\n", - "Resolution: Raster 10 degree tiles 0.00025-degree ~30m resolution
\n", - "File type: geotiff (.tif)
\n", - "Cordinate system: EPSG:4326 (WGS84)
\n", - "Temporal Resolution: 2000-2019
\n", - "Size: 30+GB\n", - "\n", - "### Original source\n", - "http://earthenginepartners.appspot.com/science-2013-global-forest/download_v1.7.html\n", - "\n", - "### Licence\n", - "This work is licensed under a Creative Commons Attribution 4.0 International License. You are free to copy and redistribute the material in any medium or format, and to transform and build upon the material for any purpose, even commercially. You must give appropriate credit, provide a link to the license, and indicate if changes were made.\n", - "\n", - "### Citation\n", - "Use the following credit when these data are displayed: \"Source: Hansen/UMD/Google/USGS/NASA\"
\n", - "\n", - "Use the following credit when these data are cited: Hansen, M. C., P. V. Potapov, R. Moore, M. Hancher, S. A. Turubanova, A. Tyukavina, D. Thau, S. V. Stehman, S. J. Goetz, T. R. Loveland, A. Kommareddy, A. Egorov, L. Chini, C. O. Justice, and J. R. G. Townshend. 2013. “High-Resolution Global Maps of 21st-Century Forest Cover Change.” Science 342 (15 November): 850–53. Data available on-line from: http://earthenginepartners.appspot.com/science-2013-global-forest.\n", - "\n", - "### Original format\n", - "Global coverage, tiled 10x10 degrees tiles, geotiff (.tif)
\n", - "cell size 0.00025 degree, resolution 40000 x 40000 Pixels
\n", - "Cordinate system EPSG: 4326 (WGS84)\n", - "\n", - "### Metadata\n", - "Full metadata can be viewed here http://earthenginepartners.appspot.com/science-2013-global-forest/download_v1.7.html\n", - "\n", - "Dataset names: treecover2000, loss, gain, datamask, first and last\n", - "\n", - "Tree canopy cover for year 2000 (treecover2000) - Tree cover in the year 2000, defined as canopy closure for all vegetation taller than 5m in height. Encoded as a percentage per output grid cell, in the range 0–100.
\n", - "\n", - "Global forest cover gain 2000–2012 (gain) Forest gain during the period 2000–2012, defined as the inverse of loss, or a non-forest to forest change entirely within the study period. Encoded as either 1 (gain) or 0 (no gain).
\n", - "\n", - "Year of gross forest cover loss event (lossyear) Forest loss during the period 2000–2019, defined as a stand-replacement disturbance, or a change from a forest to non-forest state. Encoded as either 0 (no loss) or else a value in the range 1–17, representing loss detected primarily in the year 2001–2019, respectively.
\n", - "\n", - "Data mask (datamask) Three values representing areas of no data (0), mapped land surface (1), and permanent water bodies (2).
\n", - "\n", - "Circa year 2000 Landsat 7 cloud-free image composite (first) Reference multispectral imagery from the first available year, typically 2000. If no cloud-free observations were available for year 2000, imagery was taken from the closest year with cloud-free data, within the range 1999–2012.
\n", - "\n", - "Circa year 2019 Landsat cloud-free image composite (last) Reference multispectral imagery from the last available year, typically 2019. If no cloud-free observations were available for year 2019, imagery was taken from the closest year with cloud-free data, within the range 2010–2015.\n", - "\n", - "### Notes\n", - "Tiles may contain no data if over the ocean, please see see datamask to mask waterbodies etc. Updates are intended so please check back at the original source(http://earthenginepartners.appspot.com/science-2013-global-forest/download_v1.7.html).\n", - "\n", - "### Processing\n", - "Dataset is FLINT ready in native form. Code is provided below to bulk download tiles for global coverage, or you can head to the original source to download select tiles." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Bulk download tiles\n", - "#Text URLS treecover2000.txt, loss.txt, gain.txt, datamask.txt, first.txt, last.txt\n", - "#Python 3\n", - "import urllib.request\n", - "import os.path\n", - "\n", - "#change this path to txt file URLS treecover2000.txt, loss.txt, gain.txt, datamask.txt, first.txt, last.txt from http://earthenginepartners.appspot.com/science-2013-global-forest/download_v1.7.html\n", - "enterpath = r\"C:/data/gain.txt\"\n", - "\n", - "#text file containing urls for download downloaded from https://earthenginepartners.appspot.com/science-2013-global-forest/download_v1.7.html (see download instructions)\n", - "links = open(enterpath, 'r')\n", - "\n", - "#directory to save tifs into (insert your own folder path)\n", - "folder = \"C:/data/downloads/hansen/gain\"\n", - "\n", - "#open and download links in bulk\n", - "for link in links:\n", - " link = link.strip()\n", - " name = link.rsplit('/', 1)[-1]\n", - " filename = os.path.join(folder, name)\n", - " if not os.path.isfile(filename):\n", - " print('Downloading: '+ filename)\n", - " try:\n", - " urllib.request.urlretrieve(link, filename)\n", - " \n", - " except Exception as inst:\n", - " print(inst)\n", - " print(' Encountered unknown error. 
Continuing.')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Use this code to resample a different raster resolution at -tr using GDAL (or QGIS)\n", - "gdalwarp -t_srs EPSG:4326 -dstnodata 255.0 -tr 0.05 0.05 -r near -te -180.0 -90.0 180.0 90.0 -te_srs EPSG:4326 -of GTiff \"C:/Data/Hansen/Tile.tif\" \"C:/Data/Hansen/resampledTile.tif\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Use this code to merge multiple rasters (noting may result in very large file sizes) input tifs or specify text file of tif list file locations\n", - "python3 -m gdal_merge -ot Int32 -of GTiff -co COMPRESS=DEFLATE -co PREDICTOR=2 -co ZLEVEL=9 -o C:/Data/data_process/tiff/merge.tif input1.tif input2.tif...\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif b/Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif new file mode 100644 index 000000000..b757b2721 Binary files /dev/null and b/Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif differ diff --git a/Data/Soil/Transformed/transformed_file.tif b/Data/Soil/Transformed/transformed_file.tif new file mode 100644 index 000000000..69454f804 Binary files /dev/null and b/Data/Soil/Transformed/transformed_file.tif differ diff --git a/Data/data.py b/Data/data.py new file mode 100644 index 000000000..f974548a0 --- /dev/null +++ b/Data/data.py @@ -0,0 +1,19 @@ +import json + +# Read in the JSON data +with open('KEN_AL2_Kenya_GEZ.json', 'r') as f: + data = json.load(f) + +# Process the data +processed_data = [] +for item in data: + # Perform some transformation on the data + processed_item = { + 'name': item['name'], + 'age': item['gez_code'] * 2 + } + processed_data.append(processed_item) + +# Write the processed data to a new JSON file +with open('processed_data.json', 'w') as f: + json.dump(processed_data, f) diff --git a/Data/forest-management-data-2015/.gitignore b/Data/forest-management-data-2015/.gitignore new file mode 100644 index 000000000..332972ab7 --- /dev/null +++ b/Data/forest-management-data-2015/.gitignore @@ -0,0 +1,5 @@ +/reference_data_set_updated.csv +/metafile.txt +*.csv + + diff --git a/Data/forest-management-data-2015/metafile.txt.dvc b/Data/forest-management-data-2015/metafile.txt.dvc new file mode 100644 index 000000000..626244b5b --- /dev/null +++ b/Data/forest-management-data-2015/metafile.txt.dvc @@ -0,0 +1,4 @@ +outs: +- md5: e698f3972b8775c503e9a9fae56ad50b + size: 903 + path: metafile.txt diff --git a/Data/forest-management-data-2015/reference_data_set_updated.csv.dvc b/Data/forest-management-data-2015/reference_data_set_updated.csv.dvc new file mode 100644 index 000000000..d1c006e3d --- /dev/null +++ b/Data/forest-management-data-2015/reference_data_set_updated.csv.dvc @@ -0,0 +1,4 @@ +outs: +- md5: b6cc94f41fff3d6cd09f9a386090bcc6 + size: 10776130 + path: reference_data_set_updated.csv diff --git a/Data/loaded_data.pkl b/Data/loaded_data.pkl new file mode 100644 index 000000000..28110248f Binary files /dev/null and b/Data/loaded_data.pkl differ 
diff --git a/conda b/conda new file mode 100644 index 000000000..e69de29bb diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 000000000..1328f5ec1 --- /dev/null +++ b/dvc.lock @@ -0,0 +1,17 @@ +schema: '2.0' +stages: + extract: + cmd: echo "No extraction needed for tif file in Data/Soil" + transform: + cmd: + - gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -to SRC_METHOD=NO_GEOTRANSFORM -tr + 0.5 0.5 -r near -te -180.0 -90.0 180.0 90.0 -te_srs EPSG:4326 -of GTiff Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif + Data/Soil/transformed_file.tif + deps: + - path: Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif + md5: cf9794c1d61bb6eeacaa10dfa5954931 + size: 1038378 + outs: + - path: Data/Soil/transformed_file.tif + md5: 2ec4f2db772d40135fb4abdc92e534dc + size: 1038378 diff --git a/dvc.yaml b/dvc.yaml new file mode 100644 index 000000000..9982ded06 --- /dev/null +++ b/dvc.yaml @@ -0,0 +1,12 @@ +stages: + load_data: + cmd: python load.py + outs: + - Data/loaded_data.pkl + + process_data: + cmd: python process.py + deps: + - Data/loaded_data.pkl + outs: + - Data/processed_data.csv diff --git a/load.py b/load.py new file mode 100644 index 000000000..5274db763 --- /dev/null +++ b/load.py @@ -0,0 +1,16 @@ +import pandas as pd + +data_path = 'Data/forest-management-data-2015/reference_data_set_updated.csv' +metafile_path = 'Data/forest-management-data-2015/metafile.txt' + +# Load the dataset +df = pd.read_csv(data_path) + +# Load the metafile (kept for reference; its contents are not used further here) +with open(metafile_path, 'r', encoding='utf-8') as f: + metafile_contents = f.read() + +# Drop rows with missing values +df = df.dropna() +# Save the whole DataFrame to pickle +df.to_pickle('Data/loaded_data.pkl') diff --git a/process.py b/process.py new file mode 100644 index 000000000..d3872b327 --- /dev/null +++ b/process.py @@ -0,0 +1,15 @@ +import pandas as pd + +data_path = 'Data/loaded_data.pkl' +output_path = 'Data/processed_data.csv' + +# Load the data +df = pd.read_pickle(data_path) + +# Process the data: drop missing rows and reindex +df = df.dropna() +df = df.reset_index(drop=True) + +# Save the processed data; dvc.yaml already tracks this file as a stage output, +# so a separate 'dvc add' would conflict with the pipeline and is not needed +df.to_csv(output_path, index=False)
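For anyone trying the change locally, here is a minimal sketch of how the pipeline defined in dvc.yaml above would be exercised, assuming DVC is installed and the "mygoogledrive" remote configured in .dvc/config is reachable (stage and file names are taken from the diff):

$ dvc repro     # runs load_data (load.py) then process_data (process.py), updating dvc.lock
$ dvc push      # uploads Data/loaded_data.pkl and Data/processed_data.csv to the default remote
$ git add dvc.lock && git commit -m "Reproduce pipeline"

dvc repro re-runs only the stages whose dependencies have changed, so repeated runs after small edits are cheap.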