Huge update to website & website generation
npanuhin committed Dec 19, 2023
1 parent 19c0d37 commit 1644505
Showing 16 changed files with 259 additions and 126 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/daily_update.yml
@@ -15,10 +15,7 @@ jobs:
runs-on: ubuntu-latest
steps:
# - name: Clone repository
# uses: actions/checkout@v3
# with:
# token: ${{ secrets.PAT }}
# ssh-key: ${{ secrets.SSH_KEY }}
# uses: actions/checkout@v4

- name: Fetch repository info
id: repository
5 changes: 2 additions & 3 deletions .github/workflows/lint.yaml
@@ -16,7 +16,7 @@ jobs:

steps:
- name: Clone repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
@@ -26,8 +26,7 @@ jobs:
cache-dependency-path: '.github/flake8.requirements.txt'

- name: Install dependencies
run: |
pip install -U -r ".github/flake8.requirements.txt"
run: pip install -r ".github/flake8.requirements.txt"

- name: Lint with flake8
run: flake8 --show-source --statistics
45 changes: 24 additions & 21 deletions .github/workflows/pages.yml
@@ -3,11 +3,11 @@ name: Deploy static content to Pages

on:
# Runs on pushes targeting the default branch
# push:
# branches: ["master"]
push:
paths:
- src/website/**
branches: ["master"]
# push:
# paths:
# - src/website/**
workflow_dispatch:

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
@@ -16,39 +16,42 @@ permissions:
pages: write
id-token: write

# Allow one concurrent deployment
# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
concurrency:
group: "pages"
cancel-in-progress: false

jobs:
# Single deploy job since we're just deploying
deploy:
name: GitHub Pages deployment
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
steps:
- name: Fetch repository info
id: repository
run: |
echo "::set-output name=name::$(echo '${{ github.repository }}' | awk -F '/' '{print $2}')"
echo "::set-output name=url::$(echo https://${{ secrets.PAT }}@github.com/${{ github.repository }}.git)"
- name: Checkout
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.11
# No pip cache

- name: Partially clone repository
run: |
git clone --depth 1 --filter=blob:none --sparse ${{ steps.repository.outputs.url }}
cd ${{ steps.repository.outputs.name }}
git sparse-checkout set --no-cone "src/website/"
- name: Generate website
working-directory: src/scripts
run: python assemble_website.py

- name: Setup GitHub Pages
uses: actions/configure-pages@v2
- name: Setup Pages
uses: actions/configure-pages@v4

- name: Upload artifact
uses: actions/upload-pages-artifact@v1
uses: actions/upload-pages-artifact@v2
with:
path: ${{ steps.repository.outputs.name }}/src/website
# Upload src/website folder
path: 'src/website'

- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v1
uses: actions/deploy-pages@v3
33 changes: 30 additions & 3 deletions README.md
@@ -22,9 +22,12 @@ Stages (roughly in order of importance):
- [ ] Fix metadata for all images (currently done: ?/?)
- [x] Finally remove all images from this repo and reduce the size of repo (+ number of commits in repo)
- [x] Remove `path` key
- [x] Generate API only for website and not store it in Git repo (+ minified)
- [ ] Write a comprehensive README
- [ ] Enable other countries
- [ ] Improve website + add protection for GCloud (because 5s per image ~= 500'000 images per month if somebody decides to leave the page open for so long xd)
- [x] Rewrite website
- [ ] Website: hold current image when hovering over title
- [ ] Add protection for GCloud (because 5s per image ~= 500'000 images per month if somebody decides to leave the page open for so long xd)
- [ ] Deal with integrity errors (see [TODO](#todo) below)
- [ ] Update (and upload to storage) videos, if needed
- [ ] Find a way to retrieve videos from Bing (identify that today's image is a video, etc.)
Expand All @@ -36,8 +39,8 @@ All information is stored in "API files"[^1]. They can be obtained by sending a

```ruby
https://bing.npanuhin.me/{country}/{language}.json
https://bing.npanuhin.me/{country}/{language}.min.json # For minified version
```
<!-- https://bing.npanuhin.me/{country}/{language}.url.json # Only dates and urls (format description below) -->

The following countries and languages are currently available: <a href="https://bing.npanuhin.me/US/en.json"><code>US/en</code></a>

@@ -75,10 +78,34 @@ One API file consists of an array of image data:

- The `bing_url` field contains the original image URL from Bing (Microsoft) servers. Unfortunately, it is not possible to retrieve images from more than a couple of years ago from these URLs (they all point to the same dummy image)

<!-- URL API files are minified and contain only `date` field as key and `url` field as value (to save space as much as possible):
```jsonc
{"2009-06-03":"https://{storage_url}/US/en/2009-06-03.jpg","...":"...",}
``` -->

> [!NOTE]
> API files tend to be quite large (a couple of MB). Use a minified version in production environments
> API files tend to be quite large (a couple of MB)
> [!TIP]
> If you only need images, **you can skip loading the API files altogether**! Simply make a request to the storage URL using the format specified above (if 404 is returned, then sadly we don't have this image)
>
> If you still need image titles, descriptions, etc., but want to save bandwidth, you can get API files for specific years:
> ```ruby
> https://bing.npanuhin.me/{country}/{language}.{year}.json
> ```
> These files are minified and typically have a size of 100-500 KB
<!-- >
> **Pro tip**:
> If you only need images, **you can skip loading the API files altogether**! Simply make a request to the storage URL using the format specified above (if 404 is returned, then sadly we don't have this image) -->
<!-- If you don't need image titles, descriptions, etc., you can use the URL API file, which is *only about 13% the size* of the full API file: -->
<!-- > [!TIP]
> If you only need images, **you can skip loading the API files altogether**! Simply make a request to the storage URL using the format specified above (if 404 is returned, then sadly we don't have this image) -->
> [!Important]
> Feel free to use the API files and images, but please **avoid sending frequent requests** (for images this would incur additional costs for me on Google Cloud Storage).
>
> <a name="sometext"></a>If you need to make frequent requests to the API files, I recommend downloading and caching them locally (they are updated only once a day). The same applies to the images (although this will be quite difficult to implement)
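A minimal sketch of the local-caching approach recommended above, assuming a 24-hour freshness window (the API files update once a day); the cache file path is this example's choice, not part of the API:

```python
import json
import os
import time
from urllib.request import urlopen

CACHE_PATH = 'bing_api_cache.json'  # hypothetical local cache file
MAX_AGE = 24 * 60 * 60              # refresh at most once a day (seconds)


def load_api(country: str = 'US', language: str = 'en') -> list:
    """Return the API file, serving a local copy while it is still fresh."""
    if os.path.exists(CACHE_PATH) and time.time() - os.path.getmtime(CACHE_PATH) < MAX_AGE:
        with open(CACHE_PATH, 'r', encoding='utf-8') as file:
            return json.load(file)

    # Cache missing or stale: download once, then reuse it for a day
    url = f'https://bing.npanuhin.me/{country}/{language}.json'
    with urlopen(url, timeout=30) as response:
        api = json.load(response)

    with open(CACHE_PATH, 'w', encoding='utf-8') as file:
        json.dump(api, file, ensure_ascii=False)
    return api
```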
3 changes: 3 additions & 0 deletions src/.gitignore
@@ -1,2 +1,5 @@
# Cache
__pycache__/

# Website generated files
website/api
20 changes: 14 additions & 6 deletions src/Region.py
@@ -5,15 +5,13 @@


class Region:
def __init__(self, region):
def __init__(self, region: str):
self.lang, self.country = map(str.lower, region.split('-'))
self.path = mkpath(os.path.dirname(__file__), '../api', self.country.upper())
self.api_path = mkpath(self.path, self.lang.lower() + '.json')

os.makedirs(self.path, exist_ok=True)

self.gcloud_images_path = mkpath(self.country.upper(), self.lang.lower())

@property
def mkt(self):
return f'{self.lang}-{self.country.upper()}'
@@ -25,9 +23,19 @@ def read_api(self) -> str:
with open(self.api_path, 'r', encoding='utf-8') as file:
return json.load(file)

def write_api(self, api): # TODO
with open(self.api_path, 'w', encoding='utf-8') as file:
json.dump(api, file, ensure_ascii=False, indent='\t')
def write_api(self, api: list[dict], output_path=None, *args, **kwargs):
if output_path is None:
output_path = self.api_path

os.makedirs(os.path.dirname(output_path), exist_ok=True)

kwargs = {
'ensure_ascii': False,
'indent': '\t',
} | kwargs

with open(output_path, 'w', encoding='utf-8') as file:
json.dump(api, file, *args, **kwargs)

def __repr__(self):
return f'Region({self.mkt})'
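The new `write_api` merges default `json.dump` options with caller-supplied ones via the dict-union operator (`|`, Python 3.9+): when a key appears on both sides, the right-hand value wins, so callers can override `indent` or `separators` while keeping `ensure_ascii=False` by default. A standalone sketch of the pattern (the function name is illustrative, not from the repository):

```python
import json


def dump_with_defaults(data, **kwargs) -> str:
    # Defaults on the left, overrides on the right: `|` keeps the
    # right-hand value whenever a key appears on both sides.
    kwargs = {'ensure_ascii': False, 'indent': '\t'} | kwargs
    return json.dumps(data, **kwargs)
```

Passing `indent=None, separators=(',', ':')`, as the year-split files do, therefore produces minified output.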
2 changes: 1 addition & 1 deletion src/bing.py
@@ -160,7 +160,7 @@ def update(region: Region):
file.write(requests.get(api_by_date[date]['bing_url']).content)

api_by_date[date]['url'] = gcloud.upload_file(
image_path, posixpath(mkpath(region.gcloud_images_path, filename)), skip_exists=True
image_path, posixpath(mkpath(region.country.upper(), region.lang.lower(), filename)), skip_exists=True
)

os.remove(image_path)
File renamed without changes.
File renamed without changes.
File renamed without changes.
51 changes: 51 additions & 0 deletions src/scripts/assemble_website.py
@@ -0,0 +1,51 @@
import sys
import os

sys.path.append('../')
from Region import REGIONS  # noqa: E402
from utils import mkpath  # noqa: E402


WEBSITE_ROOT = '../website'


def main():
    for region in REGIONS:
        api = region.read_api()

        website_directory = mkpath(WEBSITE_ROOT, 'api', region.country.upper())

        os.makedirs(website_directory, exist_ok=True)

        for file in os.listdir(website_directory):
            os.remove(mkpath(website_directory, file))

        region.write_api(api, mkpath(website_directory, region.lang.lower() + '.json'))
        # region.write_api(
        #     api,
        #     mkpath(website_directory, region.lang.lower() + '.min.json'),
        #     indent=None, separators=(',', ':')
        # )

        # only_urls = {image['date']: image['url'] for image in api}

        # region.write_api(
        #     only_urls,
        #     mkpath(website_directory, region.lang.lower() + '.url.json'),
        #     indent=None, separators=(',', ':')
        # )

        # Split by year
        min_year = int(min(image['date'] for image in api).split('-')[0])
        max_year = int(max(image['date'] for image in api).split('-')[0])
        for year in range(min_year, max_year + 1):
            year_api = [image for image in api if image['date'].startswith(str(year))]
            region.write_api(
                year_api,
                mkpath(website_directory, region.lang.lower() + f'.{year}.json'),
                indent=None, separators=(',', ':')
            )


if __name__ == '__main__':
    main()
19 changes: 19 additions & 0 deletions src/scripts/get_total_image_count.py
@@ -0,0 +1,19 @@
import sys

sys.path.append('../')
from Region import REGIONS  # noqa: E402


def main():
    total_image_count = 0

    for region in REGIONS:
        api = region.read_api()

        total_image_count += len(api)

    print(total_image_count)


if __name__ == '__main__':
    main()
1 change: 0 additions & 1 deletion src/website/api/us.json

This file was deleted.

2 changes: 1 addition & 1 deletion src/website/index.html
@@ -23,7 +23,7 @@ <h1>Bing Wallpaper Archive</h1>
</div>
</div>

<a id="description" class="firstrun" href="" target="_blank"></a>
<a id="title" href="" target="_blank"></a>
</body>

</html>
