# From 9b29cf74eeb1d2ac77281776f3e7df4fa63cbb3f Mon Sep 17 00:00:00 2001
# From: Bobby Stearman <80459294+bobby-didcoding@users.noreply.github.com>
# Date: Mon, 24 Feb 2025 15:03:18 +0000
# Subject: [PATCH] Create sync_s3_images.py
# File: core/management/commands/sync_s3_images.py (new file)
"""Management command: scrape a page for ``<img>`` sources and dump them to JSON.

Usage::

    python manage.py sync_s3_images <url_to_scrape>
"""
import json

import requests
from bs4 import BeautifulSoup
from django.conf import settings
from django.core.management import BaseCommand, CommandError


class Command(BaseCommand):
    """Collect the ``src`` of every ``<img>`` on one page into ``s3_images.json``.

    The output file is written to ``settings.ROOT_DIR / 's3_images.json'`` and
    has the shape ``{"images": [<src>, ...]}``.
    """

    help = 'Used to scrape a given url for all images and dump to json'

    # Seconds to wait on the remote server. ``requests`` applies no timeout by
    # default, so without this a stalled server would hang the command forever.
    REQUEST_TIMEOUT = 30

    def add_arguments(self, parser):
        """Register the single positional argument: the URL to scrape."""
        parser.add_argument('url_to_scrape', type=str, help='Please add a valid url')

    def handle(self, *args, **options):
        """Fetch the page, extract image sources and write them to disk.

        Raises:
            CommandError: if the page cannot be fetched or the server answers
                with an HTTP error status.
        """
        url = options['url_to_scrape']
        # NOTE(review): assumes ROOT_DIR is a pathlib-style object supporting
        # the ``/`` operator, as the original code did - confirm in settings.
        json_file_path = settings.ROOT_DIR / 's3_images.json'

        # Scrape BEFORE opening the output file: opening with 'w' truncates
        # immediately, so a failed request would otherwise wipe an existing
        # s3_images.json and leave an empty file behind.
        image_sources = self._fetch_image_sources(url)

        with open(json_file_path, 'w', encoding='utf-8') as jsonf:
            json.dump({'images': image_sources}, jsonf, indent=4)

        self.stdout.write(self.style.SUCCESS('All done, bye!'))

    def _fetch_image_sources(self, url):
        """Return the non-empty ``src`` values of all ``<img>`` tags at *url*.

        Tags with a missing or empty ``src`` attribute are skipped so the
        resulting list never contains ``None``/``""`` entries. Values are
        returned exactly as they appear in the markup (relative URLs are not
        resolved).

        Raises:
            CommandError: on connection errors, timeouts or HTTP 4xx/5xx.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            # Without this an error page (404/500) would be scraped as if it
            # were the requested document.
            response.raise_for_status()
        except requests.RequestException as exc:
            raise CommandError(f'Could not fetch {url}: {exc}') from exc

        soup = BeautifulSoup(response.content, 'html.parser')
        return [img['src'] for img in soup.find_all('img') if img.get('src')]