Merge pull request #57 from phildini/bookwyrm-social-main

Catch Us Up to Upstream
phildini · May 13, 2024 · ada1994 · ada1994
2 parents b549408 + 0371cef
commit ada1994
Show file tree

Hide file tree

Showing 289 changed files with 11,254 additions and 4,270 deletions.
diff --git a/.env.example b/.env.example
@@ -16,6 +16,11 @@ DEFAULT_LANGUAGE="English"
 ## Leave unset to allow all hosts
 # ALLOWED_HOSTS="localhost,127.0.0.1,[::1]"
 
+# Specify when the site is served from a port that is not the default
+# for the protocol (80 for HTTP or 443 for HTTPS).
+# Probably only necessary in development.
+# PORT=1333
+
 MEDIA_ROOT=images/
 
 # Database configuration
@@ -71,14 +76,20 @@ ENABLE_THUMBNAIL_GENERATION=true
 USE_S3=false
 AWS_ACCESS_KEY_ID=
 AWS_SECRET_ACCESS_KEY=
+# seconds for signed S3 urls to expire
+# this is currently only used for user export files
+S3_SIGNED_URL_EXPIRY=900
 
 # Commented are example values if you use a non-AWS, S3-compatible service
 # AWS S3 should work with only AWS_STORAGE_BUCKET_NAME and AWS_S3_REGION_NAME
 # non-AWS S3-compatible services will need AWS_STORAGE_BUCKET_NAME,
-# along with both AWS_S3_CUSTOM_DOMAIN and AWS_S3_ENDPOINT_URL
+# along with both AWS_S3_CUSTOM_DOMAIN and AWS_S3_ENDPOINT_URL.
+# AWS_S3_URL_PROTOCOL must end in ":" and defaults to the same protocol as
+# the BookWyrm instance ("http:" or "https:", based on USE_SSL).
 
 # AWS_STORAGE_BUCKET_NAME=        # "example-bucket-name"
 # AWS_S3_CUSTOM_DOMAIN=None       # "example-bucket-name.s3.fr-par.scw.cloud"
+# AWS_S3_URL_PROTOCOL=None        # "http:"
 # AWS_S3_REGION_NAME=None         # "fr-par"
 # AWS_S3_ENDPOINT_URL=None        # "https://s3.fr-par.scw.cloud"
 
@@ -133,9 +144,9 @@ HTTP_X_FORWARDED_PROTO=false
 TWO_FACTOR_LOGIN_VALIDITY_WINDOW=2
 TWO_FACTOR_LOGIN_MAX_SECONDS=60
 
-# Additional hosts to allow in the Content-Security-Policy, "self" (should be DOMAIN)
-# and AWS_S3_CUSTOM_DOMAIN (if used) are added by default.
-# Value should be a comma-separated list of host names.
+# Additional hosts to allow in the Content-Security-Policy, "self" (should be
+# DOMAIN with optionally ":" + PORT) and AWS_S3_CUSTOM_DOMAIN (if used) are
+# added by default.  Value should be a comma-separated list of host names.
 CSP_ADDITIONAL_HOSTS=
 
 # Time before being logged out (in seconds)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
@@ -43,7 +43,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -54,7 +54,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v2
+      uses: github/codeql-action/autobuild@v3
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -68,4 +68,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/lint-frontend.yaml b/.github/workflows/lint-frontend.yaml
@@ -22,7 +22,8 @@ jobs:
       - uses: actions/checkout@v4
 
       - name: Install modules
-        run: npm install stylelint stylelint-config-recommended stylelint-config-standard stylelint-order eslint
+      #  run: npm install stylelint stylelint-config-recommended stylelint-config-standard stylelint-order eslint
+        run: npm install eslint@^8.9.0
 
       # See .stylelintignore for files that are not linted.
       # - name: Run stylelint

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
@@ -94,6 +94,6 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - uses: actions/setup-python@v5
-    - uses: psf/black@22.12.0
+    - uses: psf/black@stable
       with:
-        version: 22.12.0
+        version: "22.*"
diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,8 @@
 # BookWyrm
 .env
 /images/
+/exports/
+/static/
 bookwyrm/static/css/bookwyrm.css
 bookwyrm/static/css/themes/
 !bookwyrm/static/css/themes/bookwyrm-*.scss
@@ -36,3 +38,6 @@ nginx/default.conf
 
 #macOS
 **/.DS_Store
+
+# Docker
+docker-compose.override.yml
diff --git a/README.md b/README.md
@@ -10,7 +10,6 @@ BookWyrm is a social network for tracking your reading, talking about books, wri
 ## Links
 
 [![Mastodon Follow](https://img.shields.io/mastodon/follow/000146121?domain=https%3A%2F%2Ftech.lgbt&style=social)](https://tech.lgbt/@bookwyrm)
-[![Twitter Follow](https://img.shields.io/twitter/follow/BookWyrmSocial?style=social)](https://twitter.com/BookWyrmSocial)
 
  - [Project homepage](https://joinbookwyrm.com/)
  - [Support](https://patreon.com/bookwyrm)

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.7.2
+0.7.3
diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py
@@ -139,14 +139,14 @@ def _get_audience(self, status):  # pylint: disable=no-self-use
                 | (
                     Q(following=status.user) & Q(following=status.reply_parent.user)
                 )  # if the user is following both authors
-            ).distinct()
+            )
 
         # only visible to the poster's followers and tagged users
         elif status.privacy == "followers":
             audience = audience.filter(
                 Q(following=status.user)  # if the user is following the author
             )
-        return audience.distinct()
+        return audience.distinct("id")
 
     @tracer.start_as_current_span("ActivityStream.get_audience")
     def get_audience(self, status):
@@ -156,7 +156,7 @@ def get_audience(self, status):
         status_author = models.User.objects.filter(
             is_active=True, local=True, id=status.user.id
         ).values_list("id", flat=True)
-        return list(set(list(audience) + list(status_author)))
+        return list(set(audience) | set(status_author))
 
     def get_stores_for_users(self, user_ids):
         """convert a list of user ids into redis store ids"""
@@ -183,15 +183,13 @@ class HomeStream(ActivityStream):
     def get_audience(self, status):
         trace.get_current_span().set_attribute("stream_id", self.key)
         audience = super()._get_audience(status)
-        if not audience:
-            return []
         # if the user is following the author
         audience = audience.filter(following=status.user).values_list("id", flat=True)
         # if the user is the post's author
         status_author = models.User.objects.filter(
             is_active=True, local=True, id=status.user.id
         ).values_list("id", flat=True)
-        return list(set(list(audience) + list(status_author)))
+        return list(set(audience) | set(status_author))
 
     def get_statuses_for_user(self, user):
         return models.Status.privacy_filter(
@@ -239,9 +237,7 @@ def _get_audience(self, status):
         )
 
         audience = super()._get_audience(status)
-        if not audience:
-            return models.User.objects.none()
-        return audience.filter(shelfbook__book__parent_work=work).distinct()
+        return audience.filter(shelfbook__book__parent_work=work)
 
     def get_audience(self, status):
         # only show public statuses on the books feed,

diff --git a/bookwyrm/apps.py b/bookwyrm/apps.py
@@ -1,4 +1,5 @@
 """Do further startup configuration and initialization"""
+
 import os
 import urllib
 import logging
@@ -14,16 +15,16 @@ def download_file(url, destination):
     """Downloads a file to the given path"""
     try:
         # Ensure our destination directory exists
-        os.makedirs(os.path.dirname(destination))
+        os.makedirs(os.path.dirname(destination), exist_ok=True)
         with urllib.request.urlopen(url) as stream:
             with open(destination, "b+w") as outfile:
                 outfile.write(stream.read())
-    except (urllib.error.HTTPError, urllib.error.URLError):
-        logger.info("Failed to download file %s", url)
-    except OSError:
-        logger.info("Couldn't open font file %s for writing", destination)
-    except:  # pylint: disable=bare-except
-        logger.info("Unknown error in file download")
+    except (urllib.error.HTTPError, urllib.error.URLError) as err:
+        logger.error("Failed to download file %s: %s", url, err)
+    except OSError as err:
+        logger.error("Couldn't open font file %s for writing: %s", destination, err)
+    except Exception as err:  # pylint:disable=broad-except
+        logger.error("Unknown error in file download: %s", err)
 
 
 class BookwyrmConfig(AppConfig):

diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py
@@ -118,20 +118,22 @@ def get_connectors() -> Iterator[abstract_connector.AbstractConnector]:
 def get_or_create_connector(remote_id: str) -> abstract_connector.AbstractConnector:
     """get the connector related to the object's server"""
     url = urlparse(remote_id)
-    identifier = url.netloc
+    identifier = url.hostname
     if not identifier:
-        raise ValueError("Invalid remote id")
+        raise ValueError(f"Invalid remote id: {remote_id}")
+
+    base_url = f"{url.scheme}://{url.netloc}"
 
     try:
         connector_info = models.Connector.objects.get(identifier=identifier)
     except models.Connector.DoesNotExist:
         connector_info = models.Connector.objects.create(
             identifier=identifier,
             connector_file="bookwyrm_connector",
-            base_url=f"https://{identifier}",
-            books_url=f"https://{identifier}/book",
-            covers_url=f"https://{identifier}/images/covers",
-            search_url=f"https://{identifier}/search?q=",
+            base_url=base_url,
+            books_url=f"{base_url}/book",
+            covers_url=f"{base_url}/images/covers",
+            search_url=f"{base_url}/search?q=",
             priority=2,
         )
 
@@ -188,8 +190,11 @@ def raise_not_valid_url(url: str) -> None:
     if not parsed.scheme in ["http", "https"]:
         raise ConnectorException("Invalid scheme: ", url)
 
+    if not parsed.hostname:
+        raise ConnectorException("Hostname missing: ", url)
+
     try:
-        ipaddress.ip_address(parsed.netloc)
+        ipaddress.ip_address(parsed.hostname)
         raise ConnectorException("Provided url is an IP address: ", url)
     except ValueError:
         # it's not an IP address, which is good

diff --git a/bookwyrm/emailing.py b/bookwyrm/emailing.py
@@ -4,7 +4,7 @@
 
 from bookwyrm import models, settings
 from bookwyrm.tasks import app, EMAIL
-from bookwyrm.settings import DOMAIN
+from bookwyrm.settings import DOMAIN, BASE_URL
 
 
 def email_data():
@@ -14,6 +14,7 @@ def email_data():
         "site_name": site.name,
         "logo": site.logo_small_url,
         "domain": DOMAIN,
+        "base_url": BASE_URL,
         "user": None,
     }
 

diff --git a/bookwyrm/forms/author.py b/bookwyrm/forms/author.py
@@ -15,6 +15,7 @@ class Meta:
             "aliases",
             "bio",
             "wikipedia_link",
+            "wikidata",
             "website",
             "born",
             "died",
@@ -32,6 +33,7 @@ class Meta:
             "wikipedia_link": forms.TextInput(
                 attrs={"aria-describedby": "desc_wikipedia_link"}
             ),
+            "wikidata": forms.TextInput(attrs={"aria-describedby": "desc_wikidata"}),
             "website": forms.TextInput(attrs={"aria-describedby": "desc_website"}),
             "born": forms.SelectDateWidget(attrs={"aria-describedby": "desc_born"}),
             "died": forms.SelectDateWidget(attrs={"aria-describedby": "desc_died"}),

diff --git a/bookwyrm/forms/links.py b/bookwyrm/forms/links.py
@@ -26,7 +26,7 @@ def clean(self):
         url = cleaned_data.get("url")
         filetype = cleaned_data.get("filetype")
         book = cleaned_data.get("book")
-        domain = urlparse(url).netloc
+        domain = urlparse(url).hostname
         if models.LinkDomain.objects.filter(domain=domain).exists():
             status = models.LinkDomain.objects.get(domain=domain).status
             if status == "blocked":

diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py
@@ -1,13 +1,14 @@
 """ PROCEED WITH CAUTION: uses deduplication fields to permanently
 merge book data objects """
+
 from django.core.management.base import BaseCommand
 from django.db.models import Count
 from bookwyrm import models
-from bookwyrm.management.merge import merge_objects
 
 
-def dedupe_model(model):
+def dedupe_model(model, dry_run=False):
     """combine duplicate editions and update related models"""
+    print(f"deduplicating {model.__name__}:")
     fields = model._meta.get_fields()
     dedupe_fields = [
         f for f in fields if hasattr(f, "deduplication_field") and f.deduplication_field
@@ -16,30 +17,42 @@ def dedupe_model(model):
         dupes = (
             model.objects.values(field.name)
             .annotate(Count(field.name))
-            .filter(**{"%s__count__gt" % field.name: 1})
+            .filter(**{f"{field.name}__count__gt": 1})
+            .exclude(**{field.name: ""})
+            .exclude(**{f"{field.name}__isnull": True})
         )
 
         for dupe in dupes:
             value = dupe[field.name]
-            if not value or value == "":
-                continue
             print("----------")
-            print(dupe)
             objs = model.objects.filter(**{field.name: value}).order_by("id")
             canonical = objs.first()
-            print("keeping", canonical.remote_id)
+            action = "would merge" if dry_run else "merging"
+            print(
+                f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:"
+            )
             for obj in objs[1:]:
-                print(obj.remote_id)
-                merge_objects(canonical, obj)
+                print(f"- {obj.remote_id}")
+                absorbed_fields = obj.merge_into(canonical, dry_run=dry_run)
+                print(f"  absorbed fields: {absorbed_fields}")
 
 
 class Command(BaseCommand):
     """deduplicate allllll the book data models"""
 
     help = "merges duplicate book data"
+
+    def add_arguments(self, parser):
+        """add the arguments for this command"""
+        parser.add_argument(
+            "--dry_run",
+            action="store_true",
+            help="don't actually merge, only print what would happen",
+        )
+
     # pylint: disable=no-self-use,unused-argument
     def handle(self, *args, **options):
         """run deduplications"""
-        dedupe_model(models.Edition)
-        dedupe_model(models.Work)
-        dedupe_model(models.Author)
+        dedupe_model(models.Edition, dry_run=options["dry_run"])
+        dedupe_model(models.Work, dry_run=options["dry_run"])
+        dedupe_model(models.Author, dry_run=options["dry_run"])