diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml new file mode 100644 index 000000000000..f5ff2f63e5c8 --- /dev/null +++ b/.github/workflows/semgrep.yml @@ -0,0 +1,45 @@ +# Finds code problems by structural pattern matching. +# +# New rules can be added to test_root/semgrep/ and they should be picked up +# automatically. See https://semgrep.dev/docs/ for documentation. + +name: Semgrep code quality + +on: + pull_request: + push: + branches: + - master + +jobs: + run_semgrep: + name: Semgrep analysis + runs-on: "${{ matrix.os }}" + strategy: + matrix: + os: [ "ubuntu-20.04" ] + python-version: [ "3.8" ] + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - uses: actions/setup-python@v4 + with: + python-version: "${{ matrix.python-version }}" + + - name: Install semgrep + run: | + make pre-requirements + pip-sync requirements/edx/semgrep.txt + + - name: Run semgrep + env: + # Peg this to some reasonable value so that semgrep's rewrapping + # of messages doesn't break up lines in an unpredictable manner: + # https://github.com/returntocorp/semgrep/issues/8608 + COLUMNS: 80 + run: | + semgrep scan --config test_root/semgrep/ --error --quiet \ + -- lms cms common openedx diff --git a/Makefile b/Makefile index 32314eef18f0..8664111ebefb 100644 --- a/Makefile +++ b/Makefile @@ -102,6 +102,7 @@ REQ_FILES = \ requirements/edx/testing \ requirements/edx/development \ requirements/edx/assets \ + requirements/edx/semgrep \ scripts/xblock/requirements define COMMON_CONSTRAINTS_TEMP_COMMENT diff --git a/openedx/core/djangoapps/content_staging/tasks.py b/openedx/core/djangoapps/content_staging/tasks.py index 9df79a9a7e87..951a6f0a645d 100644 --- a/openedx/core/djangoapps/content_staging/tasks.py +++ b/openedx/core/djangoapps/content_staging/tasks.py @@ -6,6 +6,7 @@ from celery import shared_task from celery_utils.logged_task import LoggedTask +from edx_django_utils.monitoring import set_code_owner_attribute from .data import CLIPBOARD_PURPOSE from .models import StagedContent @@ -14,6 +15,7 @@ @shared_task(base=LoggedTask) +@set_code_owner_attribute def delete_expired_clipboards(staged_content_ids: list[int]): """ A Celery task to delete StagedContent clipboard entries that are no longer diff --git a/openedx/features/content_tagging/tasks.py b/openedx/features/content_tagging/tasks.py index a328634f0735..3ffa6c29f107 100644 --- a/openedx/features/content_tagging/tasks.py +++ b/openedx/features/content_tagging/tasks.py @@ -9,6 +9,7 @@ from celery_utils.logged_task import LoggedTask from django.conf import settings from django.contrib.auth import get_user_model +from edx_django_utils.monitoring import set_code_owner_attribute from opaque_keys.edx.keys import CourseKey, UsageKey from openedx_tagging.core.tagging.models import Taxonomy @@ -73,6 +74,7 @@ def _delete_tags(content_object: CourseKey | UsageKey) -> None: @shared_task(base=LoggedTask) +@set_code_owner_attribute def update_course_tags(course_key_str: str) -> bool: """ Updates the automatically-managed tags for a course @@ -98,6 +100,7 @@ def update_course_tags(course_key_str: str) -> bool: @shared_task(base=LoggedTask) +@set_code_owner_attribute def delete_course_tags(course_key_str: str) -> bool: """ Delete the tags for a Course (when the course itself has been deleted). @@ -119,6 +122,7 @@ def delete_course_tags(course_key_str: str) -> bool: @shared_task(base=LoggedTask) +@set_code_owner_attribute def update_xblock_tags(usage_key_str: str) -> bool: """ Updates the automatically-managed tags for a XBlock @@ -149,6 +153,7 @@ def update_xblock_tags(usage_key_str: str) -> bool: @shared_task(base=LoggedTask) +@set_code_owner_attribute def delete_xblock_tags(usage_key_str: str) -> bool: """ Delete the tags for a XBlock (when the XBlock itself is deleted). diff --git a/requirements/edx/semgrep.in b/requirements/edx/semgrep.in new file mode 100644 index 000000000000..0fc07d64532b --- /dev/null +++ b/requirements/edx/semgrep.in @@ -0,0 +1,13 @@ +# Requirements to run Semgrep code quality checks +# +# DON'T JUST ADD NEW DEPENDENCIES!!! +# +# If you open a pull request that adds a new dependency, you should: +# * verify that the dependency has a license compatible with AGPLv3 +# * confirm that it has no system requirements beyond what we already install +# * run "make upgrade" to update the detailed requirements files +# + +-c ../constraints.txt + +semgrep # Semgrep performs structural code searches diff --git a/requirements/edx/semgrep.txt b/requirements/edx/semgrep.txt new file mode 100644 index 000000000000..f11f4660f7b9 --- /dev/null +++ b/requirements/edx/semgrep.txt @@ -0,0 +1,99 @@ +# +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: +# +# make upgrade +# +attrs==23.1.0 + # via + # glom + # jsonschema + # referencing + # semgrep +boltons==21.0.0 + # via + # face + # glom + # semgrep +bracex==2.3.post1 + # via wcmatch +certifi==2023.7.22 + # via requests +charset-normalizer==2.0.12 + # via + # -c requirements/edx/../constraints.txt + # requests +click==8.1.6 + # via + # -c requirements/edx/../constraints.txt + # click-option-group + # semgrep +click-option-group==0.5.6 + # via semgrep +colorama==0.4.6 + # via semgrep +defusedxml==0.7.1 + # via semgrep +face==22.0.0 + # via glom +glom==22.1.0 + # via semgrep +idna==3.4 + # via requests +importlib-resources==6.0.1 + # via + # jsonschema + # jsonschema-specifications +jsonschema==4.19.0 + # via semgrep +jsonschema-specifications==2023.7.1 + # via jsonschema +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +packaging==23.1 + # via semgrep +peewee==3.16.3 + # via semgrep +pkgutil-resolve-name==1.3.10 + # via jsonschema +pygments==2.16.1 + # via rich +python-lsp-jsonrpc==1.0.0 + # via semgrep +referencing==0.30.2 + # via + # jsonschema + # jsonschema-specifications +requests==2.31.0 + # via semgrep +rich==13.5.2 + # via semgrep +rpds-py==0.10.0 + # via + # jsonschema + # referencing +ruamel-yaml==0.17.32 + # via semgrep +ruamel-yaml-clib==0.2.7 + # via ruamel-yaml +semgrep==1.38.0 + # via -r requirements/edx/semgrep.in +tomli==2.0.1 + # via semgrep +typing-extensions==4.7.1 + # via + # rich + # semgrep +ujson==5.8.0 + # via python-lsp-jsonrpc +urllib3==1.26.16 + # via + # -c requirements/edx/../constraints.txt + # requests + # semgrep +wcmatch==8.5 + # via semgrep +zipp==3.16.2 + # via importlib-resources diff --git a/test_root/semgrep/README.rst b/test_root/semgrep/README.rst new file mode 100644 index 000000000000..92fa16f1bf05 --- /dev/null +++ b/test_root/semgrep/README.rst @@ -0,0 +1,21 @@ +Semgrep linters +############### + +Linting rules for use with `semgrep`_ during CI checks on PRs. + +Status +****** + +This is an experimental approach to developing new linting rules. Semgrep provides by-example structural matching that can be easier to write and maintain than procedural code inspecting ASTs. If the approach works out, we can expand our use of Semgrep; if it becomes a problem for some reason, we can switch to adding pylint rules in edx-lint. + +Ignoring failures +***************** + +If you need to tell semgrep to ignore a block of code, put a ``# nosemgrep`` comment on or before the first matched line. + +Documentation for writing new rules: + +- https://semgrep.dev/docs/writing-rules/rule-syntax/ +- https://semgrep.dev/docs/writing-rules/pattern-syntax/ + +.. _semgrep: https://github.com/returntocorp/semgrep diff --git a/test_root/semgrep/celery-code-owner.yml b/test_root/semgrep/celery-code-owner.yml new file mode 100644 index 000000000000..c8cf417e43a8 --- /dev/null +++ b/test_root/semgrep/celery-code-owner.yml @@ -0,0 +1,104 @@ +rules: + - id: celery-missing-code-owner-function + # We can't link directly to the howto doc in question because + # semgrep has a bug around long lines: + # https://github.com/returntocorp/semgrep/issues/8608 + # + # Here's the intended URL, for reference: + # https://edx.readthedocs.io/projects/edx-django-utils/en/latest/monitoring/how_tos/add_code_owner_custom_attribute_to_an_ida.html#handling-celery-tasks + message: | + Celery tasks need to be decorated with `@set_code_owner_attribute` + (from the `edx_django_utils.monitoring` module) in order for us + to correctly track code-owners for errors and in other monitoring. + + For more information, see the Celery section of "Add Code_Owner + Custom Attributes to an IDA" in the Monitoring How-Tos of + . + languages: + - python + patterns: + # Find functions with decorators containing the substring "task" + # in their name. This might end up with false positives, but + # there are a lot of variations on how we decorate Celery tasks. + + # This pattern should match all decorators, whether or not + # they're called as a function (both `@foo(...)` and `@foo`) + # and whether or not there are other decorators above or below. + - pattern-either: + - pattern: | + @$TASK + def $F(...): + ... + - pattern: | + @$TASK(...) + def $F(...): + ... + + # Restrict the decorators of interest to just ones with "task" + # in the name. + - metavariable-pattern: + metavariable: $TASK + patterns: + - pattern-regex: >- + [^\(]*task(\(|$) + + # Filter out all of the properly annotated functions, leaving + # just the ones of interest. + - pattern-not: | + @set_code_owner_attribute + def $F(...): + ... + # This is an alternative approach that we have needed in rare cases. + - pattern-not: | + def $F(...): + ... + set_code_owner_attribute_from_module(...) + + severity: WARNING + + # This is like celery-missing-code-owner-function but for the `run` + # method of Task classes. + - id: celery-missing-code-owner-class + message: | + Celery task classes need to decorate their `run` method with + `@set_code_owner_attribute` (imported from `edx_django_utils.monitoring`) + in order for us to correctly track code-owners for errors and in other + monitoring. Alternatively, the `run` method can call + `set_code_owner_attribute_from_module`. + + For more information, see the Celery section of "Add Code_Owner + Custom Attributes to an IDA" in the Monitoring How-Tos of + . + languages: + - python + patterns: + - pattern: | + class $C(..., $SUPER, ...): + def run(...): + ... + - metavariable-pattern: + metavariable: $SUPER + patterns: + - pattern-regex: "Task$" + + - pattern-not: | + class $C(..., $SUPER, ...): + + @set_code_owner_attribute + def run(...): + ... + + - pattern-not: | + class $C(..., $SUPER, ...): + + @set_code_owner_attribute + def run(...): + ... + - pattern-not: | + class $C(..., $SUPER, ...): + + def run(...): + ... + set_code_owner_attribute_from_module(...) + + severity: WARNING