Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: Added pyupgrade to ruff #372

Merged
merged 8 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# GitHub Actions workflow that checks the project is formatted and linted.
#
# Triggers: every pull request, and pushes to `main`.
# The actual checks live in the makefile target `lint-check`, so CI and local
# runs stay in sync.

name: run-linting

on:
  push:
    branches: [main]
  pull_request:

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      # v4 runs on a supported Node runtime; v3 relies on the deprecated Node 16.
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          # Reuse uv's download cache between runs to speed up installs.
          enable-cache: true

      - name: Set up Python
        run: uv python install 3.9

      - name: Install dependencies
        run: make install

      - name: Lint
        id: lint
        run: |
          make lint-check

45 changes: 0 additions & 45 deletions .pre-commit-config.yaml

This file was deleted.

1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
from __future__ import annotations

from textdescriptives.about import __version__

Expand Down
4 changes: 3 additions & 1 deletion docs/tutorials/filter_corpus_using_quality.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@
"from datasets import load_dataset\n",
"\n",
"# stream in the dataset\n",
"dataset = load_dataset(\"mc4\", \"en\", streaming=True, split=\"train\", trust_remote_code=True)\n",
"dataset = load_dataset(\n",
" \"mc4\", \"en\", streaming=True, split=\"train\", trust_remote_code=True\n",
")\n",
"\n",
"# download the first 1 000\n",
"dataset = dataset.take(1000)\n",
Expand Down
13 changes: 7 additions & 6 deletions docs/tutorials/sklearn_integration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
],
"source": [
"from textdescriptives.utils import load_sms_data\n",
"\n",
"df = load_sms_data()\n",
"df.head()"
]
Expand All @@ -152,7 +153,7 @@
"# to textdescriptives.extract_metrics\n",
"descriptive_stats_extractor = TextDescriptivesFeaturizer(\n",
" lang=\"en\", metrics=[\"descriptive_stats\"]\n",
" )"
")"
]
},
{
Expand Down Expand Up @@ -184,7 +185,7 @@
"from sklearn.pipeline import Pipeline\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.model_selection import train_test_split \n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn import set_config\n",
"\n",
Expand All @@ -197,10 +198,9 @@
" (\n",
" \"featurizer\",\n",
" ColumnTransformer(\n",
" [(\"text_processing\", descriptive_stats_extractor, \"message\")]\n",
" ,\n",
" # removes the `text_processing__` prefix from feature names\n",
" verbose_feature_names_out=False, \n",
" [(\"text_processing\", descriptive_stats_extractor, \"message\")],\n",
" # removes the `text_processing__` prefix from feature names\n",
" verbose_feature_names_out=False,\n",
" ),\n",
" ),\n",
" (\"imputer\", SimpleImputer(strategy=\"median\")),\n",
Expand Down Expand Up @@ -366,6 +366,7 @@
],
"source": [
"import pandas as pd\n",
"\n",
"# extract feature importances\n",
"feature_importance_mapping = list(\n",
" zip(\n",
Expand Down
14 changes: 8 additions & 6 deletions makefile
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
install:
@echo "--- 🚀 Installing project ---"
uv sync --extra docs --extra tests --extra style --extra style
uv sync --extra docs --extra tests --extra style
uv pip install pip
uv pip install -r tests/requirements.txt

lint:
@echo "--- 🧹 Running linters ---"
ruff format . # running ruff formatting
ruff check **/*.py --fix # running ruff linting
uv run ruff format . # running ruff formatting
uv run ruff check **/*.py --fix # running ruff linting

lint-check:
@echo "--- 🧹 Check is project is linted ---"
ruff format . --check # running ruff formatting
ruff check **/*.py # running ruff linting
uv run ruff format . --check # running ruff formatting
uv run ruff check **/*.py # running ruff linting

test:
@echo "--- 🧪 Running tests ---"
make install
pytest tests/
pytest tests/ -n auto

build-docs:
@echo "--- 📚 Building docs ---"
Expand Down
17 changes: 15 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ classifiers = [
"Operating System :: Microsoft :: Windows",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]

keywords = [
Expand Down Expand Up @@ -49,8 +53,8 @@ repository = "https://github.com/HLasse/textdescriptives"
documentation = "https://hlasse.github.io/TextDescriptives/"

[project.optional-dependencies]
style = ["black==24.1.1", "pre-commit==3.6.0", "ruff==0.1.15", "mypy==1.8.0"]
tests = ["pytest>=7.1.3", "pytest-cov>=3.0.0"]
style = ["ruff==0.8.3"]
tests = ["pytest>=7.1.3", "pytest-cov>=3.0.0", "pytest-xdist"]
docs = [
"pydantic==2.1",
"sphinx>=5.3.0",
Expand Down Expand Up @@ -115,3 +119,12 @@ build_command = "python -m pip install build; python -m build"

[tool.ruff]
exclude = [".venv", ".env", ".git", "__pycache__"]

[tool.ruff.lint]
select = ["UP", "I"]

[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]

[tool.ruff.lint.pydocstyle]
convention = "google"
2 changes: 1 addition & 1 deletion src/textdescriptives/about.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" About textdescriptives, version number is specified in the setup.cfg
"""About textdescriptives, version number is specified in the setup.cfg
file."""

# if python >= 3.8, use importlib.metadata otherwise use pkg_resources
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/dependency_distance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of statistics related to dependency distance."""
"""Calculation of statistics related to dependency distance."""

from typing import Callable

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/descriptive_stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of descriptive statistics."""
"""Calculation of descriptive statistics."""

from typing import Callable, Dict, Union

Expand Down
7 changes: 5 additions & 2 deletions src/textdescriptives/components/information_theory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculate the entropy and perplexity of a corpus."""
"""Calculate the entropy and perplexity of a corpus."""

from typing import Callable, Dict, Union

Expand Down Expand Up @@ -64,7 +64,10 @@ def per_word_perplexity_getter(doc: Union[Doc, Span]) -> float:
else:
perplexity = perplexity_getter(doc)

return perplexity / len(doc)
len_doc = len(doc)
if len_doc:
return perplexity / len(doc)
return np.nan


def set_docspan_extension(
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/pos_proportions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of statistics that require a pos-tagger in the pipeline."""
"""Calculation of statistics that require a pos-tagger in the pipeline."""

from typing import Callable, Counter, List, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/quality.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Component for calculating quality metrics."""
"""Component for calculating quality metrics."""

from collections import Counter, defaultdict
from typing import Callable, Dict, List, Mapping, Optional, Tuple, Union
Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/quality_data_classes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Data classes used for the quality component."""
"""Data classes used for the quality component."""

from typing import Any, Dict, Optional, Tuple, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/readability.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Calculation of various readability metrics."""
"""Calculation of various readability metrics."""

from typing import Callable, Dict

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/components/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Utility functions for calculating various text descriptives."""
"""Utility functions for calculating various text descriptives."""

from typing import Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/extractors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Extract metrics as Pandas DataFrame."""
"""Extract metrics as Pandas DataFrame."""

from typing import Any, Dict, Iterable, List, Optional, Union

Expand Down
2 changes: 1 addition & 1 deletion src/textdescriptives/load_components.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Adds all components to a spaCy pipeline."""
"""Adds all components to a spaCy pipeline."""

from spacy.language import Language
from spacy.tokens import Doc
Expand Down
4 changes: 3 additions & 1 deletion tests/books.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
""" These books and several tests are borrowed from
"""These books and several tests are borrowed from
https://github.com/mholtzscher/spacy_readability."""

from __future__ import annotations

oliver_twist = """Among other public buildings in a certain town, which for many reasons
it will be prudent to refrain from mentioning, and to which I will
assign no fictitious name, there is one anciently common to most towns,
Expand Down
11 changes: 7 additions & 4 deletions tests/test_coherence.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from __future__ import annotations

import warnings

import numpy as np
import pytest
import spacy

import textdescriptives as td # noqa: F401
import warnings


@pytest.fixture(scope="function")
def nlp():
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("textdescriptives/coherence")
return nlp
nlp_en = spacy.load("en_core_web_sm")
nlp_en.add_pipe("textdescriptives/coherence")
return nlp_en


def test_coherence_integration(nlp):
Expand Down
4 changes: 3 additions & 1 deletion tests/test_dependency_distance.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import ftfy
import numpy as np
import pytest
import spacy
import textdescriptives as td # noqa: F401

import textdescriptives as td # noqa: F401

from .books import flatland, oliver_twist, secret_garden

Expand Down
6 changes: 4 additions & 2 deletions tests/test_descriptive_stats.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from __future__ import annotations

import warnings

import ftfy
import pytest
from spacy.lang.en import English
Expand All @@ -6,8 +10,6 @@

from .books import flatland, oliver_twist, secret_garden

import warnings


@pytest.fixture(scope="function")
def nlp():
Expand Down
2 changes: 2 additions & 0 deletions tests/test_extractors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import pytest
import spacy

Expand Down
2 changes: 2 additions & 0 deletions tests/test_information.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import numpy as np
import pytest
import spacy
Expand Down
3 changes: 3 additions & 0 deletions tests/test_load_components.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations

import pytest
import spacy

import textdescriptives as td # noqa: F401


Expand Down
2 changes: 2 additions & 0 deletions tests/test_pos_proportions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import pytest
import spacy
from spacy.tokens import Doc
Expand Down
Loading
Loading