diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a39cd89d..675e7897 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,8 +23,8 @@ jobs:
           pip install .[quality]
       - name: Check quality
         run: |
-          ruff check tests src setup.py # linter
-          ruff format --check tests src setup.py # formatter
+          ruff check tests src # linter
+          ruff format --check tests src # formatter
 
   test:
     runs-on: ubuntu-latest
diff --git a/Makefile b/Makefile
index 8e325385..a967892a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 .PHONY: quality style test
 
-check_dirs := src tests examples setup.py
+check_dirs := src tests examples
 
 quality:
 	ruff check $(check_dirs)  # linter
diff --git a/pyproject.toml b/pyproject.toml
index 7f72ee25..93c5e398 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,16 +1,123 @@
+[project]
+name = "datatrove"
+# Accepted version formats: x.y.z.dev0, x.y.z.rc1 or x.y.z (dots only, never dashes).
+version = "0.0.1.dev0"
+description = "HuggingFace library to process and filter large amounts of webdata"
+readme = "README.md"
+requires-python = ">=3.10.0"
+license = { text = "Apache-2.0" }
+authors = [{ name = "HuggingFace Inc.", email = "guilherme@huggingface.co" }]
+keywords = ["data", "machine", "learning", "processing"]
+classifiers = [
+    "Intended Audience :: Developers",
+    "Intended Audience :: Education",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+# Hard requirements, installed unconditionally with the package.
+dependencies = [
+    "dill>=0.3.0",
+    "fsspec>=2023.6.0",
+    "huggingface-hub>=0.17.0",
+    "humanize",
+    "loguru>=0.7.0",
+    "multiprocess",
+    "numpy>=1.25.0",
+    "tqdm",
+]
+
+[project.optional-dependencies]
+# Optional feature sets. Each extra adds the third-party packages for one
+# area of functionality and can be installed on its own, for example
+# `pip install datatrove[io]`.
+cli = ["rich"]
+io = [
+    "faust-cchardet",
+    "pyarrow",
+    "python-magic",
+    "warcio",
+    "datasets",
+]
+s3 = ["s3fs>=2023.12.2"]
+processing = [
+    "fasttext-wheel",
+    "nltk",
+    "inscriptis",
+    "readability-lxml @ git+https://github.com/huggingface/python-readability.git@speedup",
+    "tldextract",
+    "trafilatura",
+    "tokenizers",
+]
+
+# Development extras. `testing`, `all` and `dev` pull in other groups through
+# self-referencing requirements (`datatrove[...]`) rather than repeating
+# their package lists.
+quality = ["ruff>=0.1.5"]
+testing = [
+    "datatrove[cli]",
+    "datatrove[io]",
+    "datatrove[processing]",
+    "datatrove[s3]",
+    "pytest",
+    "pytest-timeout",
+    "pytest-xdist",
+    "moto[s3,server]",
+]
+all = [
+    "datatrove[quality]",
+    "datatrove[testing]",
+]
+# `dev` installs exactly what `all` installs.
+dev = ["datatrove[all]"]
+
+[project.urls]
+Repository = "https://github.com/huggingface/datatrove"
+
+[project.scripts]
+check_dataset = "datatrove.tools.check_dataset:main"
+failed_logs = "datatrove.tools.failed_logs:main"
+inspect_data = "datatrove.tools.inspect_data:main"
+launch_pickled_pipeline = "datatrove.tools.launch_pickled_pipeline:main"
+merge_stats = "datatrove.tools.merge_stats:main"
+
+[build-system]
+requires = ["setuptools>=61.0"]  # first release that reads metadata from the [project] table
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+where = ["src"]  # src layout: packages are discovered under src/
+
+[tool.setuptools.package-data]
+"*" = ["assets/*"]  # every package, matching the former setup.py package_data={"": ["assets/*"]}
+
 [tool.ruff]
-# Ignored rules:
-#   "E501" -> line length violation
-#   "C901" -> `function_name` is too complex
-ignore = ["C901", "E501"]
-select = ["C", "E", "F", "I", "W"]
+ignore = [
+  "C901", # `function_name` is too complex
+  "E501", # line length violation
+]
+select = [
+  "C", # flake8-comprehensions (C4) and mccabe complexity (C90)
+  "E", # pycodestyle errors
+  "F", # Pyflakes
+  "I", # isort
+  "W" # pycodestyle warnings
+]
 line-length = 119
 
 [tool.ruff.per-file-ignores]
-# Ignored rules:
-#   "F401" -> module imported but unused
-"__init__.py" = ["F401"]
+"__init__.py" = [
+  "F401" # module imported but unused
+]
 
 [tool.ruff.isort]
 lines-after-imports = 2
-known-first-party = ["datatrove"]
+known-first-party = [
+  "datatrove"
+]
+
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 60b0024c..00000000
--- a/setup.py
+++ /dev/null
@@ -1,97 +0,0 @@
-from setuptools import find_packages, setup
-
-
-install_requires = [
-    "dill>=0.3.0",
-    "fsspec>=2023.6.0",
-    "huggingface-hub>=0.17.0",
-    "humanize",
-    "loguru>=0.7.0",
-    "multiprocess",
-    "numpy>=1.25.0",
-    "tqdm",
-]
-
-extras = {}
-
-extras["cli"] = [
-    "rich",
-]
-
-extras["io"] = ["faust-cchardet", "pyarrow", "python-magic", "warcio", "datasets"]
-
-extras["s3"] = [
-    "s3fs>=2023.12.2",
-]
-
-extras["processing"] = [
-    "fasttext-wheel",
-    "nltk",
-    "inscriptis",
-    "readability-lxml @ git+https://github.com/huggingface/python-readability.git@speedup",
-    "tldextract",
-    "trafilatura",
-    "tokenizers",
-]
-
-extras["quality"] = [
-    "ruff>=0.1.5",
-]
-
-extras["testing"] = (
-    extras["cli"]
-    + extras["io"]
-    + extras["processing"]
-    + extras["s3"]
-    + [
-        "pytest",
-        "pytest-timeout",
-        "pytest-xdist",
-        "moto[s3,server]",
-    ]
-)
-
-extras["all"] = extras["quality"] + extras["testing"]
-
-extras["dev"] = extras["all"]
-
-setup(
-    name="datatrove",
-    version="0.0.1.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
-    description="HuggingFace library to process and filter large amounts of webdata",
-    long_description=open("README.md", encoding="utf-8").read(),
-    long_description_content_type="text/markdown",
-    author="HuggingFace Inc.",
-    author_email="guilherme@huggingface.co",
-    url="https://github.com/huggingface/datatrove",
-    license="Apache 2.0",
-    packages=find_packages("src"),
-    package_dir={"": "src"},
-    package_data={"": ["assets/*"]},
-    include_package_data=True,
-    python_requires=">=3.10.0",
-    install_requires=install_requires,
-    extras_require=extras,
-    classifiers=[
-        "Intended Audience :: Developers",
-        "Intended Audience :: Education",
-        "Intended Audience :: Science/Research",
-        "License :: OSI Approved :: Apache Software License",
-        "Operating System :: OS Independent",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    ],
-    keywords="data machine learning processing",
-    entry_points={
-        "console_scripts": [
-            "check_dataset=datatrove.tools.check_dataset:main",
-            "merge_stats=datatrove.tools.merge_stats:main",
-            "launch_pickled_pipeline=datatrove.tools.launch_pickled_pipeline:main",
-            "failed_logs=datatrove.tools.failed_logs:main",
-            "inspect_data=datatrove.tools.inspect_data:main",
-        ]
-    },
-)