Skip to content

Commit

Permalink
ci (#47)
Browse files Browse the repository at this point in the history
* crates tests

* +ci

* add paths
  • Loading branch information
sanchitram1 authored Jan 3, 2025
1 parent 7dffc61 commit 5d1d2c6
Show file tree
Hide file tree
Showing 8 changed files with 947 additions and 3 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Continuous integration: runs the unit and system test suites with coverage.
name: CI

on:
  push:
    branches: ["main"]
    paths:
      - "**/*.py"
      - "tests/**"
      - "core/**"
      - "package_managers/**"
      # Changes to the test configuration or to this workflow must also be verified.
      - "pytest.ini"
      - ".github/workflows/ci.yml"
  pull_request:
    branches: ["main"]
    paths:
      - "**/*.py"
      - "tests/**"
      - "core/**"
      - "package_managers/**"
      - "pytest.ini"
      - ".github/workflows/ci.yml"

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # v4/v5 of the official actions run on a supported Node runtime
      # (the v3 releases depend on the deprecated Node 16 runtime).
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r tests/requirements.txt
      - name: Run tests
        # Each invocation selects one marker; --cov-append accumulates coverage
        # from the earlier invocations into a single report.
        run: |
          pytest tests/unit/test_crates_transformer.py -v -m transformer --cov=core --cov-report=xml --cov-report=term-missing
          pytest tests/unit/test_db_models.py -v -m db --cov=core --cov-append --cov-report=xml --cov-report=term-missing
          pytest tests/system -v -m system --cov=core --cov-append --cov-report=xml --cov-report=term-missing
10 changes: 7 additions & 3 deletions core/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
func,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Mapped, declarative_base, relationship

naming_convention = {
"ix": "ix_%(column_0_label)s",
Expand Down Expand Up @@ -117,8 +117,8 @@ class Version(Base):
DateTime, nullable=False, default=func.now(), server_default=func.now()
)

# package: Mapped["Package"] = relationship()
# license: Mapped["License"] = relationship()
package: Mapped["Package"] = relationship()
license: Mapped["License"] = relationship()

def to_dict(self):
return {
Expand Down Expand Up @@ -184,6 +184,10 @@ class DependsOn(Base):
DateTime, nullable=False, default=func.now(), server_default=func.now()
)

version: Mapped["Version"] = relationship()
dependency: Mapped["Package"] = relationship()
dependency_type: Mapped["DependsOnType"] = relationship()

def to_dict(self):
return {
"version_id": self.version_id,
Expand Down
15 changes: 15 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[pytest]
# Test discovery paths
testpaths = tests

# Directories added to sys.path so tests can import project modules
pythonpath = .

# Markers for different test types
markers =
transformer: Unit tests for transformer classes
db: Unit tests for database models and operations
system: End-to-end system tests requiring full setup

# Import test modules via importlib (no __init__.py or unique basenames required)
addopts = --import-mode=importlib
146 changes: 146 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""
Common test fixtures and configurations.
"""

import uuid
from unittest.mock import MagicMock

import pytest
import testing.postgresql
from sqlalchemy import create_engine, event, text
from sqlalchemy.orm import Session

from core.config import URLTypes, UserTypes
from core.db import DB
from core.models import Base, PackageManager, Source, URLType


@pytest.fixture(scope="session")
def mock_db():
    """
    Build a MagicMock constrained to the DB interface for transformer tests.

    Every URL-type selector and every known source name resolves to a stub
    object whose ``id`` is a fixed UUID, so the identifiers stay the same
    for the whole test session.
    """

    def _stub(raw_id):
        # A bare mock whose only meaningful attribute is a deterministic id.
        stub = MagicMock()
        stub.id = uuid.UUID(raw_id)
        return stub

    fake_db = MagicMock(spec=DB)

    # Selector method on the DB -> fixed UUID of the URL type it returns.
    url_type_ids = {
        "select_url_types_homepage": "00000000-0000-0000-0000-000000000001",
        "select_url_types_repository": "00000000-0000-0000-0000-000000000002",
        "select_url_types_documentation": "00000000-0000-0000-0000-000000000003",
        "select_url_types_source": "00000000-0000-0000-0000-000000000004",
    }
    for selector, raw_id in url_type_ids.items():
        getattr(fake_db, selector).return_value = _stub(raw_id)

    # Source name -> stub source; an unknown name raises KeyError on lookup.
    sources_by_name = {
        "github": _stub("00000000-0000-0000-0000-000000000005"),
        "crates": _stub("00000000-0000-0000-0000-000000000006"),
    }
    fake_db.select_source_by_name.side_effect = lambda name: sources_by_name[name]

    return fake_db


@pytest.fixture(scope="session")
def url_types(mock_db):
    """Return a URLTypes configuration built from the session-wide mocked DB."""
    configured_url_types = URLTypes(mock_db)
    return configured_url_types


@pytest.fixture(scope="session")
def user_types(mock_db):
    """Return a UserTypes configuration built from the session-wide mocked DB."""
    configured_user_types = UserTypes(mock_db)
    return configured_user_types


@pytest.fixture(scope="class")
def pg_db():
    """
    Yield a throwaway PostgreSQL instance for integration tests.

    The fixture is class-scoped, so one instance is started per test class
    and shut down when the ``with`` block exits after that class finishes.
    """
    with testing.postgresql.Postgresql() as temp_server:
        yield temp_server


def _get_or_create(session, model, **attrs):
    """Return the first ``model`` row matching ``attrs``, adding one if absent."""
    instance = session.query(model).filter_by(**attrs).first()
    if instance is None:
        instance = model(**attrs)
        session.add(instance)
    return instance


@pytest.fixture
def db_session(pg_db):
    """
    Provide a SQLAlchemy session bound to the temporary PostgreSQL server.

    Setup:
      1. Registers a ``before_create`` hook that defines ``uuid_generate_v4()``
         as a wrapper around ``gen_random_uuid()`` (the server must provide
         the latter), then creates all tables.
      2. Seeds the URL types, the sources and a package manager for the
         "crates" source, skipping rows that already exist (the server is
         shared by every test of a class).

    Teardown rolls back uncommitted work, removes the metadata hook and
    disposes the engine, so neither listeners nor pooled connections pile up
    from one test to the next.
    """
    engine = create_engine(pg_db.url())

    def create_uuid_function(target, connection, **kw):
        connection.execute(
            text("""
                CREATE OR REPLACE FUNCTION uuid_generate_v4()
                RETURNS uuid
                AS $$
                BEGIN
                RETURN gen_random_uuid();
                END;
                $$ LANGUAGE plpgsql;
            """)
        )

    # Base.metadata is a process-wide object: register the hook explicitly so
    # it can be removed again at teardown instead of stacking up per test.
    event.listen(Base.metadata, "before_create", create_uuid_function)
    try:
        Base.metadata.create_all(engine)

        with Session(engine) as session:
            # Initialize URL types
            for url_type_name in ("homepage", "repository", "documentation", "source"):
                _get_or_create(session, URLType, name=url_type_name)
            session.commit()

            # Initialize sources
            sources = {
                source_type: _get_or_create(session, Source, type=source_type)
                for source_type in ("github", "crates")
            }
            session.commit()

            # Initialize the package manager attached to the crates source
            _get_or_create(session, PackageManager, source_id=sources["crates"].id)
            session.commit()

            yield session
            session.rollback()
    finally:
        event.remove(Base.metadata, "before_create", create_uuid_function)
        # Release pooled connections to the temporary server.
        engine.dispose()


@pytest.fixture
def mock_csv_reader():
    """
    Factory fixture for faking CSV reads in transformer tests.

    The returned factory takes one row (``data``) and produces a stand-in
    for ``_read_csv_rows``: a callable that ignores its ``file_key``
    argument and returns a fresh iterator yielding just that row.
    """

    def create_mock_reader(data):
        def single_row_reader(file_key):
            # A new iterator per call, so the row can be consumed repeatedly.
            return iter([data])

        return single_row_reader

    return create_mock_reader
5 changes: 5 additions & 0 deletions tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pytest==8.1.1
testing.postgresql==1.3.0
sqlalchemy==2.0.28
psycopg2-binary==2.9.9
pytest-cov==4.1.0
85 changes: 85 additions & 0 deletions tests/system/test_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""
System tests for the complete data pipeline.
These tests verify the entire system working together with:
1. Real PostgreSQL database
2. Actual data transformations
3. End-to-end data flow
These tests require the full Docker Compose setup and are skipped
if the required environment is not available.
"""

import os

import pytest
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session

from core.models import Base


@pytest.mark.system
class TestSystemIntegration:
    """
    System tests that require the full Docker Compose setup.
    These tests verify the entire system working together.
    """

    # Connection string used when CHAI_DATABASE_URL is not set.
    DEFAULT_DATABASE_URL = "postgresql://postgres:s3cr3t@localhost:5435/chai"

    def _database_url(self):
        """Return CHAI_DATABASE_URL, or the default URL when it is unset."""
        return os.environ.get("CHAI_DATABASE_URL", self.DEFAULT_DATABASE_URL)

    def is_postgres_ready(self):
        """
        Check if PostgreSQL is available by running ``SELECT 1``.

        Returns:
            bool: True if PostgreSQL is accessible, False otherwise
        """
        engine = None
        try:
            engine = create_engine(self._database_url())
            with engine.connect() as conn:
                conn.execute(text("SELECT 1"))
            return True
        except Exception as e:
            # Deliberately broad: any failure (bad URL, missing driver,
            # refused connection) means the tests should be skipped.
            print(f"PostgreSQL not ready: {e}")
            return False
        finally:
            # The probe engine is throwaway; release its connections.
            if engine is not None:
                engine.dispose()

    @pytest.fixture
    def db_session(self):
        """
        Create a PostgreSQL database session.

        This fixture:
        1. Skips the test if PostgreSQL is not available
        2. Creates all tables if they don't exist
        3. Provides a session for the test
        4. Rolls back changes after the test and disposes the engine
        """
        if not self.is_postgres_ready():
            pytest.skip("PostgreSQL is not available")

        # Use the same URL resolution as the readiness probe; reading the
        # environment variable alone yields None when it is unset.
        engine = create_engine(self._database_url())
        try:
            Base.metadata.create_all(engine)

            with Session(engine) as session:
                yield session
                session.rollback()
        finally:
            engine.dispose()

    @pytest.mark.skipif(
        not os.environ.get("RUN_SYSTEM_TESTS"), reason="System tests not enabled"
    )
    def test_full_pipeline(self, db_session):
        """
        Test the entire pipeline with actual database.

        This test is intended to verify:
        1. Data loading from CSV files
        2. Transformation of raw data
        3. Database schema compatibility
        4. Data integrity across models
        """
        # TODO: Implement full pipeline test
        pass
Loading

0 comments on commit 5d1d2c6

Please sign in to comment.