From 8c6ef0656f3c20a220e501465589dfbbc386505a Mon Sep 17 00:00:00 2001
From: Jacob Heider
Date: Mon, 21 Oct 2024 13:06:17 -0400
Subject: [PATCH] chai-api (#12)

a simple REST api for grabbing db tables
---
 .github/workflows/chai-api.ci.yml |  86 +++++++++++++++++++++++++++++
 README.md                         |   8 +++
 api/.dockerignore                 |   4 ++
 api/.gitignore                    |   4 ++
 api/Cargo.toml                    |  23 ++++++++
 api/Dockerfile                    |  20 +++++++
 api/README.md                     |  14 +++++
 api/src/app_state.rs              |  10 +++
 api/src/db.rs                     |  41 ++++++++++++++
 api/src/handlers.rs               | 103 +++++++++++++++++++++++++++++++++++
 api/src/logging.rs                |  16 +++++
 api/src/main.rs                   |  49 +++++++++++++++++
 api/src/utils.rs                  |  59 ++++++++++++++++++++
 docker-compose.yml                |  23 ++++++++
 14 files changed, 460 insertions(+)
 create mode 100644 .github/workflows/chai-api.ci.yml
 create mode 100644 api/.dockerignore
 create mode 100644 api/.gitignore
 create mode 100644 api/Cargo.toml
 create mode 100644 api/Dockerfile
 create mode 100644 api/README.md
 create mode 100644 api/src/app_state.rs
 create mode 100644 api/src/db.rs
 create mode 100644 api/src/handlers.rs
 create mode 100644 api/src/logging.rs
 create mode 100644 api/src/main.rs
 create mode 100644 api/src/utils.rs

diff --git a/.github/workflows/chai-api.ci.yml b/.github/workflows/chai-api.ci.yml
new file mode 100644
index 0000000..16835c4
--- /dev/null
+++ b/.github/workflows/chai-api.ci.yml
@@ -0,0 +1,86 @@
+name: api.ci
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+env:
+  CARGO_TERM_COLOR: always
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: test
+    runs-on: ubuntu-latest
+
+    services:
+      postgres:
+        image: postgres
+        env:
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: s3cr3t
+          POSTGRES_DB: chai
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5435:5432
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y postgresql-client
+      - name: Run tests
+        run: cargo test --verbose
+        working-directory: api
+        env:
+          DATABASE_URL: postgresql://postgres:s3cr3t@localhost:5435/chai
+
+  fmt:
+    name: Rustfmt
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+          components: rustfmt
+      - run: cargo fmt --all -- --check
+        working-directory: api
+
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+          components: clippy
+      - run: cargo clippy --all-targets --all-features -- -D warnings
+        working-directory: api
+
+  docker-build:
+    name: Build Docker Image
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Build
+        uses: docker/build-push-action@v3
+        with:
+          context: ./api
+          push: false # Set push to false to prevent pushing the image
diff --git a/README.md b/README.md
index cec0d3d..49afa09 100644
--- a/README.md
+++ b/README.md
@@ -227,3 +227,11 @@ psql "postgresql://postgres:s3cr3t@localhost:5435/chai" -c "SELECT count(id) FRO
 ```sh
 psql "postgresql://postgres:s3cr3t@localhost:5435/chai" -c "SELECT * FROM load_history;"
 ```
+
+### restart-api
+
+Refreshes table knowledge from the db.
+
+```sh
+docker-compose restart api
+```
diff --git a/api/.dockerignore b/api/.dockerignore
new file mode 100644
index 0000000..0fedc5b
--- /dev/null
+++ b/api/.dockerignore
@@ -0,0 +1,4 @@
+/target
+.git
+.gitignore
+README.md
diff --git a/api/.gitignore b/api/.gitignore
new file mode 100644
index 0000000..707b866
--- /dev/null
+++ b/api/.gitignore
@@ -0,0 +1,4 @@
+/target
+**/*.rs.bk
+Cargo.lock
+.env
diff --git a/api/Cargo.toml b/api/Cargo.toml
new file mode 100644
index 0000000..91e8ad9
--- /dev/null
+++ b/api/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "chai-api"
+version = "0.1.0"
+edition = "2021"
+authors = ["Jacob Heider "]
+description = "A simple REST API for the CHAI database"
+readme = "README.md"
+license = "MIT"
+repository = "https://github.com/teaxyz/chai-oss"
+
+[dependencies]
+actix-web = "4.3"
+dotenv = "0.15"
+tokio = { version = "1", features = ["full"] }
+log = "0.4"
+env_logger = "0.10"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+chrono = { version = "0.4", features = ["serde"] }
+tokio-postgres = { version = "0.7", features = [
+    "with-serde_json-1",
+    "with-chrono-0_4",
+] }
diff --git a/api/Dockerfile b/api/Dockerfile
new file mode 100644
index 0000000..6c67dfa
--- /dev/null
+++ b/api/Dockerfile
@@ -0,0 +1,20 @@
+FROM lukemathwalker/cargo-chef:latest-rust-1.78.0 as chef
+WORKDIR /app
+
+FROM chef as planner
+COPY . .
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef as builder
+COPY --from=planner /app/recipe.json recipe.json
+RUN cargo chef cook --release --recipe-path recipe.json
+COPY . .
+RUN cargo build --release
+
+FROM debian:bookworm-slim as runtime
+WORKDIR /app
+RUN apt-get update && apt-get install -y curl openssl ca-certificates && rm -rf /var/lib/apt/lists/*
+COPY --from=builder /app/target/release/chai-api /usr/local/bin
+ENV DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai
+EXPOSE 8080
+CMD ["chai-api"]
diff --git a/api/README.md b/api/README.md
new file mode 100644
index 0000000..b1a37c1
--- /dev/null
+++ b/api/README.md
@@ -0,0 +1,14 @@
+# CHAI API
+
+CHAI API is a REST API service for accessing the CHAI database, which contains package manager data.
+
+## Features
+
+- List all tables in the database
+- Fetch paginated data from any table
+- Heartbeat endpoint for health checks
+
+## Requirements
+
+- Rust 1.67 or later
+- PostgreSQL database
diff --git a/api/src/app_state.rs b/api/src/app_state.rs
new file mode 100644
index 0000000..c607723
--- /dev/null
+++ b/api/src/app_state.rs
@@ -0,0 +1,10 @@
+use std::sync::Arc;
+use tokio_postgres::Client;
+
+/// State shared by every request handler.
+pub struct AppState {
+    /// Shared PostgreSQL client used to run queries.
+    pub client: Arc<Client>,
+    /// Names of the tables found in the `public` schema at startup.
+    pub tables: Arc<Vec<String>>,
+}
diff --git a/api/src/db.rs b/api/src/db.rs
new file mode 100644
index 0000000..9708758
--- /dev/null
+++ b/api/src/db.rs
@@ -0,0 +1,41 @@
+use std::sync::Arc;
+use tokio_postgres::{Client, NoTls};
+
+/// Connects to PostgreSQL at `database_url` without TLS and returns a shared client.
+///
+/// # Panics
+///
+/// Panics if the connection cannot be established.
+pub async fn create_db_client(database_url: &str) -> Arc<Client> {
+    let (client, connection) = tokio_postgres::connect(database_url, NoTls)
+        .await
+        .expect("Failed to connect to PostgreSQL");
+
+    // The connection must be awaited for the client to make progress; run it on its own task.
+    tokio::spawn(async move {
+        if let Err(e) = connection.await {
+            log::error!("Database connection error: {}", e);
+        }
+    });
+
+    Arc::new(client)
+}
+
+/// Returns the names of all tables in the `public` schema.
+///
+/// # Panics
+///
+/// Panics if the catalog query fails.
+pub async fn get_tables(client: &Arc<Client>) -> Vec<String> {
+    let rows = client
+        .query(
+            "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'",
+            &[],
+        )
+        .await
+        .expect("Failed to fetch tables");
+
+    rows.into_iter()
+        .map(|row| row.get::<_, String>("table_name"))
+        .collect()
+}
diff --git a/api/src/handlers.rs b/api/src/handlers.rs
new file mode 100644
index 0000000..19326fd
--- /dev/null
+++ b/api/src/handlers.rs
@@ -0,0 +1,103 @@
+use actix_web::{get, web, HttpResponse, Responder};
+use serde::{Deserialize, Serialize};
+use serde_json::{json, Value};
+
+use crate::app_state::AppState;
+use crate::utils::{get_column_names, rows_to_json};
+
+/// Optional pagination parameters taken from the query string.
+#[derive(Deserialize)]
+struct PaginationParams {
+    page: Option<i64>,
+    limit: Option<i64>,
+}
+
+/// JSON envelope returned by [`get_table`].
+#[derive(Serialize)]
+struct PaginatedResponse {
+    table: String,
+    total_count: i64,
+    page: i64,
+    limit: i64,
+    total_pages: i64,
+    columns: Vec<String>,
+    data: Vec<Value>,
+}
+
+/// Lists the table names captured at startup.
+#[get("/tables")]
+pub async fn list_tables(data: web::Data<AppState>) -> impl Responder {
+    HttpResponse::Ok().json(&*data.tables)
+}
+
+/// Health-check endpoint; always responds with `OK`.
+#[get("/heartbeat")]
+pub async fn heartbeat() -> impl Responder {
+    HttpResponse::Ok().body("OK")
+}
+
+/// Returns one page of rows from `table` as JSON.
+///
+/// `page` defaults to 1 and `limit` to 200 (clamped to `1..=1000`). Unknown tables yield a
+/// 404; database failures are logged and reported as a 500.
+#[get("/{table}")]
+pub async fn get_table(
+    path: web::Path<String>,
+    query: web::Query<PaginationParams>,
+    data: web::Data<AppState>,
+) -> impl Responder {
+    let table = path.into_inner();
+    if !data.tables.contains(&table) {
+        return HttpResponse::NotFound().json(json!({
+            "error": format!("Table '{}' not found", table)
+        }));
+    }
+
+    let page = query.page.unwrap_or(1).max(1);
+    let limit = query.limit.unwrap_or(200).clamp(1, 1000);
+    // `page` has no upper bound, so saturate rather than overflow on huge values.
+    let offset = (page - 1).saturating_mul(limit);
+
+    // The name is known to be in `data.tables`; quote it so mixed-case or otherwise
+    // unusual identifiers still form valid SQL.
+    let quoted_table = format!("\"{}\"", table.replace('"', "\"\""));
+    let count_query = format!("SELECT COUNT(*) FROM {}", quoted_table);
+    let data_query = format!("SELECT * FROM {} LIMIT $1 OFFSET $2", quoted_table);
+
+    match data.client.query_one(&count_query, &[]).await {
+        Ok(count_row) => {
+            let total_count: i64 = count_row.get(0);
+            // Integer ceiling division; `limit` is always at least 1.
+            let total_pages = (total_count + limit - 1) / limit;
+
+            match data.client.query(&data_query, &[&limit, &offset]).await {
+                Ok(rows) => {
+                    let columns = get_column_names(&rows);
+                    let data = rows_to_json(&rows);
+                    let response = PaginatedResponse {
+                        table,
+                        total_count,
+                        page,
+                        limit,
+                        total_pages,
+                        columns,
+                        data,
+                    };
+                    HttpResponse::Ok().json(response)
+                }
+                Err(e) => {
+                    log::error!("Database query error: {}", e);
+                    HttpResponse::InternalServerError().json(json!({
+                        "error": "An error occurred while querying the database"
+                    }))
+                }
+            }
+        }
+        Err(e) => {
+            log::error!("Database count query error: {}", e);
+            HttpResponse::InternalServerError().json(json!({
+                "error": "An error occurred while counting rows in the database"
+            }))
+        }
+    }
+}
diff --git a/api/src/logging.rs b/api/src/logging.rs
new file mode 100644
index 0000000..8c8465e
--- /dev/null
+++ b/api/src/logging.rs
@@ -0,0 +1,16 @@
+use env_logger::Env;
+
+/// Initialises `env_logger` from the environment, using `info` as the default filter.
+pub fn setup_logger() {
+    env_logger::init_from_env(Env::default().default_filter_or("info"));
+}
+
+/// Namespace for building the HTTP request-logging middleware.
+pub struct Logger;
+
+impl Logger {
+    /// Builds the actix request logger with this service's access-log format.
+    pub fn default() -> actix_web::middleware::Logger {
+        actix_web::middleware::Logger::new("%a '%r' %s %b '%{Referer}i' '%{User-Agent}i' %T")
+    }
+}
diff --git a/api/src/main.rs b/api/src/main.rs
new file mode 100644
index 0000000..647d6a8
--- /dev/null
+++ b/api/src/main.rs
@@ -0,0 +1,49 @@
+mod app_state;
+mod db;
+mod handlers;
+mod logging;
+mod utils;
+
+use actix_web::{web, App, HttpServer};
+use dotenv::dotenv;
+use std::env;
+use std::sync::Arc;
+
+use crate::app_state::AppState;
+use crate::db::create_db_client;
+use crate::handlers::{get_table, heartbeat, list_tables};
+use crate::logging::setup_logger;
+
+/// Reads configuration from the environment, caches the table list, and serves the API.
+#[actix_web::main]
+async fn main() -> std::io::Result<()> {
+    dotenv().ok();
+    setup_logger();
+
+    let database_url = env::var("DATABASE_URL").expect("DATABASE_URL must be set");
+    let host = env::var("HOST").unwrap_or_else(|_| "0.0.0.0".to_string());
+    let port = env::var("PORT").unwrap_or_else(|_| "8080".to_string());
+    let bind_address = format!("{}:{}", host, port);
+
+    let client = create_db_client(&database_url).await;
+    let tables = Arc::new(db::get_tables(&client).await);
+
+    log::info!("Available tables: {:?}", tables);
+    log::info!("Starting server at http://{}", bind_address);
+
+    HttpServer::new(move || {
+        App::new()
+            .wrap(logging::Logger::default())
+            .app_data(web::Data::new(AppState {
+                client: Arc::clone(&client),
+                tables: Arc::clone(&tables),
+            }))
+            // Fixed routes are registered ahead of the catch-all `/{table}` route.
+            .service(list_tables)
+            .service(heartbeat)
+            .service(get_table)
+    })
+    .bind(&bind_address)?
+    .run()
+    .await
+}
diff --git a/api/src/utils.rs b/api/src/utils.rs
new file mode 100644
index 0000000..264ca33
--- /dev/null
+++ b/api/src/utils.rs
@@ -0,0 +1,59 @@
+use serde_json::{json, Value};
+use tokio_postgres::types::{Json, Type};
+use tokio_postgres::Row;
+
+/// Returns the column names of the first row, or an empty list when `rows` is empty.
+pub fn get_column_names(rows: &[Row]) -> Vec<String> {
+    if let Some(row) = rows.first() {
+        row.columns()
+            .iter()
+            .map(|col| col.name().to_string())
+            .collect()
+    } else {
+        vec![]
+    }
+}
+
+/// Converts each row into a JSON object keyed by column name.
+///
+/// Values are read as `Option<T>` so that SQL `NULL` becomes JSON `null` rather than a
+/// panic inside `Row::get`. Column types without an explicit mapping are emitted as `null`.
+pub fn rows_to_json(rows: &[Row]) -> Vec<Value> {
+    rows.iter()
+        .map(|row| {
+            let mut map = serde_json::Map::new();
+            for (i, column) in row.columns().iter().enumerate() {
+                let value: Value = match *column.type_() {
+                    Type::INT2 => json!(row.get::<_, Option<i16>>(i)),
+                    Type::INT4 => json!(row.get::<_, Option<i32>>(i)),
+                    Type::INT8 => json!(row.get::<_, Option<i64>>(i)),
+                    Type::FLOAT4 => json!(row.get::<_, Option<f32>>(i)),
+                    Type::FLOAT8 => json!(row.get::<_, Option<f64>>(i)),
+                    Type::BOOL => json!(row.get::<_, Option<bool>>(i)),
+                    Type::VARCHAR | Type::TEXT | Type::BPCHAR => {
+                        json!(row.get::<_, Option<String>>(i))
+                    }
+                    Type::TIMESTAMP => {
+                        let ts: Option<chrono::NaiveDateTime> = row.get(i);
+                        json!(ts.map(|t| t.to_string()))
+                    }
+                    Type::TIMESTAMPTZ => {
+                        let ts: Option<chrono::DateTime<chrono::Utc>> = row.get(i);
+                        json!(ts.map(|t| t.to_rfc3339()))
+                    }
+                    Type::DATE => {
+                        let date: Option<chrono::NaiveDate> = row.get(i);
+                        json!(date.map(|d| d.to_string()))
+                    }
+                    Type::JSON | Type::JSONB => {
+                        let json_value: Option<Json<Value>> = row.get(i);
+                        json_value.map_or(Value::Null, |j| j.0)
+                    }
+                    _ => Value::Null,
+                };
+                map.insert(column.name().to_string(), value);
+            }
+            Value::Object(map)
+        })
+        .collect()
+}
diff --git a/docker-compose.yml b/docker-compose.yml
index d52e463..8e152f6 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -49,6 +49,29 @@ services:
     working_dir: /src
     entrypoint: ["./run_pipeline.sh"]
 
+  api:
+    build:
+      context: ./api
+      dockerfile: Dockerfile
+    environment:
+      - DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai
+      - HOST=0.0.0.0
+      - PORT=8080
+    ports:
+      - "8080:8080"
+    depends_on:
+      db:
+        condition: service_healthy
+      alembic:
+        condition: service_completed_successfully
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/heartbeat"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 5s
+
   monitor:
     build: monitor
     environment: