diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..70fdeac --- /dev/null +++ b/Gemfile @@ -0,0 +1,5 @@ +source 'https://rubygems.org' + +gem 'sinatra' +gem 'csvlint' +gem 'webrick' \ No newline at end of file diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..8faee6f --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: ruby csvlint_server.rb -p $PORT \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..f265943 --- /dev/null +++ b/README.md @@ -0,0 +1,112 @@ +# CSVLint-API + +CSVLint is a Ruby-based server for validating CSV files. It checks CSV files against standard structures and schemas, providing detailed feedback on any issues detected. + +## Features + +- **Structure Validation**: Checks for structural issues, such as inconsistent row lengths, incorrect quoting, and malformed line endings. +- **Schema Validation**: Validates CSV data against schemas (e.g., JSON-based) to ensure data formats, types, and constraints are met. +- **Dialect Options**: Flexible options for parsing CSV files, such as delimiters, quoting characters, and line terminators. +- **Detailed Reporting**: Provides error, warning, and informational feedback on validation results. + +## Installation + +### Prerequisites + +Make sure you have the following installed on your system: + +- **Ruby** (version 2.6 or higher) +- **Bundler** (for managing Ruby dependencies) + +### Step 1: Clone the Repository + +Clone the repository from GitHub and navigate into the project directory: + +```bash +git clone https://github.com/theodi/csvlint-api.git +cd csvlint-api +``` + +### Step 2: Install Ruby Dependencies + +Ensure you have Bundler installed. 
If not, you can install it with: + +```bash +gem install bundler +``` + +Then install all required Ruby gems: + +```bash +bundle install +``` + +### Step 3: Set Up Environment Variables + +Create a `.env` file in the project root (you can start by copying `.env.example`): + +```bash +cp .env.example .env +``` + +Set any necessary environment variables in `.env`. At minimum, you should define: + +- `PORT`: The port on which the server will run (default: `4567`). + +### Step 4: Run the Server Locally + +To start the CSVLint server locally, use: + +```bash +ruby csvlint_server.rb +``` + +The server will start on the specified port (default `4567`). You can access it at `http://localhost:4567`. + +### Usage + +#### Web Interface + +To use CSVLint through a web interface, simply open the URL in your browser: + +```bash +http://localhost:4567 +``` + +#### API Usage + +The CSVLint API allows you to programmatically validate CSV files. You can upload a CSV file or provide a URL to a CSV with optional schema and dialect options. + +**Example `POST` Request:** + +```bash +curl -X POST http://localhost:4567/validate \ +-F "file=@/path/to/yourfile.csv" \ +-F "schema=@/path/to/yourschema.json" \ +-F "dialect={\"delimiter\":\",\",\"quoteChar\":\"\\\"\"}" +``` + +**Response:** + +The API responds with JSON validation results. Here’s an example response format: + +```json +{ + "valid": true, + "errors": [], + "warnings": [], + "info_messages": [] +} +``` + +#### Deploying to Render + +1. **Log in to Render** and create a new service. +2. **Link your GitHub repository**. +3. **Choose the Ruby environment** and specify `Gemfile` for dependencies. +4. Set environment variables, including `PORT`. +5. Click **Create Web Service** to deploy. + +### License + +This project is licensed under the MIT License.
\ No newline at end of file
diff --git a/config.ru b/config.ru
new file mode 100644
index 0000000..574df8b
--- /dev/null
+++ b/config.ru
@@ -0,0 +1,2 @@
+require './csvlint_server'
+run Sinatra::Application
\ No newline at end of file
diff --git a/csvlint_server.rb b/csvlint_server.rb
new file mode 100644
index 0000000..4b1cbd2
--- /dev/null
+++ b/csvlint_server.rb
@@ -0,0 +1,135 @@
+# frozen_string_literal: true
+
+# CSVLint API server.
+#
+# Exposes one endpoint, POST /validate, which takes a multipart form and
+# responds with the validation findings as JSON. Form fields:
+#   file    - (required) the CSV file to validate
+#   schema  - (optional) a JSON schema file to validate the CSV against
+#   dialect - (optional) a JSON object of CSV dialect options
+
+require 'sinatra'
+require 'csvlint'
+require 'webrick'
+require 'securerandom'
+require 'json'
+require 'tmpdir'
+
+set :port, ENV['PORT'] || 4567
+set :server, 'webrick'
+
+helpers do
+  # Aborts the request with +status_code+ and a JSON body {"error": message}.
+  # Relies on Sinatra's +halt+, so it does not return to the caller.
+  def halt_with_error(status_code, message)
+    halt status_code, { error: message }.to_json
+  end
+
+  # True when +param+ is a multipart file upload: a hash carrying the uploaded
+  # data under :tempfile. A missing field is nil and a text field is a String.
+  def upload?(param)
+    param.is_a?(Hash) && !param[:tempfile].nil?
+  end
+
+  # Parses the optional +dialect+ form field.
+  #
+  # @param raw [String, nil] raw field value
+  # @return [Hash] dialect options; empty when the field is absent or blank
+  #
+  # Halts with 400 when the value is not valid JSON or not a JSON object, so a
+  # malformed dialect yields a JSON error instead of an unhandled exception.
+  def parse_dialect(raw)
+    text = raw.to_s
+    return {} if text.strip.empty?
+
+    dialect = JSON.parse(text)
+    halt_with_error(400, 'Invalid dialect: expected a JSON object') unless dialect.is_a?(Hash)
+    dialect
+  rescue JSON::ParserError => e
+    halt_with_error(400, "Invalid dialect JSON: #{e.message}")
+  end
+
+  # Copies +upload+ into a uniquely named file in the system temp directory
+  # and yields a read-only File handle on the copy. The handle is closed and
+  # the copy deleted once the block finishes, even when it raises.
+  #
+  # @return [Object] the block's return value
+  def with_temp_copy(upload, prefix, extension, &block)
+    path = File.join(Dir.tmpdir, "#{prefix}#{SecureRandom.uuid}#{extension}")
+    File.open(path, 'wb') { |copy| IO.copy_stream(upload[:tempfile], copy) }
+    File.open(path, &block)
+  ensure
+    File.delete(path) if path && File.exist?(path)
+  end
+
+  # Loads the uploaded JSON schema; returns nil when no schema was uploaded.
+  def load_schema(schema_upload)
+    return nil unless schema_upload
+
+    with_temp_copy(schema_upload, 'temp_schema_', '.json') do |schema_file|
+      Csvlint::Schema.load_from_json(schema_file)
+    end
+  end
+
+  # Turns csvlint messages (errors, warnings or info messages) into plain
+  # hashes so they can be serialized to JSON.
+  def serialize_messages(messages)
+    messages.map do |message|
+      {
+        category: message.category,
+        type: message.type,
+        row: message.row,
+        column: message.column,
+        content: message.content
+      }
+    end
+  end
+
+  # Validates the uploaded CSV with the given dialect and optional schema.
+  #
+  # @return [Hash] :valid, :errors, :warnings and :info_messages
+  def validation_report(csv_upload, schema_upload, dialect)
+    schema = load_schema(schema_upload)
+
+    with_temp_copy(csv_upload, 'temp_', '.csv') do |csv_file|
+      validator = Csvlint::Validator.new(csv_file, dialect, schema)
+      # NOTE(review): csvlint may already validate inside the constructor;
+      # the explicit call is kept from the original code - confirm it is needed.
+      validator.validate
+
+      {
+        valid: validator.valid?,
+        errors: serialize_messages(validator.errors),
+        warnings: serialize_messages(validator.warnings),
+        info_messages: serialize_messages(validator.info_messages)
+      }
+    end
+  end
+end
+
+post '/validate' do
+  content_type :json
+
+  csv_upload = params[:file]
+  schema_upload = params[:schema]
+  # An empty "schema" field means no schema was supplied.
+  schema_upload = nil if schema_upload.respond_to?(:empty?) && schema_upload.empty?
+
+  # Reject malformed requests up front with a clear 400 instead of failing later.
+  unless upload?(csv_upload)
+    halt_with_error(400, 'Missing CSV: upload it as the multipart form field "file"')
+  end
+  if schema_upload && !upload?(schema_upload)
+    halt_with_error(400, 'Invalid schema: upload it as the multipart form field "schema"')
+  end
+  dialect = parse_dialect(params[:dialect])
+
+  begin
+    validation_report(csv_upload, schema_upload, dialect).to_json
+  rescue StandardError => e
+    # Unexpected failure while copying or validating: keep the JSON error body
+    # and signal the failure through the status code.
+    status 500
+    { error: "Validation failed: #{e.message}" }.to_json
+  end
+end
\ No newline at end of file