From 04b50d1cb3ae3a39fb70a6669fe9de6a47ad4745 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 22 Apr 2024 17:58:35 -0700 Subject: [PATCH] ci: remove ingest tests job from CI workflow These ingest tests have been failing for a while now and were not required for merge. As we have no immediate intention of fixing these and their value in this context is doubtful, remove them. --- .github/workflows/ci.yml | 43 ---------------------- scripts/test-unstructured-ingest-helper.sh | 34 ----------------- 2 files changed, 77 deletions(-) delete mode 100644 scripts/test-unstructured-ingest-helper.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5b81d852..d822d332 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -104,49 +104,6 @@ jobs: CI=true make test make check-coverage - test_ingest: - strategy: - matrix: - python-version: ["3.9","3.10"] - runs-on: ubuntu-latest - env: - NLTK_DATA: ${{ github.workspace }}/nltk_data - needs: lint - steps: - - name: Checkout unstructured repo for integration testing - uses: actions/checkout@v4 - with: - repository: 'Unstructured-IO/unstructured' - - name: Checkout this repo - uses: actions/checkout@v4 - with: - path: inference - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Test - env: - GH_READ_ONLY_ACCESS_TOKEN: ${{ secrets.GH_READ_ONLY_ACCESS_TOKEN }} - SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }} - DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }} - run: | - python${{ matrix.python-version }} -m venv .venv - source .venv/bin/activate - [ ! 
-d "$NLTK_DATA" ] && mkdir "$NLTK_DATA" - make install-ci - pip install -e inference/ - sudo apt-get update - sudo apt-get install -y libmagic-dev poppler-utils libreoffice pandoc - sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5 - sudo apt-get install -y tesseract-ocr - sudo apt-get install -y tesseract-ocr-kor - sudo apt-get install -y diffstat - tesseract --version - make install-all-ingest - # only run ingest tests that check expected output diffs. - bash inference/scripts/test-unstructured-ingest-helper.sh - changelog: runs-on: ubuntu-latest steps: diff --git a/scripts/test-unstructured-ingest-helper.sh b/scripts/test-unstructured-ingest-helper.sh deleted file mode 100644 index 1cc1ab9e..00000000 --- a/scripts/test-unstructured-ingest-helper.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash - -# This is intended to be run from an unstructured checkout, not in this repo -# The goal here is to see what changes the current branch would introduce to unstructured -# fixtures - -INGEST_COMMANDS=( - test_unstructured_ingest/src/azure.sh - test_unstructured_ingest/src/biomed-api.sh - test_unstructured_ingest/src/biomed-path.sh - test_unstructured_ingest/src/box.sh - test_unstructured_ingest/src/dropbox.sh - test_unstructured_ingest/src/gcs.sh - test_unstructured_ingest/src/onedrive.sh - test_unstructured_ingest/src/s3.sh -) - -EXIT_STATUSES=() - -# Run each command and capture its exit status -for INGEST_COMMAND in "${INGEST_COMMANDS[@]}"; do - $INGEST_COMMAND - EXIT_STATUSES+=($?) -done - -# Check for failures -for STATUS in "${EXIT_STATUSES[@]}"; do - if [[ $STATUS -ne 0 ]]; then - echo "At least one ingest command failed! Scroll up to see which" - exit 1 - fi -done - -echo "No diff's resulted from any ingest commands"