Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test for ocrd-network #1184

Merged
merged 27 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
eb0da74
Add a test for workflow run in ocrd_all
joschrew Feb 7, 2024
79c5b79
remove duplicates
MehmedGIT Feb 12, 2024
3d501b5
Make make assets in Dockerfile skipable
joschrew Feb 12, 2024
7f77b57
Add a test for workflow run in ocrd_all
joschrew Mar 13, 2024
9cdb222
remove duplicates
MehmedGIT Feb 12, 2024
419a535
Make make assets in Dockerfile skipable
joschrew Mar 13, 2024
a55d961
Merge branch 'test-workflow' of github.com:OCR-D/core into test-workflow
MehmedGIT Apr 11, 2024
cb8cde7
merge master
MehmedGIT Apr 11, 2024
dfd78d5
make ocrd all tests callable from Makefile
MehmedGIT Apr 11, 2024
14576cf
update actions and add python 3.12
Apr 10, 2024
34459e0
update actions and add python 3.12
Apr 10, 2024
7d119aa
update actions
Apr 10, 2024
2a7ef7b
Remove ocrd_all-tests from core makefile
joschrew Apr 16, 2024
3effd63
ci: disable scrutinizer build
kba Apr 16, 2024
8dae53d
bashlib input-files: apply download_file on each input_file
bertsky Apr 25, 2024
0195099
bashlib input-files: let None pass through
bertsky Apr 25, 2024
feee374
scrutinizer: try to fix py version
bertsky Apr 25, 2024
48d52e3
:memo: changelog
kba May 3, 2024
71ec3a2
Merge branch 'master' into update/workflows
kba May 3, 2024
c8f41a5
drop distutils, support python 3.12
kba May 3, 2024
b788b59
:memo: changelog
kba May 3, 2024
df77ace
Merge branch 'master' into update/workflows
kba May 3, 2024
cf4664a
disable ocrd all test in core
MehmedGIT May 3, 2024
e88d646
:memo: changelog
kba May 3, 2024
6ecbaa8
make network-integration-test: disable ocrd_all test
kba May 3, 2024
f714742
Merge branch 'master' into test-workflow
kba May 3, 2024
1bd8fc4
ci: fix integration test
kba May 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ WORKDIR /data
CMD ["/usr/local/bin/ocrd", "--help"]

FROM ocrd_core_base as ocrd_core_test
# Optionally skip make assets with this arg
ARG SKIP_ASSETS
WORKDIR /build-ocrd
COPY Makefile .
RUN make assets
RUN if test -z "$SKIP_ASSETS" || test $SKIP_ASSETS -eq 0 ; then make assets ; fi
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect SKIP_ASSETS=0 to disable the behavior. Did you mean test $SKIP_ASSETS -eq 1?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think SKIP_ASSETS should be renamed to MAKE_ASSETS to reverse the logic.

SKIP_ASSETS itself has a negative meaning, so a false value is a double negative: not skipping means the assets are made.

SKIP_ASSETS=1 (true) -> do not make assets
SKIP_ASSETS=0 (false) -> make assets

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For me, 0 is false, so if SKIP_ASSETS is 0 then `make assets` is not skipped.

COPY tests ./tests
COPY .gitmodules .
COPY requirements_test.txt .
Expand Down
22 changes: 22 additions & 0 deletions tests/network/test_ocrd_all_workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from src.ocrd_network.models import StateEnum
from tests.network.config import test_config
from tests.network.utils import (
poll_job_till_timeout_fail_or_success,
post_ps_workflow_request,
)

PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL


def test_ocrd_all_workflow():
# This test is supposed to run with ocrd_all not with just core on its own
# Note: the used workflow path is volume mapped
path_to_wf = "/ocrd-data/assets/ocrd_all-test-workflow.txt"
kba marked this conversation as resolved.
Show resolved Hide resolved
path_to_mets = "/data/mets.xml"
wf_job_id = post_ps_workflow_request(PROCESSING_SERVER_URL, path_to_wf, path_to_mets)
job_state = poll_job_till_timeout_fail_or_success(
test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}",
tries=30,
wait=10
)
assert job_state == StateEnum.success
76 changes: 22 additions & 54 deletions tests/network/test_processing_server.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,50 @@
from time import sleep
from requests import get, post
from requests import get
from src.ocrd_network import NETWORK_AGENT_WORKER
from src.ocrd_network.models import StateEnum
from tests.base import assets
from tests.network.config import test_config
from tests.network.utils import (
poll_job_till_timeout_fail_or_success,
post_ps_processing_request,
post_ps_workflow_request,
)

PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL


def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum:
job_state = StateEnum.unset
while tries > 0:
sleep(wait)
response = get(url=test_url)
assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
job_state = response.json()["state"]
if job_state == StateEnum.success or job_state == StateEnum.failed:
break
tries -= 1
return job_state


def test_processing_server_connectivity():
test_url = f'{PROCESSING_SERVER_URL}/'
test_url = f"{PROCESSING_SERVER_URL}/"
response = get(test_url)
assert response.status_code == 200, \
f'Processing server is not reachable on: {test_url}, {response.status_code}'
message = response.json()['message']
assert message.startswith('The home page of'), \
f'Processing server home page message is corrupted'
f"Processing server is not reachable on: {test_url}, {response.status_code}"
message = response.json()["message"]
assert message.startswith("The home page of"), \
f"Processing server home page message is corrupted"


# TODO: The processing workers are still not registered when deployed separately.
# Fix that by extending the processing server.
def test_processing_server_deployed_processors():
test_url = f'{PROCESSING_SERVER_URL}/processor'
test_url = f"{PROCESSING_SERVER_URL}/processor"
response = get(test_url)
processors = response.json()
assert response.status_code == 200, \
f'Processing server: {test_url}, {response.status_code}'
assert processors == [], f'Mismatch in deployed processors'
f"Processing server: {test_url}, {response.status_code}"
assert processors == [], f"Mismatch in deployed processors"


def test_processing_server_processing_request():
path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
path_to_mets = assets.path_to("kant_aufklaerung_1784/data/mets.xml")
test_processing_job_input = {
"path_to_mets": path_to_mets,
"input_file_grps": ['OCR-D-IMG'],
"output_file_grps": ['OCR-D-DUMMY'],
"input_file_grps": ["OCR-D-IMG"],
"output_file_grps": ["OCR-D-DUMMY"],
"agent_type": NETWORK_AGENT_WORKER,
"parameters": {}
}
test_processor = 'ocrd-dummy'
test_url = f'{PROCESSING_SERVER_URL}/processor/run/{test_processor}'
response = post(
url=test_url,
headers={"accept": "application/json"},
json=test_processing_job_input
)
# print(response.json())
assert response.status_code == 200, \
f'Processing server: {test_url}, {response.status_code}'
processing_job_id = response.json()["job_id"]
assert processing_job_id

job_state = poll_till_timeout_fail_or_success(
test_processor = "ocrd-dummy"
processing_job_id = post_ps_processing_request(PROCESSING_SERVER_URL, test_processor, test_processing_job_input)
job_state = poll_job_till_timeout_fail_or_success(
test_url=f"{PROCESSING_SERVER_URL}/processor/job/{processing_job_id}",
tries=10,
wait=10
Expand All @@ -76,20 +56,8 @@ def test_processing_server_workflow_request():
# Note: the used workflow path is volume mapped
path_to_dummy_wf = "/ocrd-data/assets/dummy-workflow.txt"
path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml')

# submit the workflow job
test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True"
response = post(
url=test_url,
headers={"accept": "application/json"},
files={"workflow": open(path_to_dummy_wf, 'rb')}
)
# print(response.json())
assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
wf_job_id = response.json()["job_id"]
assert wf_job_id

job_state = poll_till_timeout_fail_or_success(
wf_job_id = post_ps_workflow_request(PROCESSING_SERVER_URL, path_to_dummy_wf, path_to_mets)
job_state = poll_job_till_timeout_fail_or_success(
test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}",
tries=30,
wait=10
Expand Down
50 changes: 50 additions & 0 deletions tests/network/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from requests import get, post
from time import sleep
from src.ocrd_network.models import StateEnum


def poll_job_till_timeout_fail_or_success(
    test_url: str,
    tries: int = 10,
    wait: int = 10
) -> StateEnum:
    """Poll a job status endpoint until the job finishes or the tries run out.

    Before every request the function sleeps for ``wait`` seconds, then issues
    a GET to ``test_url`` and reads the ``"state"`` field of the JSON reply.

    Args:
        test_url: URL of the job status endpoint to query.
        tries: Maximum number of polls to perform.
        wait: Seconds to sleep before each poll.

    Returns:
        The last state read from the endpoint; ``StateEnum.unset`` when no
        poll was performed at all (``tries`` is not positive).

    Raises:
        AssertionError: If any poll answers with a status code other than 200.
    """
    last_state = StateEnum.unset
    for _ in range(tries):
        sleep(wait)
        reply = get(url=test_url)
        assert reply.status_code == 200, f"Processing server: {test_url}, {reply.status_code}"
        last_state = reply.json()["state"]
        finished = last_state == StateEnum.success or last_state == StateEnum.failed
        if finished:
            # Terminal state reached, no need to spend the remaining tries.
            return last_state
    return last_state


def post_ps_processing_request(ps_server_host: str, test_processor: str, test_job_input: dict) -> str:
    """Submit a processing job to the processing server and return its job id.

    Args:
        ps_server_host: Base URL of the processing server.
        test_processor: Name of the processor, appended to ``/processor/run/``.
        test_job_input: Job description, sent as the JSON body of the request.

    Returns:
        The value of the ``"job_id"`` field of the JSON reply.

    Raises:
        AssertionError: If the reply status code is not 200 or the returned
            job id is falsy.
    """
    endpoint = f"{ps_server_host}/processor/run/{test_processor}"
    reply = post(url=endpoint, headers={"accept": "application/json"}, json=test_job_input)
    assert reply.status_code == 200, f"Processing server: {endpoint}, {reply.status_code}"
    job_id = reply.json()["job_id"]
    assert job_id
    return job_id


# TODO: Can be extended to include other parameters such as page_wise
def post_ps_workflow_request(ps_server_host: str, path_to_test_wf: str, path_to_test_mets: str) -> str:
    """Submit a workflow job to the processing server and return its job id.

    The workflow file is uploaded as the multipart field ``"workflow"``; the
    METS path is passed in the ``mets_path`` query parameter and ``page_wise``
    is fixed to ``True``.

    Args:
        ps_server_host: Base URL of the processing server.
        path_to_test_wf: Local path of the workflow file to upload.
        path_to_test_mets: METS path placed verbatim into the query string.

    Returns:
        The value of the ``"job_id"`` field of the JSON reply.

    Raises:
        AssertionError: If the reply status code is not 200 or the returned
            job id is falsy.
        OSError: If the workflow file cannot be opened.
    """
    test_url = f"{ps_server_host}/workflow/run?mets_path={path_to_test_mets}&page_wise=True"
    # Use a context manager so the file handle is closed even when the
    # request raises; the bare open() previously leaked the handle.
    with open(path_to_test_wf, "rb") as wf_file:
        response = post(
            url=test_url,
            headers={"accept": "application/json"},
            files={"workflow": wf_file}
        )
    assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
    wf_job_id = response.json()["job_id"]
    assert wf_job_id
    return wf_job_id
Loading