-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.com/shoodeen/2023-2-level-ctlr
- Loading branch information
Showing
120 changed files
with
7,909 additions
and
4,156 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,7 +190,7 @@ jobs: | |
bash admin_utils/stage_2_crawler_tests/_stage_collect_articles.sh "$PR_NAME" "$PR_AUTHOR" | ||
bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_5_scrapper -m stage_2_4_dataset_volume_check | ||
- name: Archive raw dataset | ||
uses: actions/upload-artifact@v3 | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: raw-dataset | ||
path: | | ||
|
@@ -212,7 +212,7 @@ jobs: | |
uses: fipl-hse/[email protected] | ||
- name: Download previously collected dataset | ||
continue-on-error: true | ||
uses: actions/download-artifact@v3 | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: raw-dataset | ||
- name: Run metadata validation | ||
|
@@ -262,7 +262,7 @@ jobs: | |
uses: fipl-hse/[email protected] | ||
- name: Download previously collected dataset | ||
continue-on-error: true | ||
uses: actions/download-artifact@v3 | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: raw-dataset | ||
- name: Run crawler config checks | ||
|
@@ -285,39 +285,16 @@ jobs: | |
uses: fipl-hse/[email protected] | ||
- name: Download previously collected dataset | ||
continue-on-error: true | ||
uses: actions/download-artifact@v3 | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: raw-dataset | ||
- name: Run CorpusManager tests | ||
run: | | ||
source venv/bin/activate | ||
source venv/bin/activate | ||
export PYTHONPATH=$(pwd):$PYTHONPATH | ||
python admin_utils/unpack_archived_dataset.py lab_6_pipeline | ||
bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_3_2_corpus_manager_checks | ||
checking-conllu-token-works-correctly: | ||
name: ConlluToken displays tokens | ||
needs: [ | ||
milestone-2-pipeline | ||
] | ||
env: | ||
PR_AUTHOR: ${{ github.actor }} | ||
runs-on: ubuntu-latest | ||
timeout-minutes: 2 | ||
|
||
steps: | ||
- uses: actions/checkout@v4 | ||
- name: Setup FIPL environment | ||
uses: fipl-hse/[email protected] | ||
- name: Download previously collected dataset | ||
continue-on-error: true | ||
uses: actions/download-artifact@v3 | ||
with: | ||
name: raw-dataset | ||
- name: Run Conllu tests | ||
run: | | ||
bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_3_3_conllu_token_checks | ||
checking-student-processing-works-for-admin-dataset: | ||
name: Pipe processed admin data | ||
needs: [ | ||
|
@@ -334,7 +311,7 @@ jobs: | |
uses: fipl-hse/[email protected] | ||
- name: Download previously collected dataset | ||
continue-on-error: true | ||
uses: actions/download-artifact@v3 | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: raw-dataset | ||
- name: Run metadata validation | ||
|
@@ -361,15 +338,15 @@ jobs: | |
uses: fipl-hse/[email protected] | ||
- name: Download previously collected dataset | ||
continue-on-error: true | ||
uses: actions/download-artifact@v3 | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: raw-dataset | ||
- name: Run validation of `_processed.txt` files | ||
run: | | ||
bash admin_utils/stage_3_pipeline_tests/_stage_check_on_student_dataset.sh "$PR_AUTHOR" | ||
- name: Archive processed dataset | ||
continue-on-error: true | ||
uses: actions/upload-artifact@v3 | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: processed-dataset | ||
path: | | ||
|
@@ -392,18 +369,15 @@ jobs: | |
uses: fipl-hse/[email protected] | ||
- name: Download previously collected dataset | ||
continue-on-error: true | ||
uses: actions/download-artifact@v3 | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: processed-dataset | ||
- name: Run validation of `_processed.txt` files | ||
run: | | ||
source venv/bin/activate | ||
source venv/bin/activate | ||
export PYTHONPATH=$(pwd):$PYTHONPATH | ||
python admin_utils/unpack_archived_dataset.py lab_6_pipeline | ||
bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_3_5_student_dataset_validation | ||
- name: Run validation of `.conllu` files | ||
run: | | ||
bash admin_utils/stage_3_pipeline_tests/_stage_check_student_conllu_validation.sh | ||
running-pos-pipeline-tests: | ||
name: POSFrequencyPipeline tests | ||
|
@@ -412,7 +386,6 @@ jobs: | |
checking-student-processing-works-for-admin-dataset, | ||
checking-student-processing-works-for-student-dataset, | ||
checking-corpus-manager-creates-instances-correctly, | ||
checking-conllu-token-works-correctly, | ||
] | ||
env: | ||
PR_AUTHOR: ${{ github.actor }} | ||
|
@@ -425,16 +398,15 @@ jobs: | |
uses: fipl-hse/[email protected] | ||
- name: Download previously collected dataset | ||
continue-on-error: true | ||
uses: actions/download-artifact@v3 | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: processed-dataset | ||
- name: Congratulations | ||
run: | | ||
bash admin_utils/stage_5_pos_frequency_pipeline_tests/_stage_check_pos_pipeline.sh "$PR_AUTHOR" | ||
bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_4_pos_frequency_pipeline_checks | ||
- name: Archive processed dataset | ||
continue-on-error: true | ||
uses: actions/upload-artifact@v3 | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: processed-dataset | ||
path: | | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -151,3 +151,6 @@ dictionary.dic | |
|
||
# website | ||
config/website/test_sphinx_project/_build | ||
|
||
# final project | ||
/data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
""" | ||
Public module for checking student CoNLL-U files. | ||
""" | ||
|
||
import subprocess | ||
import sys | ||
from pathlib import Path | ||
|
||
from config.cli_unifier import _run_console_tool, choose_python_exe | ||
from config.stage_1_style_tests.common import check_result | ||
|
||
|
||
def check_via_official_validator(conllu_path: Path) -> subprocess.CompletedProcess:
    """
    Run the Universal Dependencies validator script on a CoNLL-U file.

    The validator script is looked up in the ``ud_validator`` directory located
    next to this module and is launched with the interpreter returned by
    ``choose_python_exe``.

    URL: https://github.com/UniversalDependencies/tools/blob/master/validate.py

    Args:
        conllu_path (Path): Path to the CoNLL-U file to check.

    Returns:
        subprocess.CompletedProcess: Program execution values
    """
    validator_script = Path(__file__).parent / "ud_validator" / "validate.py"
    # Options forwarded to validate.py as-is: language, error limit, validation level.
    cli_options = ("--lang", "ru", "--max-err", "0", "--level", "2")
    validator_args = [str(validator_script), *cli_options, str(conllu_path)]
    return _run_console_tool(str(choose_python_exe()), validator_args, debug=True)
|
||
|
||
def main() -> None:
    """
    Module entrypoint.

    Reads the path to a CoNLL-U file from the first command-line argument,
    runs the validator on it, prints the captured stdout and stderr, and
    passes the validator return code to ``check_result``.

    Exits with status 1 when no argument is given or the path does not exist.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print('Provide path to the file to check.')
        sys.exit(1)

    conllu_path = Path(cli_args[0])
    if not conllu_path.exists():
        print("Total CONLLU file is not present. Analyze first.")
        sys.exit(1)

    result = check_via_official_validator(conllu_path=conllu_path)
    # Keep the original order: stdout first, then stderr.
    for captured_stream in (result.stdout, result.stderr):
        print(captured_stream.decode("utf-8"))
    check_result(result.returncode)
|
||
|
||
if __name__ == "__main__": | ||
main() |
File renamed without changes.
File renamed without changes.
Oops, something went wrong.