-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a pipeline for parsing mimosa papers
- Loading branch information
1 parent
182a0d3
commit 0c0ebc3
Showing
11 changed files
with
129 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,35 @@ | ||
.PHONY: test install dev venv clean activate base | ||
.ONESHELL: | ||
|
||
VENV=.venv | ||
PY_VER=python3.11 | ||
PYTHON=./$(VENV)/bin/$(PY_VER) | ||
PIP_INSTALL=$(PYTHON) -m pip install | ||
SPACY_MODEL=$(PYTHON) -m spacy download en_core_web_md | ||
|
||
test: activate | ||
export MOCK_TRAITER=1 | ||
$(PYTHON) -m unittest discover | ||
./.venv/bin/python3.12 -m unittest discover | ||
export MOCK_TRAITER=0 | ||
|
||
install: venv activate base | ||
$(PIP_INSTALL) git+https://github.com/rafelafrance/common_utils.git@main#egg=common_utils | ||
$(PIP_INSTALL) git+https://github.com/rafelafrance/spell-well.git@main#egg=spell-well | ||
$(PIP_INSTALL) git+https://github.com/rafelafrance/traiter.git@master#egg=traiter | ||
$(PIP_INSTALL) . | ||
$(SPACY_MODEL) | ||
./.venv/bin/python3.12 -m pip install git+https://github.com/rafelafrance/common_utils.git@main#egg=common_utils | ||
./.venv/bin/python3.12 -m pip install git+https://github.com/rafelafrance/spell-well.git@main#egg=spell-well | ||
./.venv/bin/python3.12 -m pip install git+https://github.com/rafelafrance/traiter.git@master#egg=traiter | ||
./.venv/bin/python3.12 -m pip install . | ||
./.venv/bin/python3.12 -m spacy download en_core_web_md | ||
|
||
dev: venv activate base | ||
$(PIP_INSTALL) -e ../../misc/common_utils | ||
$(PIP_INSTALL) -e ../../misc/spell-well | ||
$(PIP_INSTALL) -e ../../traiter/traiter | ||
$(PIP_INSTALL) -e .[dev] | ||
$(SPACY_MODEL) | ||
./.venv/bin/python3.12 -m pip install -e ../../misc/common_utils | ||
./.venv/bin/python3.12 -m pip install -e ../../misc/spell-well | ||
./.venv/bin/python3.12 -m pip install -e ../../traiter/traiter | ||
./.venv/bin/python3.12 -m pip install -e .[dev] | ||
./.venv/bin/python3.12 -m spacy download en_core_web_md | ||
pre-commit install | ||
|
||
activate: | ||
. $(VENV)/bin/activate | ||
. .venv/bin/activate | ||
|
||
base: | ||
$(PIP_INSTALL) -U pip setuptools wheel | ||
./.venv/bin/python3.12 -m pip install -U pip setuptools wheel | ||
|
||
venv: | ||
test -d $(VENV) || $(PY_VER) -m venv $(VENV) | ||
test -d .venv || python3.12 -m venv .venv | ||
|
||
clean: | ||
rm -r $(VENV) | ||
rm -r .venv | ||
find -iname "*.pyc" -delete |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import spacy | ||
from traiter.pylib.pipes import extensions, sentence, tokenizer | ||
from traiter.pylib.rules.date_ import Date | ||
from traiter.pylib.rules.elevation import Elevation | ||
from traiter.pylib.rules.habitat import Habitat | ||
from traiter.pylib.rules.lat_long import LatLong | ||
|
||
from flora.pylib.rules import delete_missing, delete_too_far, post_process | ||
from flora.pylib.rules.color import Color | ||
from flora.pylib.rules.count import Count | ||
from flora.pylib.rules.duration import Duration | ||
from flora.pylib.rules.flower_location import FlowerLocation | ||
from flora.pylib.rules.flower_morphology import FlowerMorphology | ||
from flora.pylib.rules.habit import Habit | ||
from flora.pylib.rules.leaf_duration import LeafDuration | ||
from flora.pylib.rules.leaf_folding import LeafFolding | ||
from flora.pylib.rules.margin import Margin | ||
from flora.pylib.rules.morphology import Morphology | ||
from flora.pylib.rules.name import Name | ||
from flora.pylib.rules.odor import Odor | ||
from flora.pylib.rules.part import Part | ||
from flora.pylib.rules.part_linker import PartLinker | ||
from flora.pylib.rules.part_location import PartLocation | ||
from flora.pylib.rules.part_location_linker import PartLocationLinker | ||
from flora.pylib.rules.plant_duration import PlantDuration | ||
from flora.pylib.rules.range import Range | ||
from flora.pylib.rules.reproduction import Reproduction | ||
from flora.pylib.rules.sex import Sex | ||
from flora.pylib.rules.sex_linker import SexLinker | ||
from flora.pylib.rules.shape import Shape | ||
from flora.pylib.rules.size import Size | ||
from flora.pylib.rules.subpart import Subpart | ||
from flora.pylib.rules.subpart_linker import SubpartLinker | ||
from flora.pylib.rules.surface import Surface | ||
from flora.pylib.rules.taxon import Taxon | ||
from flora.pylib.rules.taxon_like import TaxonLike | ||
from flora.pylib.rules.taxon_like_linker import TaxonLikeLinker | ||
from flora.pylib.rules.venation import Venation | ||
from flora.pylib.rules.woodiness import Woodiness | ||
|
||
# from traiter.pylib.pipes import debug | ||
|
||
|
||
def build(): | ||
# Build and return the spaCy pipeline used by this module: load the base model, | ||
# run the traiter tokenizer setup, add the sentence pipe, then register every | ||
# rule pipe, the linker pipes, the delete_* pipes and finally post_process. | ||
# NOTE(review): every .pipe(nlp) call below receives the same nlp object in | ||
# sequence, so the statement order is presumably significant -- confirm before | ||
# reordering anything. | ||
extensions.add_extensions() | ||
|
||
# Load en_core_web_md with its "ner" component excluded. | ||
nlp = spacy.load("en_core_web_md", exclude=["ner"]) | ||
|
||
tokenizer.setup_tokenizer(nlp) | ||
|
||
# The sentence pipe is inserted ahead of the model's "parser" component and is | ||
# given the same base model name that was loaded above. | ||
config = {"base_model": "en_core_web_md"} | ||
nlp.add_pipe(sentence.SENTENCES, config=config, before="parser") | ||
|
||
# Rule pipes imported from traiter (Date, Elevation, Habitat, LatLong) are | ||
# interleaved with the flora rule pipes below. | ||
Date.pipe(nlp) | ||
|
||
Part.pipe(nlp) | ||
Subpart.pipe(nlp) | ||
|
||
Elevation.pipe(nlp) | ||
LatLong.pipe(nlp) | ||
|
||
Color.pipe(nlp) | ||
Habitat.pipe(nlp) | ||
|
||
Duration.pipe(nlp) | ||
FlowerLocation.pipe(nlp) | ||
FlowerMorphology.pipe(nlp) | ||
LeafDuration.pipe(nlp) | ||
LeafFolding.pipe(nlp) | ||
Morphology.pipe(nlp) | ||
Odor.pipe(nlp) | ||
PlantDuration.pipe(nlp) | ||
Reproduction.pipe(nlp) | ||
Sex.pipe(nlp) | ||
Venation.pipe(nlp) | ||
Woodiness.pipe(nlp) | ||
|
||
# NOTE(review): the overwrite list presumably names labels of earlier matches | ||
# that a Name match is allowed to replace -- verify against Name.pipe. | ||
Name.pipe(nlp, overwrite=["subpart", "color", "admin_unit"]) | ||
|
||
Range.pipe(nlp) | ||
Size.pipe(nlp) | ||
Count.pipe(nlp) | ||
|
||
Habit.pipe(nlp) | ||
Margin.pipe(nlp) | ||
Shape.pipe(nlp) | ||
Surface.pipe(nlp) | ||
|
||
# NOTE(review): meaning of extend=2 is not visible here; see Taxon.pipe. | ||
Taxon.pipe(nlp, extend=2, overwrite=["habitat", "color"]) | ||
|
||
PartLocation.pipe(nlp) | ||
TaxonLike.pipe(nlp) | ||
|
||
# Linker pipes are registered only after all of the rule pipes above. | ||
PartLinker.pipe(nlp) | ||
SubpartLinker.pipe(nlp) | ||
SexLinker.pipe(nlp) | ||
PartLocationLinker.pipe(nlp) | ||
TaxonLikeLinker.pipe(nlp) | ||
|
||
# The delete_* pipes and post_process are registered last. | ||
delete_missing.pipe(nlp) | ||
delete_too_far.pipe(nlp) | ||
|
||
post_process.pipe(nlp) | ||
|
||
return nlp |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ readme = "README.md" | |
description = "" | ||
license = {file = "LICENSE"} | ||
authors = [{name="Raphael LaFrance", email="[email protected]"}] | ||
requires-python = ">=3.11" | ||
requires-python = ">=3.12" | ||
dependencies = [ | ||
"Jinja2", | ||
"pandas", | ||
|
@@ -39,7 +39,7 @@ add-taxa = "flora.util_add_taxon_terms:main" | |
py-modules = [] | ||
|
||
[tool.ruff] | ||
target-version = "py311" | ||
target-version = "py312" | ||
|
||
show-fixes = true | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters