Commit

wip
finn committed Mar 5, 2024
1 parent 36efe44 commit 8eb8dee
Showing 1 changed file with 8 additions and 9 deletions.
17 changes: 8 additions & 9 deletions script.py
@@ -18,39 +18,38 @@ def verify_inputs(data):
 
 
 os.makedirs(DEFAULT_MODEL_DIR, exist_ok=True)
-print(f"Stanza model directory: {DEFAULT_MODEL_DIR}")
+# print(f"Stanza model directory: {DEFAULT_MODEL_DIR}")
 
-print(f"Initiating pipeline cache...")
+# print(f"Initiating pipeline cache...")
 pipelinesCache = dict()
 
 def ensure_stanza(language):
     if not os.path.exists(os.path.join(DEFAULT_MODEL_DIR, language)):
         print(f"Downloading Stanza model for '{language}'...")
         stanza.download(language, model_dir=DEFAULT_MODEL_DIR)
     else:
-        print(f"Stanza model for '{language}' already exists. Skipping download.")
+        # print(f"Stanza model for '{language}' already exists. Skipping download.")
 
 def get_pipeline(language, processors):
     global pipelinesCache
     cacheKey = language + "_" + processors
 
     ensure_stanza(language)
 
     print(f"current pipelines: {pipelinesCache}")
     print(f"cacheKey: {cacheKey}")
 
     if cacheKey not in pipelinesCache:
         print(f"cacheKey: {cacheKey} NOT FOUND! building")
         pipelinesCache[cacheKey] = stanza.Pipeline(
             lang=language,
             processors=processors,
             use_gpu=False
         )
         print(f"new pipelines: {pipelinesCache}")
 
     return pipelinesCache[cacheKey]
 
 
 def parse_doc(doc):
     print("Parsing doc...")
     print(doc)
     serializable_entities = [
         {
             "text": entity.text,
@@ -66,7 +65,7 @@ def parse_doc(doc):
         tokens = []
         deps = []
         for word in sentence.words:
-            print(word)
+            # print(word)
             tokens.append({
                 'index': word.id,
                 'token': word.text,
@@ -101,7 +100,7 @@ def ping():
 def get_data():
     try:
         data = request.get_json()
-        print(f"request data: {data}")
+        # print(f"request data: {data}")
 
         try:
             verify_inputs(data)
