Skip to content

Commit

Permalink
implemented weighting tag format.
Browse files Browse the repository at this point in the history
  • Loading branch information
ryogrid committed Oct 14, 2024
1 parent 467eb93 commit a807db9
Showing 1 changed file with 29 additions and 1 deletion.
30 changes: 29 additions & 1 deletion web-ui-image-search-lsi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,37 @@ class Arguments(Protocol):

# Module-level holder for an Arguments object; starts out as None.
args: Optional[Arguments] = None

def normalize_and_apply_weight_to_quey_bow(query_bow: List[Tuple[int, int]], new_doc: str) -> List[Tuple[int, float]]:
    """Turn a bag-of-words query into a weighted, normalized LSI query vector.

    ``new_doc`` is the raw query text: whitespace-separated tags, each
    optionally written as ``tag:weight`` (for example
    ``"cat:2 dog:0.5 bird"``). A tag without exactly one ``:`` counts with
    weight 1. Tags that are not present in the module-level ``dictionary``
    are ignored instead of raising ``KeyError``.

    Args:
        query_bow: ``(token_id, count)`` pairs describing the query. The list
            is not modified; a weighted copy is built instead.
        new_doc: Raw query string from which the per-tag weights are parsed.

    Returns:
        The result of ``model[weighted_bow]`` with every value divided by the
        number of tags found in ``new_doc``.

    Raises:
        RuntimeError: If the module-level ``dictionary`` or ``model`` is
            still ``None``.
        ValueError: If a ``tag:weight`` token carries a non-numeric weight.
    """
    # Both globals are typed "Any | None"; fail with a clear message rather
    # than an AttributeError/TypeError on None.
    if dictionary is None or model is None:
        raise RuntimeError("dictionary and model must be loaded before building a query")

    # split() without an argument drops the empty strings that repeated
    # spaces would otherwise produce (they are never valid tags and would
    # inflate the normalization count).
    tags: List[str] = new_doc.split()

    # Combined weight per token id. A tag that is repeated in the query has
    # its weights multiplied together.
    weight_by_id: dict = {}
    for tag in tags:
        parts: List[str] = tag.split(":")
        if len(parts) != 2:
            # No explicit weight: weight 1 leaves the count untouched.
            continue
        name, raw_weight = parts
        try:
            # Weights are floats so that fractional values such as
            # "tag:0.5" are accepted.
            weight = float(raw_weight)
        except ValueError as err:
            raise ValueError(f"invalid weight in query tag {tag!r}") from err

        # NOTE(review): the tag is looked up verbatim, while the caller builds
        # query_bow through simple_preprocess(); a tag that preprocessing
        # rewrites (e.g. upper-case input) may not match here -- confirm.
        tag_id = dictionary.token2id.get(name)
        if tag_id is None:
            # Unknown tag: nothing in query_bow can refer to it.
            continue
        weight_by_id[tag_id] = weight_by_id.get(tag_id, 1.0) * weight

    # Build a new float-valued bag-of-words instead of mutating the caller's
    # list; every entry whose id has a weight is scaled by it.
    weighted_bow: List[Tuple[int, float]] = [
        (token_id, count * weight_by_id.get(token_id, 1.0))
        for token_id, count in query_bow
    ]

    query_lsi: List[Tuple[int, float]] = model[weighted_bow]

    # Normalize by the number of tags in the query; "or 1" guards against a
    # blank query dividing by zero.
    tag_num: int = len(tags) or 1
    return [(topic_id, value / tag_num) for topic_id, value in query_lsi]

def find_similar_documents(model: LsiModel, new_doc: str, topn: int = 50) -> List[Tuple[int, float]]:
query_bow: List[Tuple[int, int]] = dictionary.doc2bow(simple_preprocess(new_doc))

Check failure on line 56 in web-ui-image-search-lsi.py

View workflow job for this annotation

GitHub Actions / build

Item "None" of "Any | None" has no attribute "doc2bow" [union-attr]
query_lsi: List[Tuple[int, float]] = model[query_bow]
query_lsi = normalize_and_apply_weight_to_quey_bow(query_bow, new_doc)
#query_lsi: List[Tuple[int, float]] = model[query_bow]

sims: List[Tuple[int, float]] = index[query_lsi]

Check failure on line 60 in web-ui-image-search-lsi.py

View workflow job for this annotation

GitHub Actions / build

Value of type "Any | None" is not indexable [index]

Expand Down

0 comments on commit a807db9

Please sign in to comment.