chore: update transformers test dependency (#8752)
* update transformers test dependency

* add pad_token_id to the mock tokenizer

* fix HFLocal test + new test
anakin87 authored Jan 21, 2025
1 parent 2bf6bf6 commit f96839e
Showing 3 changed files with 19 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -85,7 +85,7 @@ format-check = "ruff format --check {args}"
 extra-dependencies = [
   "numpy>=2", # Haystack is compatible both with numpy 1.x and 2.x, but we test with 2.x
-  "transformers[torch,sentencepiece]==4.44.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
+  "transformers[torch,sentencepiece]==4.47.1", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
   "huggingface_hub>=0.27.0", # Hugging Face API Generators and Embedders
   "sentence-transformers>=3.0.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder
   "langdetect", # TextLanguageRouter and DocumentLanguageClassifier
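For context on the pin above: the test extras install an exact transformers version, so a stale local environment is a common source of test drift. A minimal sketch (not part of this commit) that checks the installed version against the pin:

import transformers

# Pinned version taken from the pyproject.toml hunk above; update if the pin moves.
expected = "4.47.1"
assert transformers.__version__ == expected, f"expected transformers {expected}, got {transformers.__version__}"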
1 change: 1 addition & 0 deletions test/components/generators/chat/test_hugging_face_local.py
@@ -42,6 +42,7 @@ def mock_pipeline_tokenizer():
     # Mocking the tokenizer
     mock_tokenizer = Mock(spec=PreTrainedTokenizer)
     mock_tokenizer.encode.return_value = ["Berlin", "is", "cool"]
+    mock_tokenizer.pad_token_id = 100
     mock_pipeline.tokenizer = mock_tokenizer

     return mock_pipeline
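The explicit pad_token_id above matters because Mock(spec=PreTrainedTokenizer) auto-creates attributes as Mock objects, so without it the code under test would see a Mock where an integer is expected (the assumption being that transformers 4.47 reads pad_token_id during generation). A minimal sketch of that mock behavior:

from unittest.mock import Mock
from transformers import PreTrainedTokenizer

mock_tokenizer = Mock(spec=PreTrainedTokenizer)
print(type(mock_tokenizer.pad_token_id))  # <class 'unittest.mock.Mock'>, not an int

mock_tokenizer.pad_token_id = 100  # a concrete int, as a real tokenizer would provide
print(mock_tokenizer.pad_token_id)  # 100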
18 changes: 17 additions & 1 deletion test/components/generators/test_hugging_face_local_generator.py
@@ -397,8 +397,12 @@ def test_stop_words_criteria_with_a_mocked_tokenizer(self):
         # "This is ambiguously, but is unrelated."
         input_ids_one = torch.LongTensor([[100, 19, 24621, 11937, 6, 68, 19, 73, 3897, 5]])
         input_ids_two = torch.LongTensor([[100, 19, 73, 24621, 11937]]) # "This is unambiguously"
-        stop_words_criteria = StopWordsCriteria(tokenizer=Mock(spec=PreTrainedTokenizerFast), stop_words=["mock data"])
+
+        mock_tokenizer = Mock(spec=PreTrainedTokenizerFast)
+        mock_tokenizer.pad_token = "<pad>"
+        stop_words_criteria = StopWordsCriteria(tokenizer=mock_tokenizer, stop_words=["mock data"])
         stop_words_criteria.stop_ids = stop_words_id
+
         assert not stop_words_criteria(input_ids_one, scores=None)
         assert stop_words_criteria(input_ids_two, scores=None)
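For readers unfamiliar with the class under test: StopWordsCriteria is Haystack's StoppingCriteria subclass that halts generation once the generated ids end with a stop sequence, which is what the two assertions above exercise. A generic sketch of that contract (illustrative, not Haystack's exact implementation):

import torch
from transformers import StoppingCriteria

class EndsWithStopIds(StoppingCriteria):
    """Illustrative criterion: stop once the sequence ends with stop_ids."""

    def __init__(self, stop_ids: torch.LongTensor):
        self.stop_ids = stop_ids  # shape [1, n], the encoded stop sequence

    def __call__(self, input_ids: torch.LongTensor, scores, **kwargs) -> bool:
        n = self.stop_ids.shape[-1]
        # Compare the tail of the generated ids against the stop sequence.
        return input_ids.shape[-1] >= n and bool(torch.equal(input_ids[0, -n:], self.stop_ids[0]))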

@@ -459,3 +463,15 @@ def test_hf_pipeline_runs_with_our_criteria(self):
         results = generator.run(prompt="something that triggers something")
         assert results["replies"] != []
         assert generator.stopping_criteria_list is not None
+
+    @pytest.mark.integration
+    @pytest.mark.flaky(reruns=3, reruns_delay=10)
+    def test_live_run(self):
+        llm = HuggingFaceLocalGenerator(model="Qwen/Qwen2.5-0.5B-Instruct", generation_kwargs={"max_new_tokens": 50})
+        llm.warm_up()
+
+        result = llm.run(prompt="Please create a summary about the following topic: Climate change")
+
+        assert "replies" in result
+        assert isinstance(result["replies"][0], str)
+        assert "climate change" in result["replies"][0].lower()
