from transformers import LlamaTokenizer, LlamaForCausalLM

# Load the tokenizer and the model (8-bit quantized), then move it to the GPU.
tokenizer = LlamaTokenizer.from_pretrained("yulan-team/YuLan-Chat-2-13b-fp16")
model = LlamaForCausalLM.from_pretrained("yulan-team/YuLan-Chat-2-13b-fp16", load_in_8bit=True).cuda()
model = model.eval()

# Wrap the user message in the YuLan conversation template.
input_text = "hello"
prompt = "The following is a conversation between a human and an AI assistant namely YuLan, developed by GSAI, Renmin University of China. The AI assistant gives helpful, detailed, and polite answers to the user's questions.\n[|Human|]:{}\n[|AI|]:".format(input_text)
inputs = tokenizer(prompt, return_tensors='pt', padding="longest", max_length=8192, truncation=True, return_attention_mask=True, add_special_tokens=True)

# Sample a completion and print the generated text.
kwargs = {'temperature': 0.8, 'top_p': 0.95, "top_k": 50, "repetition_penalty": 1.1, "no_repeat_ngram_size": 64, "max_length": 8192, "pad_token_id": tokenizer.bos_token_id, "eos_token_id": tokenizer.eos_token_id}
outputs = model.generate(inputs['input_ids'].to(model.device), attention_mask=inputs['attention_mask'].to(model.device), do_sample=True, **kwargs)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[len(prompt):])
Running the cell above produces the following traceback:

RuntimeError                              Traceback (most recent call last)
in <cell line: 9>()
7 inputs = tokenizer(prompt, return_tensors='pt', padding="longest", max_length=8192, truncation=True, return_attention_mask=True, add_special_tokens=True)
8 kwargs = {'temperature': 0.8, 'top_p': 0.95, "top_k": 50, "repetition_penalty": 1.1, "no_repeat_ngram_size": 64, "max_length": 8192, "pad_token_id": tokenizer.bos_token_id, "eos_token_id": tokenizer.eos_token_id}
----> 9 outputs = model.generate(inputs['input_ids'].to(model.device), attention_mask=inputs['attention_mask'].to(model.device), do_sample=True, **kwargs)
10 print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[len(prompt):])
2 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py in decorate_context(*args, **kwargs)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)
116
117 return decorate_context
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, **kwargs)
1586
1587 # 13. run sample
-> 1588 return self.sample(
1589 input_ids,
1590 logits_processor=logits_processor,
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in sample(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2676 # sample
2677 probs = nn.functional.softmax(next_token_scores, dim=-1)
-> 2678 next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
2679
2680 # finished sentences should have their next token be a padding token
RuntimeError: probability tensor contains either `inf`, `nan` or element < 0
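
This error is raised by the torch.multinomial call in sample(): the softmax-ed next-token distribution already contains inf, nan, or negative entries, which usually means the logits returned by the model were non-finite. A minimal sketch of an alternative loading path worth trying, assuming (and this is only an assumption, not a confirmed diagnosis) that the non-finite logits are related to combining load_in_8bit=True with the explicit .cuda() call:

import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

# Sketch only: load the published fp16 weights and let accelerate place them,
# instead of 8-bit quantization plus a manual .cuda(). Untested as a fix here.
tokenizer = LlamaTokenizer.from_pretrained("yulan-team/YuLan-Chat-2-13b-fp16")
model = LlamaForCausalLM.from_pretrained(
    "yulan-team/YuLan-Chat-2-13b-fp16",
    torch_dtype=torch.float16,  # keep fp16 weights rather than quantizing to 8-bit
    device_map="auto",          # automatic device placement; no explicit .cuda()
).eval()

If 8-bit loading is required for memory reasons, the same idea applies with load_in_8bit=True together with device_map="auto", again without calling .cuda() on the returned model.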