-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathliteclip.py
79 lines (58 loc) · 2.19 KB
/
liteclip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torchvision.transforms import InterpolationMode
from PIL import Image
from transformers import AutoTokenizer
from model import CLIP
from config import Config
class ZeroShotPipeline:
    """Zero-shot image classification on top of the project's ``CLIP`` model.

    On construction the pipeline builds ``CLIP`` from ``Config``, restores its
    weights from ``Config.state_dict_path``, switches it to eval mode, and
    loads the tokenizer named by ``Config.text_encoder``. ``predict`` then
    ranks a list of candidate text labels for a single image.
    """

    def __init__(self):
        """Load the model weights, the tokenizer, and the image transforms."""
        self.config = Config
        self.model = CLIP(self.config)
        # Nothing in this class moves the model or its inputs to another
        # device, so map the checkpoint onto the CPU explicitly. Without this,
        # a checkpoint saved from a GPU run fails to load on a CUDA-less host.
        state_dict = torch.load(self.config.state_dict_path, map_location="cpu")
        self.model.load_state_dict(state_dict)
        self.model.eval()
        self.tokenizer = AutoTokenizer.from_pretrained(self.config.text_encoder)
        # Resize (bicubic) + 224x224 centre crop + per-channel normalisation.
        # NOTE(review): the mean/std look like the statistics published with
        # OpenAI CLIP - confirm they match what the checkpoint was trained on.
        self._img_tfms = T.Compose([
            T.Resize(224, interpolation=InterpolationMode.BICUBIC),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(
                mean=(0.48145466, 0.4578275, 0.40821073),
                std=(0.26862954, 0.26130258, 0.27577711),
            ),
        ])

    @torch.no_grad()
    def _prepare_image(self, image):
        """Open *image*, convert it to RGB, and return a 1-image batch tensor.

        Args:
            image: Anything ``PIL.Image.open`` accepts (typically a path).

        Returns:
            The transformed image with a leading batch dimension of size 1.

        Raises:
            Exception: If the image cannot be opened or converted. The
                underlying error is attached as ``__cause__``.
        """
        try:
            # The context manager releases the file handle deterministically;
            # ``convert`` returns an independent, fully loaded image.
            with Image.open(image) as handle:
                rgb = handle.convert('RGB')
        except Exception as err:
            # Same exception type and message as before, but chained so the
            # real reason (missing file, corrupt data, ...) is not lost.
            raise Exception('provide a valid path for the image') from err
        return torch.unsqueeze(self._img_tfms(rgb), 0)

    @torch.no_grad()
    def _prepare_text(self, labels):
        """Tokenize *labels*, padded/truncated to ``config.max_length``.

        Returns:
            The tokenizer's batch encoding with PyTorch tensors
            (``return_tensors='pt'``).
        """
        return self.tokenizer.batch_encode_plus(
            labels,
            padding='max_length',
            truncation=True,
            max_length=self.config.max_length,
            return_tensors='pt',
        )

    @torch.no_grad()
    def predict(self, image: str, labels: list[str], top_k: int = 5) -> list[tuple[str, float]]:
        """Rank *labels* by how well they match *image*.

        Args:
            image: Path of the image to classify.
            labels: Candidate labels; at least two are required.
            top_k: Maximum number of results to return. Clamped to
                ``len(labels)`` when larger.

        Returns:
            ``(label, probability)`` pairs sorted from most to least likely.
            Probabilities are a softmax over all supplied labels.

        Raises:
            AssertionError: If fewer than two labels are supplied.
            Exception: If the image cannot be loaded.
        """
        # Raised explicitly rather than via ``assert`` so the check survives
        # ``python -O``; the exception type and message are unchanged.
        if len(labels) < 2:
            raise AssertionError("provide atleast 2 labels")
        top_k = min(top_k, len(labels))

        img = self._prepare_image(image)
        text = self._prepare_text(labels)
        # The model returns a pair; only the first element (the logits) is
        # used. Presumably it holds one score per label - confirm in model.CLIP.
        logits, _ = self.model((img, text))
        probabilities = torch.softmax(torch.flatten(logits), dim=0)
        values, indices = torch.topk(probabilities, k=top_k)
        # ``tolist`` converts to plain Python floats/ints in a single call.
        return [(labels[i], v) for v, i in zip(values.tolist(), indices.tolist())]