# yolo4_AI_Thread.py
#import argparse
#import os
#import glob
import random
import darknet
import time
import cv2
import numpy as np
from imutils.video import FPS
'''
conda activate pycoral
python AI.py -y4AI -d 1 -cam 6onvif.txt -rtsp 19cams.rtsp

On my i9-12900K with RTX 3070 GPU running the yolov4-608.cfg model
I get ~34 fps on 25 cameras.
Performance is great: so far no false positive detections, and it seems
to have greater detection sensitivity, especially at night.
I do get segfault crashes from a darknet function
that I've had no luck tracking down so far.  It may be a GPU
memory issue, as it seems to correlate with other code using the GPU/display.
At this point it is frustrating enough that I may scrap the Darknet yolo4,
especially since the Ultralytics yolo8 works so well.
'''
__Thread__ = False      # set False to ask the thread's main loop to exit
network = None          # darknet handles, set by threadInit()
class_names = None
class_colors = None
def threadInit():
    '''
    One-time setup code run when the thread is launched.

    Eventually this should use PREPROCESS_DIMS to choose among the 608, 512,
    and 416 yolo4 models; just use 608 for now.  Smaller should be better for
    weaker machines, but bigger seems better for accuracy.
    '''
    global network
    global class_names
    global class_colors
    random.seed(33)  # deterministic bbox colors
    network, class_names, class_colors = darknet.load_network(
        "./yolo4/yolov4-608.cfg",
        "./yolo4/coco.data",
        "./yolo4/yolov4.weights",
        batch_size=1
    )
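'''
A possible shape for that model selection (untested sketch, not current behavior;
assumes yolov4-512.cfg and yolov4-416.cfg also live in ./yolo4 and that threadInit()
grows a PREPROCESS_DIMS parameter like the one do_inference() already receives):

def threadInit(PREPROCESS_DIMS=(608, 608)):
    global network, class_names, class_colors
    size = PREPROCESS_DIMS[0] if PREPROCESS_DIMS[0] in (416, 512, 608) else 608
    network, class_names, class_colors = darknet.load_network(
        "./yolo4/yolov4-{}.cfg".format(size),
        "./yolo4/coco.data",
        "./yolo4/yolov4.weights",   # same weights file works at any input resolution
        batch_size=1)
'''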
'''
AI model dependent function that does the inference.
This is the function you need to write to add a new AI model option to the system,
along with changes in AI.py to add command line options for it and to start and stop
the thread function.  Search for the #$$$# string to see where changes need to be
made, in analogy with the TPU thread, which is the most straightforward to start and stop.
Called as:
image, personDetected, boxpoints, detectConfidence = do_inference( image, model, PREPROCESS_DIMS, confidence, blobThreshold )
'''
def do_inference( input_image, model, PREPROCESS_DIMS, confidence, blobThreshold ):
    global network
    global class_names
    global class_colors
    boxPoints=(0,0, 0,0, 0,0, 0,0)  # (startX, startY, endX, endY, Xcenter, Ycenter, Xlength, Ylength)
    personDetected = False
    detectConfidence = 0.0
    # code to do an inference
    #%%%#print('DN', end='', flush=True)
    width = darknet.network_width(network)
    height = darknet.network_height(network)
    darknet_image = darknet.make_image(width, height, 3)
    image_rgb = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height), interpolation=cv2.INTER_LINEAR)
    darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
    #%%%#print('*DD', end='', flush=True)  # debug trace to see where segfault happens
    detections = darknet.detect_image(network, class_names, darknet_image, thresh=confidence-0.001)
    #%%%#print('*F', end='', flush=True)
    darknet.free_image(darknet_image)
    #%%%#print('*DB', end='', flush=True)
    '''
    Note that I modified darknet.draw_boxes() to return the number of persons detected
    and their cv2 style boxPoints in input_image coordinates (see the sketch after this
    function).  I let darknet draw all the objects detected, mostly for grins at the
    moment, but I only return the person detection boxPoints.
    Only the first (highest confidence) person detection is used, but having multiple
    persons might end up being useful eventually.  My SSD code stops with the first
    above-threshold detection.  Could do that here too, but I'm exploring yolo as much
    as trying to actually use it.
    '''
    image, personDetected, boxpoints, detectConfidence = darknet.draw_boxes(detections, image_rgb, class_colors, network)
    #%%%#print('!', end='', flush=True)
    image=cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if personDetected:
        ## dump some data to help me figure out what is what.
        ##darknet.print_detections(detections, coordinates=True)
        ##print(boxpoints)
        (H,W)=image.shape[:2]
        ### this doesn't seem right:
        ###detectConfidence = float(detections[-1][1])/100 # sorted highest confidence last!
        ### modified darknet.draw_boxes() to return person detection confidence as float instead of string
        if blobThreshold >= 0:  # <0 signals we don't want boxPoints or image annotations on the zoomed verification image
            startX, startY, endX, endY = boxpoints[-1]
            xlen=endX-startX
            ylen=endY-startY
            xcen=int((startX+endX)/2)
            ycen=int((startY+endY)/2)
            boxPoints=(startX,startY, endX,endY, xcen,ycen, xlen,ylen)
            ### dump some data to help me figure out what is what.
            ###darknet.print_detections(detections, coordinates=True)
            ###print(boxPoints)
            # a detection box filling more than blobThreshold of the frame area is bogus;
            # e.g. with blobThreshold=0.2 a 400x300 box in a 1280x720 frame covers ~0.13 and passes
            if float(xlen*ylen)/(W*H) > blobThreshold:
                personDetected = False
    #%%%#print('#', end='', flush=True)
    return image.copy(), personDetected, boxPoints, detectConfidence
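# The darknet.py shipped with darknet defines draw_boxes(detections, image, colors)
# and returns only the annotated image.  The modified version described above is not
# included in this file; the sketch below is a hypothetical reconstruction of roughly
# what it does, not the actual patch.  It assumes detect_image() returns
# (label, confidence-string, bbox) tuples in network coordinates with the highest
# confidence last, and uses the stock darknet.bbox2points() helper.
def draw_boxes_sketch(detections, image, colors, network):
    personDetected = False
    boxpoints = []
    detectConfidence = 0.0
    ih, iw = image.shape[:2]
    xscale = iw / darknet.network_width(network)    # scale network coords to image coords
    yscale = ih / darknet.network_height(network)
    for label, conf, bbox in detections:
        left, top, right, bottom = darknet.bbox2points(bbox)
        left, right = int(left * xscale), int(right * xscale)
        top, bottom = int(top * yscale), int(bottom * yscale)
        cv2.rectangle(image, (left, top), (right, bottom), colors[label], 1)
        if label == 'person':
            personDetected = True
            boxpoints.append((left, top, right, bottom))
            detectConfidence = float(conf) / 100.0  # string percent -> float
    return image, personDetected, boxpoints, detectConfidence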
'''
This should be pure "boilerplate" with no or minimal changes necessary
'''
def AI_thread(results, inframe, model, tnum, cameraLock, nextCamera, Ncameras,
              PREPROCESS_DIMS, confidence, verifyConf, dnnStr, blobThreshold, yoloQ):
    global __Thread__
    waits=0
    dcnt=0
    ncnt=0
    ecnt=0
    fcnt=0
    detect=0
    noDetect=0
    yolo_verify_fail=0
    if tnum > 0:
        aiStr = dnnStr + str(tnum)
    else:
        aiStr = dnnStr
    threadInit()
    print(aiStr + " AI thread is running...")
    if yoloQ is not None:
        print(" " + aiStr + " yolo4 AI thread doesn't use the yolo queue! Ignoring.")
        yoloQ = None
    __Thread__ = True
    cfps = FPS().start()
    while __Thread__ is True:
        cameraLock.acquire()
        cq=nextCamera
        nextCamera = (nextCamera+1)%Ncameras
        cameraLock.release()
        # get a frame
        try:
            (image, cam, imageDT) = inframe[cq].get(True,0.100)
        except:
            image = None
            waits+=1
            continue
        if image is None:
            continue
        personDetected = False
        # image is straight from the camera; we draw boxes and labels on it later
        (H,W)=image.shape[:2]
        # orig_image is a copy of the image and is never drawn on; it can be passed in the output queue if you don't want annotations.
        orig_image=image.copy()  # for zoomed in yolo verification
        # run the inference
        #%%%#print(str(cam), end='', flush=True)  # debug trace to see where segfault happens
        yoloDetect=False
        img, personDetected, boxPoints, detectConfidence = do_inference( image.copy(), model, PREPROCESS_DIMS, confidence, blobThreshold )
        image=img.copy()
        #%%%#print('r', end='', flush=True)  ## Crashing in this function
        fcnt+=1
        cfps.update()  # update the FPS counter
        # Next, zoom in and repeat the inference to verify the detection.
        ## Removing this puts too much load on the much slower yolo thread,
        ## as this verification rejects a lot of plants as people detections.
        if personDetected:  # always verify now.
            try:  # repeat the inference zoomed in on the person detected
                personDetected = False
                yoloDetect=True
                ## removing this box expansion really hurt the verification sensitivity
                startX, startY, endX, endY, xcen, ycen, xlen, ylen = boxPoints
                label = "{:.1f}% C:{},{} W:{} H:{} UL:{},{} LR:{},{} {}".format(detectConfidence * 100,
                    str(xcen), str(ycen), str(xlen), str(ylen), str(startX), str(startY), str(endX), str(endY), aiStr)
                cv2.putText(image, label, (2, (H-5)-28), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2, cv2.LINE_AA)
                # zoom in on the detection box and run a second inference for verification
                blen=max(xlen,ylen)
                if blen < PREPROCESS_DIMS[0]:
                    blen = PREPROCESS_DIMS[0]  # expand crop pixels so resize always makes a smaller image
                adj=int(1.2*blen/2)  # enlarge the detection box and make the crop square about the box center
                CstartX=max(xcen-adj,0)
                CendX=min(xcen+adj,W-1)
                CstartY=max(ycen-adj,0)
                CendY=min(ycen+adj,H-1)
                zimg = orig_image[CstartY:CendY, CstartX:CendX]
            except Exception as e:
                print("Yolo4 crop region Exception: " + str(e))
                ##print(" Yolo4 crop region ERROR: {}:{} {}:{} Cam:{}".format( str(startY), str(endY), str(startX), str(endX), str(cam) ) )
                continue
            # run inference on the zoomed in image; the minus one for blobThreshold signals we don't want boxPoints or image annotations.
            #%%%#print('V', end='', flush=True)  # debug trace to see where segfault happens
            # pass to yolo v4 for verification, or pass as the zoomed image for alerts
            zzimg, personDetected, _, detectConfidence = do_inference( zimg.copy(), model, PREPROCESS_DIMS, verifyConf, -1.0 )
            cfps.update()  # update the FPS counter
            """ #%%%#
            if personDetected:
                print('d', end='', flush=True)  # debug trace to see where segfault happens
            else:
                print('n', end='', flush=True)  # debug trace to see where segfault happens
            """
            if personDetected:
                text = "Verify: {:.1f}%".format(detectConfidence * 100)  # show verification confidence on the detection image
                cv2.putText(image, text, (2, 28), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
        # Queue results
        try:
            if personDetected:
                detect+=1
                if results.full():
                    [_,_,_,_,_,_,_]=results.get(False)  # remove the oldest result
                    dcnt+=1
                results.put((image.copy(), cam, personDetected, imageDT, aiStr, boxPoints, zzimg.copy() ), True, 1.0)  # yolo_frame is the person crop here
            else:
                noDetect+=1
                if results.full():
                    [_,_,_,_,_,_,_]=results.get(False)  # remove the oldest result
                    ncnt+=1
                if yoloDetect:
                    yolo_verify_fail+=1
                    results.put((image.copy(), cam, False, imageDT, aiStr, (-1,0, 0,0, 0,0, 0,0), zzimg.copy() ), True, 1.00)  # -1 boxPoints flags zoom verification failed
                else:
                    results.put((image.copy(), cam, False, imageDT, aiStr, (0,0, 0,0, 0,0, 0,0), None ), True, 0.200)  # 0 boxPoints flags no initial detection
            """ #%%%#
            if (noDetect + detect) % Ncameras:
                print('|', end='', flush=True)  # debug trace to see where segfault happens
            else:
                print('|', end='\n', flush=True)  # debug trace to see where segfault happens
            """
        except:
            # presumably the output queue was full, main thread too slow.
            if personDetected:
                print("Person detection dropped! Cam" + str(cam) + imageDT.strftime("%Y-%m-%d_%H:%M:%S.%f"), flush=True)
            ##else:  # verified that quickly dropping no-detection frames helps prevent dropping detection frames, but is otherwise not helpful
            ##    print("Dropped a no detection frame." + imageDT.strftime("%Y-%m-%d_%H:%M:%S.%f"), flush=True)
            ecnt+=1
            continue
    # Thread exits
    cfps.stop()  # stop the FPS counter timer
    print(aiStr + " thread" + str(tnum) + ", waited: " + str(waits) + " dropped: " + str(dcnt+ncnt+ecnt) + " out of "
          + str(fcnt) + " images. AI: {:.2f} inferences/sec".format(cfps.fps()))
    print(" " + aiStr + " " + str(detect) + " Persons Detected. " + str(noDetect) + " frames with no person.")
    print(" " + aiStr + " " + str(yolo_verify_fail) + " detections failed zoom-in verification.")
    print(" " + aiStr + " Detections dropped: " + str(dcnt) + " results dropped: " + str(ncnt) + " results.put() exceptions: " + str(ecnt))