Skip to content


add a demo script
Browse files Browse the repository at this point in the history
  • Loading branch information
szq0214 committed Aug 11, 2017
1 parent 70302a2 commit 152ff2b
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 7 deletions.
126 changes: 126 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import matplotlib.cm as cm
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
# %matplotlib inline

# Plot defaults for the demo: 10x10 figure size and nearest-neighbour
# interpolation when images are drawn.
plt.rcParams.update({
    'figure.figsize': (10, 10),
    'image.interpolation': 'nearest',
})

# Make sure that caffe is on the python path:
import os
import sys

import caffe

# gpu:
# caffe.set_device(0)
# caffe.set_mode_gpu()

# cpu:

from google.protobuf import text_format
from caffe.proto import caffe_pb2

# Load the PASCAL VOC label map from its text-format protobuf file and parse
# it into a caffe_pb2.LabelMap message.
labelmap_file = 'data/VOC0712/labelmap_voc.prototxt'
labelmap = caffe_pb2.LabelMap()
# The `with` block closes the file handle once the text has been read; the
# handle is no longer bound to the name `file`, which shadowed a builtin.
with open(labelmap_file, 'r') as labelmap_fp:
    text_format.Merge(str(labelmap_fp.read()), labelmap)

def get_labelname(labelmap, labels):
    """Translate numeric class labels into their display names.

    Args:
        labelmap: Object exposing an ``item`` sequence whose entries carry a
            ``label`` and a ``display_name`` attribute (in this script, the
            parsed ``caffe_pb2.LabelMap``).
        labels: A single label, or a list of labels. Labels are compared
            with ``==``, so float labels such as ``8.0`` match integer ids.

    Returns:
        A list with one display name per requested label, in input order.

    Raises:
        AssertionError: If a requested label has no entry in ``labelmap``.
    """
    if not isinstance(labels, list):
        labels = [labels]

    labelnames = []
    for label in labels:
        # The first matching entry wins.
        for item in labelmap.item:
            if label == item.label:
                labelnames.append(item.display_name)
                break
        else:
            # Raised explicitly (rather than via `assert`) so the check still
            # runs under `python -O`; the exception type is unchanged.
            raise AssertionError('label %r not found in labelmap' % (label,))
    return labelnames

# Load the net in the test phase for inference, and configure input preprocessing.
model_def = 'examples/07+12+coco/deploy.prototxt'
model_weights = 'examples/07+12+coco/DSOD300_VOC0712+coco.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)

# input preprocessing: 'data' is the name of the input blob == net.inputs[0]
# NOTE(review): the constructor call was missing from this line (it read
# `transformer ={...})`); caffe.io.Transformer is the pycaffe class that
# provides the set_* / preprocess methods used below -- confirm against the
# upstream script.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))  # move the last (channel) axis to the front
transformer.set_mean('data', np.array([104, 117, 123]))  # mean pixel
transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
transformer.set_channel_swap('data', (2, 1, 0))  # the reference model has channels in BGR order instead of RGB

# DSOD detection

# set net to batch size of 1
image_resize = 300
# NOTE(review): the statement that applies this setting was absent, leaving
# `image_resize` unused; the reshape below follows the SSD demo this script
# is based on -- confirm against the upstream script.
net.blobs['data'].reshape(1, 3, image_resize, image_resize)

# set colors: 21 evenly spaced samples of the HSV colormap, looked up by
# class label when the boxes are drawn (presumably 20 VOC classes plus
# background -- confirm against the labelmap).
colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()

# Load an image.
img = "examples/images/cat.jpg"
# NOTE(review): the right-hand side of this assignment was missing;
# caffe.io.load_image is the pycaffe loader whose [0,1] RGB output matches
# the raw_scale / channel_swap settings configured on the transformer --
# confirm against the upstream script.
image = caffe.io.load_image(img)

# Run the net and examine the top_k results

# Apply the configured preprocessing and copy the result into the input blob.
transformed_image = transformer.preprocess('data', image)
net.blobs['data'].data[...] = transformed_image

# Forward pass.
detections = net.forward()['detection_out']

# Parse the outputs: columns 1..6 of detections[0, 0] are read as
# label, confidence, xmin, ymin, xmax, ymax (one row per detection).
det_label, det_conf, det_xmin, det_ymin, det_xmax, det_ymax = (
    detections[0, 0, :, column] for column in range(1, 7)
)

# Get detections with confidence higher than 0.6 (the bound is inclusive).
top_indices = np.flatnonzero(det_conf >= 0.6).tolist()

# Gather the surviving rows of every field.
top_conf = det_conf[top_indices]
top_label_indices = det_label[top_indices].tolist()
top_labels = get_labelname(labelmap, top_label_indices)
top_xmin, top_ymin, top_xmax, top_ymax = (
    coordinate[top_indices]
    for coordinate in (det_xmin, det_ymin, det_xmax, det_ymax)
)

# Plot the boxes
# Draw the image first so the axes are in pixel coordinates; without it the
# pixel-space rectangles below would fall outside the default axes limits.
# NOTE(review): imshow/show were not present in this copy of the script --
# confirm against the upstream script.
plt.imshow(image)
currentAxis = plt.gca()

for i in range(top_conf.shape[0]):
    # Box corners are multiplied by the image width (shape[1]) and height
    # (shape[0]) to obtain pixel positions -- presumably the detector emits
    # coordinates normalised to [0, 1].
    xmin = int(round(top_xmin[i] * image.shape[1]))
    ymin = int(round(top_ymin[i] * image.shape[0]))
    xmax = int(round(top_xmax[i] * image.shape[1]))
    ymax = int(round(top_ymax[i] * image.shape[0]))
    score = top_conf[i]
    label = int(top_label_indices[i])
    label_name = top_labels[i]
    display_txt = '%s: %.2f' % (label_name, score)
    # Rectangle takes (anchor point, width, height).
    coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1
    color = colors[label]
    currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=6))
    currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor': color, 'alpha': 0.5}, fontproperties=fm.FontProperties(size=22))

# Needed to display the figure when run as a plain script (the notebook
# `%matplotlib inline` magic near the top of the file is commented out).
plt.show()


14 changes: 7 additions & 7 deletions
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ The code is based on the SSD framework (

If you use these models or find this helps your research, please cite:

title = {DSOD: Learning Deeply Supervised Object Detectors from Scratch},
author = {Shen, Zhiqiang and Liu, Zhuang and Li, Jianguo and Jiang, Yu-Gang and Chen, Yurong and Xue, Xiangyang},
Expand Down Expand Up @@ -58,14 +57,12 @@ COCO test-dev 2015 result (COCO has more object categories than VOC dataset, so
| DSOD300 (COCO trainval) | 29.3 | [Download (87.2M)]( |

## Preparation

0. Install SSD (https://github.com/weiliu89/caffe/tree/ssd) following the instructions there, including: (1) Install SSD caffe; (2) Download PASCAL VOC 2007 and 2012 datasets; and (3) Create LMDB file. Make sure you can run it without any errors.
1. Create a subfolder `dsod` under `example/`, add files ``, ``, `` and `` to the folder `example/dsod/`.
1. Create a subfolder `dsod` under `example/`, add files ``, ``, ``, `` and `` to the folder `example/dsod/`.
2. Replace the file `model_libs.py` in the folder `python/caffe/` with ours.

## Training & Testing

- Train a DSOD model on VOC 07+12:
Expand All @@ -86,12 +83,16 @@ COCO test-dev 2015 result (COCO has more object categories than VOC dataset, so
python examples/dsod/

- Evaluate the model:

python examples/dsod/

- Run a demo:

python examples/dsod/

**Note**: You can modify the file `` to design your own network structure as you like.
Expand All @@ -109,4 +110,3 @@ Zhiqiang Shen (zhiqiangshen13 at
Zhuang Liu (liuzhuangthu at

Any comments or suggestions are welcome!

0 comments on commit 152ff2b

Please sign in to comment.