-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsuper_deduper.py
78 lines (64 loc) · 2.32 KB
/
super_deduper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""Creates an HTML file that can be used to deduplicate post-it notes.
"""
import collections
import os
import sys
from PIL import Image
import imagehash
# Passed as ``hash_size`` to imagehash.dhash when hashing each image.
HASH_SIZE = 6
def hexdistance(one, two):
    """Count the positions at which two hex-digit strings differ.

    Digits are compared by numeric value, so ``'A'`` and ``'a'`` count as
    equal. Only positions present in both strings are compared; extra
    trailing characters of the longer string are ignored.
    """
    return sum(
        1
        for left, right in zip(one, two)
        if int(left, 16) != int(right, 16)
    )
def most_frequent_color(image):
    """Return the dominant colour of *image* as a coarse 3-character key.

    The image is viewed as RGB, its most common pixel value is found, and
    each of the three channels is reduced to a single hex digit: ``0`` for
    channel values below 128 and ``8`` otherwise (for example ``'880'``).

    Args:
        image: a PIL image (anything providing ``size``, ``convert`` and
            ``getcolors`` the way PIL images do).

    Returns:
        A three-character string of hex digits, one per R, G, B channel.
    """
    # Normalise to RGB so greyscale, palette and RGBA images also yield
    # exactly three channels; unpacking into r, g, b failed on those before.
    rgb_image = image.convert('RGB')
    width, height = rgb_image.size
    # A limit of width * height distinct colours cannot be exceeded, so
    # getcolors() does not give up and return None here.
    counts = rgb_image.getcolors(width * height)
    # Entries are (count, color); max() selects the same entry a descending
    # sort would place first, without sorting the whole list.
    _, dominant = max(counts)
    r, g, b = (int(channel / (16.0 * 8)) * 8 for channel in dominant)
    return '%01x%01x%01x' % (r, g, b)
try:  # Python 3
    from html import escape as _escape_html
except ImportError:  # Python 2
    from cgi import escape as _cgi_escape

    def _escape_html(text):
        """Escape ``&``, ``<``, ``>`` and ``"`` for embedding in HTML."""
        return _cgi_escape(text, quote=True)


# Colour key that each image's dominant colour is compared against; the
# resulting distance is the first component of the sort key.
BASE_COLOR = '000'

# Static opening lines of the generated page.
_PAGE_HEAD = (
    '<html>',
    '<head>',
    '<link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css">',
    '<link rel="stylesheet" type="text/css" href="css/bunga.css">',
    '<script src="https://code.jquery.com/jquery-2.2.2.min.js"></script>',
    '<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>',
    '<script src="js/clipboard.js"></script>',
    '<script src="js/json2.js"></script>',
    '<script src="js/bunga.js"></script>',
    '</head>',
    '<body>',
)

# Static closing lines of the generated page.
_PAGE_TAIL = (
    '</body>',
    '</html>',
)


def _group_images(directory):
    """Group the openable images in *directory* by (distance, color, hash).

    Returns a dict mapping ``(distance, color, hash_string)`` to the list of
    file paths that share that key. Entries PIL cannot open are skipped.
    """
    grouped = collections.defaultdict(list)
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        try:
            image = Image.open(path)
        except IOError:
            # Not something PIL can open as an image: ignore it.
            continue
        # Close the underlying file as soon as the image has been examined
        # so a large directory does not exhaust file handles.
        with image:
            color = most_frequent_color(image)
            image_hash = imagehash.dhash(image, hash_size=HASH_SIZE)
        distance = hexdistance(color, BASE_COLOR)
        grouped[(distance, color, str(image_hash))].append(path)
    return grouped


def _print_page(grouped):
    """Print the HTML page for *grouped* to standard output.

    Images are emitted in sorted key order, so files with the same colour
    and hash end up next to each other.
    """
    for line in _PAGE_HEAD:
        print(line)
    for _key, path_list in sorted(grouped.items()):
        for path in path_list:
            # Escape the path so characters such as quotes, '&' or '<' in a
            # file name cannot break out of the attribute or the markup.
            safe_path = _escape_html(path)
            print('<div class="col-md-1 holder">')
            print('<img class="post-it" src="%s" />' % safe_path)
            print('<div class="checkbox">')
            # NOTE(review): the span is classed "basename" but has always
            # carried the full path; kept as-is because js/bunga.js may
            # rely on it -- confirm before switching to os.path.basename.
            print('<label><input type="checkbox" value=""><span class="basename">%s</span></label>' % safe_path)
            print('</div>')
            print('</div>')
    for line in _PAGE_TAIL:
        print(line)


def main(argv=None):
    """Entry point: read the image directory from *argv* and print the page.

    Args:
        argv: argument list in ``sys.argv`` form; defaults to ``sys.argv``.

    Exits with a usage message when no directory argument is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('usage: super_deduper.py DIRECTORY')
    _print_page(_group_images(argv[1]))


if __name__ == '__main__':
    main()