-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
images_analyse(most used colors), popular_words in texts, and script …
…to analyse the sentiments of customers based on reviews are added
- Loading branch information
1 parent
fdc360c
commit 4b3340e
Showing
13 changed files
with
1,281 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
imgs/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"cells": [], | ||
"metadata": {}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 17, | ||
"id": "e08d4cb9", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Analyse has been started\n", | ||
" picture imgs\\zb1.jpg is done\n", | ||
" picture imgs\\zb10.jpg is done\n", | ||
" picture imgs\\zb3.jpg is done\n", | ||
" picture imgs\\zb11.jpg is done\n", | ||
" picture imgs\\zb2.jpg is done\n", | ||
" picture imgs\\zb5.jpg is done\n", | ||
" picture imgs\\zb4.jpg is done\n", | ||
" picture imgs\\zb7.jpg is done\n", | ||
" picture imgs\\zb8.jpg is done\n", | ||
" picture imgs\\zb9.jpg is done\n", | ||
"{'brown': 1, 'black': 11, 'gray': 8, 'silver': 4, 'indigo': 1, 'tan': 3, 'salmon': 1, 'peach': 1}\n", | ||
"Analyse has been ended\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import extcolors\n", | ||
"import os\n", | ||
"import concurrent.futures\n", | ||
"import math\n", | ||
"from collections import Counter\n", | ||
"\n", | ||
"COLORS = {\n", | ||
" \"red\": ((255, 0, 0), (255, 99, 71)),\n", | ||
" \"orange\": ((255, 165, 0), (255, 140, 0)),\n", | ||
" \"yellow\": ((255, 255, 0), (255, 215, 0)),\n", | ||
" \"green\": ((0, 128, 0), (0, 255, 0)),\n", | ||
" \"blue\": ((0, 0, 255), (0, 0, 128)),\n", | ||
" \"purple\": ((128, 0, 128), (218, 112, 214)),\n", | ||
" \"pink\": ((255, 192, 203), (255, 105, 180)),\n", | ||
" \"brown\": ((165, 42, 42), (139, 69, 19)),\n", | ||
" \"black\": ((0, 0, 0), (25, 25, 25)),\n", | ||
" \"white\": ((255, 255, 255), (245, 245, 245)),\n", | ||
" \"gray\": ((128, 128, 128), (169, 169, 169)),\n", | ||
" \"navy\": ((0, 0, 128), (0, 0, 139)),\n", | ||
" \"olive\": ((128, 128, 0), (107, 142, 35)),\n", | ||
" \"silver\": ((192, 192, 192), (211, 211, 211)),\n", | ||
" \"gold\": ((255, 215, 0), (255, 165, 0)),\n", | ||
" \"sky blue\": ((135, 206, 235), (135, 206, 250)),\n", | ||
" \"turquoise\": ((64, 224, 208), (0, 245, 255)),\n", | ||
" \"indigo\": ((75, 0, 130), (138, 43, 226)),\n", | ||
" \"violet\": ((238, 130, 238), (148, 0, 211)),\n", | ||
" \"beige\": ((245, 245, 220), (245, 245, 220)),\n", | ||
" \"magenta\": ((255, 0, 255), (199, 21, 133)),\n", | ||
" \"orchid\": ((218, 112, 214), (255, 131, 250)),\n", | ||
" \"peach\": ((255, 218, 185), (255, 229, 180)),\n", | ||
" \"salmon\": ((250, 128, 114), (255, 99, 71)),\n", | ||
" \"tan\": ((210, 180, 140), (210, 180, 140))\n", | ||
"}\n", | ||
"\n", | ||
"def determine_closest_color(rgb):\n", | ||
" \"\"\"\n", | ||
" Given an RGB tuple, returns the name of the closest color from the COLORS dictionary.\n", | ||
" \"\"\"\n", | ||
" min_distance = math.inf\n", | ||
" closest_color = \"unknown\"\n", | ||
" for color, (min_rgb, max_rgb) in COLORS.items():\n", | ||
" r_distance = rgb[0] - (min_rgb[0] + max_rgb[0]) / 2\n", | ||
" g_distance = rgb[1] - (min_rgb[1] + max_rgb[1]) / 2\n", | ||
" b_distance = rgb[2] - (min_rgb[2] + max_rgb[2]) / 2\n", | ||
" distance = math.sqrt(r_distance ** 2 + g_distance ** 2 + b_distance ** 2)\n", | ||
" if distance < min_distance:\n", | ||
" min_distance = distance\n", | ||
" closest_color = color\n", | ||
" return closest_color\n", | ||
"\n", | ||
"def most_common_colors(img_file):\n", | ||
" \"\"\"\n", | ||
" Given the name of an image file, returns a list of the three most common colors in the image.\n", | ||
" \"\"\"\n", | ||
" img_file = os.path.join(\"imgs\", img_file)\n", | ||
" colors = extcolors.extract_from_path(img_file, tolerance=12, limit=12)[0]\n", | ||
" color_counts = Counter(dict(colors)).most_common(4)\n", | ||
" print(f\" picture {img_file} is done\")\n", | ||
" del color_counts[0]\n", | ||
" return [color[0] for color in color_counts]\n", | ||
"\n", | ||
"def main():\n", | ||
" \"\"\"\n", | ||
" Main function that processes all the images in the \"imgs\" directory and prints the count of the closest color for each image.\n", | ||
" \"\"\"\n", | ||
" img_files = os.listdir(\"imgs\")\n", | ||
" colors_dict = {}\n", | ||
" with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:\n", | ||
" futures = [executor.submit(most_common_colors, img_file) for img_file in img_files]\n", | ||
" list_colors = [color for future in concurrent.futures.as_completed(futures) for color in future.result()]\n", | ||
" for rgb in list_colors:\n", | ||
" color = determine_closest_color(rgb)\n", | ||
" if color in colors_dict:\n", | ||
" colors_dict[color] += 1\n", | ||
" else:\n", | ||
" colors_dict[color] = 1\n", | ||
" print(colors_dict)\n", | ||
"\n", | ||
"if __name__ == '__main__':\n", | ||
" print(\"Analyse has been started\")\n", | ||
" main()\n", | ||
" print(\"Analysis has ended\")\n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "2be7b637", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.9" | ||
}, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "3c06e3e46abf38078fe4dac36a0085ec2b134ebbd73dd076183d243eeca6918f" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "bb08745f", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"defaultdict(<class 'int'>, {'Goede': 8, 'functionele': 1, 'jas': 110, 'Top': 12, 'Gaaf': 1, 'jasje': 1, 'Al': 1, 'met': 4, 'al': 3, 'de': 20, 'perfecte': 3, 'vest': 1, 'Prima': 21, 'Jas': 8, 'niet': 4, 'passend': 1, 'helaas': 1, 'is': 6, 'te': 6, 'kort': 1, 'voor': 23, 'maat': 6, 'Fijne': 12, 'tussenjas': 1, 'Oke': 1, 'Lekker': 5, 'jack': 3, 'om': 2, 'snel': 1, 'even': 1, 'bij': 1, 'je': 2, 'steken': 1, 'Mooie': 7, 'Hele': 6, 'Superfijne': 2, 'softshell': 2, 'sunflower': 1, 'Net': 1, 'zo': 2, 'goed': 10, 'als': 4, 'concurrenten': 1, 'die': 5, 'x': 1, 'duur': 1, 'zijn': 1, 'Nice': 1, 'Niet': 1, 'geheel': 1, 'winddicht': 3, 'beschermt': 1, 'tegen': 1, 'wind': 3, 'Super': 14, 'fijn': 2, 'Prachtige': 2, 'een': 17, 'mooie': 8, 'prijs': 12, 'Perfecte': 7, 'deze': 2, 'koude': 3, 'dagen': 5, 'Heerlijke': 10, 'verwarment': 1, 'Geweldig': 5, 'winterjas': 13, 'parka': 5, 'Perfect': 5, 'en': 22, 'lekker': 5, 'warm': 13, 'O': 1, 'Gewedlige': 1, 'warme': 23, 'geweldige': 3, 'Echt': 3, 'top': 4, 'Ben': 1, 'er': 4, 'super': 2, 'blij': 1, 'mee': 2, 'Eindelijk': 1, 'Een': 8, 'fijne': 17, 'waterdichte': 4, 'goede': 3, 'Uitstekende': 1, 'kwaliteit': 4, 'mooi': 1, 'prijsje': 2, 'De': 2, 'het': 4, 'na': 2, 'jaar': 1, 'Gewoon': 1, 'echte': 3, 'Antarctisch': 1, 'proof': 1, 'prima': 3, 'Superwarm': 1, 'Gave': 1, 'uitstraling': 1, 'smilingfacewithclosedeyes': 1, 'Ook': 1, 'Geweldige': 4, 'waardeloos': 1, 'verpakt': 1, 'Ideale': 2, 'Zware': 1, 'Goed': 3, 'op': 4, 'scooter': 1, 'Ik': 3, 'gebruik': 2, 'paard': 1, 'werkt': 1, 'Heerlijk': 2, 'scheurt': 1, 'net': 1, 'boven': 2, 'borst': 1, 'funcioneel': 1, 'aankooptotaal': 1, 'geen': 4, 'spijt': 1, 'Past': 1, 'geld': 2, 'zit': 3, 'perfect': 4, 'van': 5, 'pre': 1, 'fantastiche': 1, 'maar': 5, 'n': 1, 'minpuntje': 1, 'heb': 3, 'elke': 3, 'dag': 3, 'plezier': 2, 'comfortabel': 2, 'regendicht': 1, 'Fantastisch': 2, 'Nooit': 1, 'meer': 3, 'Woolrich': 1, 'kou': 1, 'maakt': 1, 'schijn': 1, 'kans': 1, 'Meer': 1, 'dan': 1, 'voldoet': 1, 'aan': 2, 'alles': 1, 'winter': 3, 'Kwaliteit': 1, 'Vind': 1, 'comfortabele': 2, 'heerlijke': 2, 'Vlugge': 1, 'bezorging': 1, 'mag': 1, 'verwachte': 1, 'Zeer': 2, 'Ziet': 1, 'erg': 2, 'uit': 1, 'pasvorm': 2, 'geweldig': 1, 'juist': 1, 'heerlijk': 2, 'online': 1, 'besteldomgeruild': 1, 'aangezien': 1, 'eng': 1, 'uitvalt': 1, 'degelijke': 1, 'Warme': 3, 'Sportieve': 1, 'Lekkere': 2, 'laat': 1, 'komen': 1, 'bestel': 1, 'L': 1, 'krijg': 1, 'ik': 5, 'XL': 1, 'opgestuurd': 1, 'Maakt': 1, 'belofte': 1, 'waar': 1, 'zware': 1, 'Winterjas': 1, 'Houd': 1, 'me': 1, 'flinke': 1, 'kanttekeningen': 1, 'Great': 1, 'wintercoat': 1, 'echt': 2, 'buiten': 1, 'pa': 1, 'lichte': 2, 'Very': 1, 'good': 1, 'klein': 5, 'Waterdicht': 1, 'Warm': 1, 'Het': 2, 'artikel': 1, 'zoals': 1, 'aanbevolen': 1, 'wordt': 1, 'klopt': 1, 'Voir': 1, 'dat': 2, 'ontzettend': 1, 'lekkere': 1, 'verwachting': 3, 'weer': 1, 'waterdicht': 3, 'regenjas': 5, 'regenachtige': 1, 'prachtige': 1, 'zomer': 1, 'Superjas': 1, 'Voldoet': 1, 'had': 1, 'groene': 1, 'nu': 1, 'ook': 3, 'blauwe': 1, 'model': 1, 'qua': 1, 'uiterlijk': 1, 'Fantastische': 3, 'Boven': 1, 'past': 1, 'Tevreden': 1, 'Ruime': 1, 'fiets': 2, 'in': 5, 'hard': 1, 'lopen': 1, 'Erg': 2, 'waterafstotende': 1, 'Veel': 1, 'jammer': 1, 'Valt': 1, 'mega': 1, 'door': 1, 'luchtgaten': 1, 'Veeeel': 1, 'Winddicht': 2, 'cute': 2, 'hele': 2, 'draag': 2, 'heel': 4, 'stof': 2, 'snit': 2, 'beetje': 2, 'smal': 2, 'schouders': 2, 'dunne': 2, 'Blij': 1, 'Mooi': 1, 'Ja': 1, 'gewoon': 1, 'allemaal': 1, 'Eerste': 1, 'gekocht': 1, 'merk': 1, 'Quechua': 1, 'omdat': 1, 'spr': 1, 'Te': 1, 'zowel': 1, 'lente': 1, 'Heel': 1, 'veel': 1, 'gehad': 1, 'herfst': 1, 'prijskwaliteitsverhouding': 1, 'grootste': 1})\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import csv\n", | ||
"from collections import defaultdict\n", | ||
"import re\n", | ||
"\n", | ||
"# create a defaultdict to keep track of word counts\n", | ||
"word_count = defaultdict(int)\n", | ||
"\n", | ||
"# open the CSV file and read it using the csv module\n", | ||
"with open('data/reviews.csv', 'r') as file:\n", | ||
" reader = csv.DictReader(file)\n", | ||
" \n", | ||
" # iterate over each row in the file\n", | ||
" for row in reader:\n", | ||
" # extract the review title from the row\n", | ||
" title = row['title']\n", | ||
" \n", | ||
" # remove any non-letter characters, except spaces\n", | ||
" title = re.sub(r'[^a-zA-Z\\s]', '', title)\n", | ||
" \n", | ||
" # split the title into individual words\n", | ||
" words = title.split()\n", | ||
" \n", | ||
" # iterate over each word in the title and increment its count in the defaultdict\n", | ||
" for word in words:\n", | ||
" word_count[word] += 1\n", | ||
"\n", | ||
"# print the final word count dictionary\n", | ||
"print(word_count)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.9" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
[ | ||
"helaas", | ||
"fijn", | ||
"verdrietig", | ||
"teleurgesteld", | ||
"wanhopig", | ||
"ontroostbaar", | ||
"verslagen", | ||
"onvriendelijk", | ||
"afstandelijk", | ||
"onaangenaam", | ||
"bot", | ||
"onbeschoft", | ||
"lui", | ||
"onproductief", | ||
"traag", | ||
"slordig", | ||
"niet-gefocust", | ||
"ongeïnspireerd", | ||
"ongemotiveerd", | ||
"verveeld", | ||
"lusteloos", | ||
"onverschillig", | ||
"egoïstisch", | ||
"onverantwoordelijk", | ||
"onbetrouwbaar", | ||
"zelfzuchtig", | ||
"onethisch", | ||
"slecht", | ||
"kut", | ||
"fuck", | ||
"erg slecht", | ||
"waardeloos", | ||
"rampzalig", | ||
"verschrikkelijk", | ||
"afschuwelijk", | ||
"gruwelijk", | ||
"vreselijk", | ||
"bedroevend", | ||
"schandalig", | ||
"onzinnig", | ||
"belachelijk", | ||
"idioot", | ||
"dom", | ||
"stom", | ||
"onacceptabel", | ||
"onvergeeflijk", | ||
"weerzinwekkend", | ||
"verachtelijk", | ||
"walgelijk", | ||
"misselijkmakend", | ||
"ondraaglijk", | ||
"storend", | ||
"irritant", | ||
"vervelend", | ||
"frustrerend", | ||
"teleurstellend", | ||
"onzin", | ||
"nutteloos", | ||
"zonde van het geld", | ||
"verspilling van tijd en moeite", | ||
"bedrieglijk", | ||
"misleidend", | ||
"vals", | ||
"bedrieglijk", | ||
"nep", | ||
"stuk", | ||
"slechte", | ||
"Niet", | ||
"klein", | ||
"kort", | ||
"scheur", | ||
"flink", | ||
"dunne", | ||
"jammer" | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
|
||
[ | ||
"rood", | ||
"oranje", | ||
"geel", | ||
"groen", | ||
"blauw", | ||
"paars", | ||
"roze", | ||
"zwart", | ||
"wit", | ||
"grijs", | ||
"hond", | ||
"kat", | ||
"vis", | ||
"vogel", | ||
"paard", | ||
"koe", | ||
"schaap", | ||
"varken", | ||
"kikker", | ||
"spin", | ||
"appel", | ||
"banaan", | ||
"sinaasappel", | ||
"peer", | ||
"aardbei", | ||
"druif", | ||
"ananas", | ||
"wortel", | ||
"sla", | ||
"tomaat", | ||
"piano", | ||
"gitaar", | ||
"drumstel", | ||
"viool", | ||
"trompet", | ||
"saxofoon", | ||
"harp", | ||
"accordeon", | ||
"fluit", | ||
"xylofoon", | ||
"arts", | ||
"leraar", | ||
"verpleegkundige", | ||
"monteur", | ||
"kok", | ||
"receptionist", | ||
"advocaat", | ||
"bankier", | ||
"accountant", | ||
"architect" | ||
|
||
] |
Oops, something went wrong.