images_analyse(most used colors), popular_words in texts, and script …

…to analyse the sentiments of customers based on reviews are added
codershiyar · Apr 13, 2023 · 4b3340e · 4b3340e
1 parent fdc360c
commit 4b3340e
Show file tree

Hide file tree

Showing 13 changed files with 1,281 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+imgs/*
diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/images_analyse-checkpoint.ipynb b/.ipynb_checkpoints/images_analyse-checkpoint.ipynb
@@ -0,0 +1,150 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "e08d4cb9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Analyse has been started\n",
+      " picture imgs\\zb1.jpg is done\n",
+      " picture imgs\\zb10.jpg is done\n",
+      " picture imgs\\zb3.jpg is done\n",
+      " picture imgs\\zb11.jpg is done\n",
+      " picture imgs\\zb2.jpg is done\n",
+      " picture imgs\\zb5.jpg is done\n",
+      " picture imgs\\zb4.jpg is done\n",
+      " picture imgs\\zb7.jpg is done\n",
+      " picture imgs\\zb8.jpg is done\n",
+      " picture imgs\\zb9.jpg is done\n",
+      "{'brown': 1, 'black': 11, 'gray': 8, 'silver': 4, 'indigo': 1, 'tan': 3, 'salmon': 1, 'peach': 1}\n",
+      "Analyse has been ended\n"
+     ]
+    }
+   ],
+   "source": [
+    "import extcolors\n",
+    "import os\n",
+    "import concurrent.futures\n",
+    "import math\n",
+    "from collections import Counter\n",
+    "\n",
+    "COLORS = {\n",
+    "    \"red\": ((255, 0, 0), (255, 99, 71)),\n",
+    "    \"orange\": ((255, 165, 0), (255, 140, 0)),\n",
+    "    \"yellow\": ((255, 255, 0), (255, 215, 0)),\n",
+    "    \"green\": ((0, 128, 0), (0, 255, 0)),\n",
+    "    \"blue\": ((0, 0, 255), (0, 0, 128)),\n",
+    "    \"purple\": ((128, 0, 128), (218, 112, 214)),\n",
+    "    \"pink\": ((255, 192, 203), (255, 105, 180)),\n",
+    "    \"brown\": ((165, 42, 42), (139, 69, 19)),\n",
+    "    \"black\": ((0, 0, 0), (25, 25, 25)),\n",
+    "    \"white\": ((255, 255, 255), (245, 245, 245)),\n",
+    "    \"gray\": ((128, 128, 128), (169, 169, 169)),\n",
+    "    \"navy\": ((0, 0, 128), (0, 0, 139)),\n",
+    "    \"olive\": ((128, 128, 0), (107, 142, 35)),\n",
+    "    \"silver\": ((192, 192, 192), (211, 211, 211)),\n",
+    "    \"gold\": ((255, 215, 0), (255, 165, 0)),\n",
+    "    \"sky blue\": ((135, 206, 235), (135, 206, 250)),\n",
+    "    \"turquoise\": ((64, 224, 208), (0, 245, 255)),\n",
+    "    \"indigo\": ((75, 0, 130), (138, 43, 226)),\n",
+    "    \"violet\": ((238, 130, 238), (148, 0, 211)),\n",
+    "    \"beige\": ((245, 245, 220), (245, 245, 220)),\n",
+    "    \"magenta\": ((255, 0, 255), (199, 21, 133)),\n",
+    "    \"orchid\": ((218, 112, 214), (255, 131, 250)),\n",
+    "    \"peach\": ((255, 218, 185), (255, 229, 180)),\n",
+    "    \"salmon\": ((250, 128, 114), (255, 99, 71)),\n",
+    "    \"tan\": ((210, 180, 140), (210, 180, 140))\n",
+    "}\n",
+    "\n",
+    "def determine_closest_color(rgb):\n",
+    "    \"\"\"\n",
+    "    Map an RGB tuple to the name of the nearest entry in the COLORS dictionary.\n",
+    "\n",
+    "    Nearness is the Euclidean distance between rgb and the midpoint of the two\n",
+    "    reference tuples stored for each name. On a tie the entry listed first in\n",
+    "    COLORS wins; \"unknown\" is returned only when COLORS is empty.\n",
+    "    \"\"\"\n",
+    "    def distance_to(name):\n",
+    "        # Distance from rgb to the midpoint of this name's two reference tuples.\n",
+    "        low, high = COLORS[name]\n",
+    "        delta_r = rgb[0] - (low[0] + high[0]) / 2\n",
+    "        delta_g = rgb[1] - (low[1] + high[1]) / 2\n",
+    "        delta_b = rgb[2] - (low[2] + high[2]) / 2\n",
+    "        return math.sqrt(delta_r ** 2 + delta_g ** 2 + delta_b ** 2)\n",
+    "\n",
+    "    # min() keeps the first of several equally near names, like a strict '<' scan.\n",
+    "    return min(COLORS, key=distance_to, default=\"unknown\")\n",
+    "\n",
+    "def most_common_colors(img_file):\n",
+    "    \"\"\"\n",
+    "    Return up to three RGB colors of an image, skipping its single most frequent color.\n",
+    "\n",
+    "    img_file is a file name relative to the \"imgs\" directory. The four most frequent\n",
+    "    colors reported by extcolors are ranked by count and the top one is dropped\n",
+    "    (presumably the background - TODO confirm). Fewer than three colors, or an\n",
+    "    empty list, are returned when fewer than four colors are available.\n",
+    "    \"\"\"\n",
+    "    img_file = os.path.join(\"imgs\", img_file)\n",
+    "    colors = extcolors.extract_from_path(img_file, tolerance=12, limit=12)[0]\n",
+    "    color_counts = Counter(dict(colors)).most_common(4)\n",
+    "    print(f\" picture {img_file} is done\")\n",
+    "    # Slice instead of `del color_counts[0]` so an empty result cannot raise IndexError.\n",
+    "    return [color for color, _count in color_counts[1:]]\n",
+    "\n",
+    "def main():\n",
+    "    \"\"\"\n",
+    "    Analyse every file in the \"imgs\" directory and print one dictionary that maps each\n",
+    "    color name to how often it was the closest match, summed over all images.\n",
+    "    \"\"\"\n",
+    "    file_names = os.listdir(\"imgs\")\n",
+    "    extracted = []\n",
+    "    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as pool:\n",
+    "        pending = [pool.submit(most_common_colors, name) for name in file_names]\n",
+    "        # Collect results in completion order, which may differ from listing order.\n",
+    "        for finished in concurrent.futures.as_completed(pending):\n",
+    "            extracted.extend(finished.result())\n",
+    "    tally = Counter(determine_closest_color(rgb) for rgb in extracted)\n",
+    "    # Print as a plain dict so the output does not carry the Counter(...) wrapper.\n",
+    "    print(dict(tally))\n",
+    "\n",
+    "if __name__ == '__main__':\n",
+    "    print(\"Analyse has been started\")\n",
+    "    main()\n",
+    "    print(\"Analysis has ended\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2be7b637",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "3c06e3e46abf38078fe4dac36a0085ec2b134ebbd73dd076183d243eeca6918f"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/popular_words-checkpoint.ipynb b/.ipynb_checkpoints/popular_words-checkpoint.ipynb
@@ -0,0 +1,70 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "bb08745f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "defaultdict(<class 'int'>, {'Goede': 8, 'functionele': 1, 'jas': 110, 'Top': 12, 'Gaaf': 1, 'jasje': 1, 'Al': 1, 'met': 4, 'al': 3, 'de': 20, 'perfecte': 3, 'vest': 1, 'Prima': 21, 'Jas': 8, 'niet': 4, 'passend': 1, 'helaas': 1, 'is': 6, 'te': 6, 'kort': 1, 'voor': 23, 'maat': 6, 'Fijne': 12, 'tussenjas': 1, 'Oke': 1, 'Lekker': 5, 'jack': 3, 'om': 2, 'snel': 1, 'even': 1, 'bij': 1, 'je': 2, 'steken': 1, 'Mooie': 7, 'Hele': 6, 'Superfijne': 2, 'softshell': 2, 'sunflower': 1, 'Net': 1, 'zo': 2, 'goed': 10, 'als': 4, 'concurrenten': 1, 'die': 5, 'x': 1, 'duur': 1, 'zijn': 1, 'Nice': 1, 'Niet': 1, 'geheel': 1, 'winddicht': 3, 'beschermt': 1, 'tegen': 1, 'wind': 3, 'Super': 14, 'fijn': 2, 'Prachtige': 2, 'een': 17, 'mooie': 8, 'prijs': 12, 'Perfecte': 7, 'deze': 2, 'koude': 3, 'dagen': 5, 'Heerlijke': 10, 'verwarment': 1, 'Geweldig': 5, 'winterjas': 13, 'parka': 5, 'Perfect': 5, 'en': 22, 'lekker': 5, 'warm': 13, 'O': 1, 'Gewedlige': 1, 'warme': 23, 'geweldige': 3, 'Echt': 3, 'top': 4, 'Ben': 1, 'er': 4, 'super': 2, 'blij': 1, 'mee': 2, 'Eindelijk': 1, 'Een': 8, 'fijne': 17, 'waterdichte': 4, 'goede': 3, 'Uitstekende': 1, 'kwaliteit': 4, 'mooi': 1, 'prijsje': 2, 'De': 2, 'het': 4, 'na': 2, 'jaar': 1, 'Gewoon': 1, 'echte': 3, 'Antarctisch': 1, 'proof': 1, 'prima': 3, 'Superwarm': 1, 'Gave': 1, 'uitstraling': 1, 'smilingfacewithclosedeyes': 1, 'Ook': 1, 'Geweldige': 4, 'waardeloos': 1, 'verpakt': 1, 'Ideale': 2, 'Zware': 1, 'Goed': 3, 'op': 4, 'scooter': 1, 'Ik': 3, 'gebruik': 2, 'paard': 1, 'werkt': 1, 'Heerlijk': 2, 'scheurt': 1, 'net': 1, 'boven': 2, 'borst': 1, 'funcioneel': 1, 'aankooptotaal': 1, 'geen': 4, 'spijt': 1, 'Past': 1, 'geld': 2, 'zit': 3, 'perfect': 4, 'van': 5, 'pre': 1, 'fantastiche': 1, 'maar': 5, 'n': 1, 'minpuntje': 1, 'heb': 3, 'elke': 3, 'dag': 3, 'plezier': 2, 'comfortabel': 2, 'regendicht': 1, 'Fantastisch': 2, 'Nooit': 1, 'meer': 3, 'Woolrich': 1, 'kou': 1, 'maakt': 1, 'schijn': 1, 'kans': 1, 'Meer': 1, 'dan': 1, 'voldoet': 1, 'aan': 2, 
'alles': 1, 'winter': 3, 'Kwaliteit': 1, 'Vind': 1, 'comfortabele': 2, 'heerlijke': 2, 'Vlugge': 1, 'bezorging': 1, 'mag': 1, 'verwachte': 1, 'Zeer': 2, 'Ziet': 1, 'erg': 2, 'uit': 1, 'pasvorm': 2, 'geweldig': 1, 'juist': 1, 'heerlijk': 2, 'online': 1, 'besteldomgeruild': 1, 'aangezien': 1, 'eng': 1, 'uitvalt': 1, 'degelijke': 1, 'Warme': 3, 'Sportieve': 1, 'Lekkere': 2, 'laat': 1, 'komen': 1, 'bestel': 1, 'L': 1, 'krijg': 1, 'ik': 5, 'XL': 1, 'opgestuurd': 1, 'Maakt': 1, 'belofte': 1, 'waar': 1, 'zware': 1, 'Winterjas': 1, 'Houd': 1, 'me': 1, 'flinke': 1, 'kanttekeningen': 1, 'Great': 1, 'wintercoat': 1, 'echt': 2, 'buiten': 1, 'pa': 1, 'lichte': 2, 'Very': 1, 'good': 1, 'klein': 5, 'Waterdicht': 1, 'Warm': 1, 'Het': 2, 'artikel': 1, 'zoals': 1, 'aanbevolen': 1, 'wordt': 1, 'klopt': 1, 'Voir': 1, 'dat': 2, 'ontzettend': 1, 'lekkere': 1, 'verwachting': 3, 'weer': 1, 'waterdicht': 3, 'regenjas': 5, 'regenachtige': 1, 'prachtige': 1, 'zomer': 1, 'Superjas': 1, 'Voldoet': 1, 'had': 1, 'groene': 1, 'nu': 1, 'ook': 3, 'blauwe': 1, 'model': 1, 'qua': 1, 'uiterlijk': 1, 'Fantastische': 3, 'Boven': 1, 'past': 1, 'Tevreden': 1, 'Ruime': 1, 'fiets': 2, 'in': 5, 'hard': 1, 'lopen': 1, 'Erg': 2, 'waterafstotende': 1, 'Veel': 1, 'jammer': 1, 'Valt': 1, 'mega': 1, 'door': 1, 'luchtgaten': 1, 'Veeeel': 1, 'Winddicht': 2, 'cute': 2, 'hele': 2, 'draag': 2, 'heel': 4, 'stof': 2, 'snit': 2, 'beetje': 2, 'smal': 2, 'schouders': 2, 'dunne': 2, 'Blij': 1, 'Mooi': 1, 'Ja': 1, 'gewoon': 1, 'allemaal': 1, 'Eerste': 1, 'gekocht': 1, 'merk': 1, 'Quechua': 1, 'omdat': 1, 'spr': 1, 'Te': 1, 'zowel': 1, 'lente': 1, 'Heel': 1, 'veel': 1, 'gehad': 1, 'herfst': 1, 'prijskwaliteitsverhouding': 1, 'grootste': 1})\n"
+     ]
+    }
+   ],
+   "source": [
+    "import csv\n",
+    "from collections import defaultdict\n",
+    "import re\n",
+    "\n",
+    "# create a defaultdict to keep track of word counts\n",
+    "word_count = defaultdict(int)\n",
+    "\n",
+    "# Matches every character that is neither a letter nor whitespace. \\w is Unicode-aware\n",
+    "# for str patterns, so accented letters (e.g. in \"ongeïnspireerd\") are kept instead of\n",
+    "# being deleted; digits and underscores, which \\w also matches, are removed explicitly.\n",
+    "non_letter = re.compile(r'[^\\w\\s]|[\\d_]')\n",
+    "\n",
+    "# NOTE(review): no encoding is passed, so the platform default is used - confirm which\n",
+    "# encoding data/reviews.csv was written with and pass it explicitly.\n",
+    "# newline='' is what the csv module documents for files handed to a reader.\n",
+    "with open('data/reviews.csv', 'r', newline='') as file:\n",
+    "    # iterate over each row in the file\n",
+    "    for row in csv.DictReader(file):\n",
+    "        # strip the non-letter characters from the review title and split it into words\n",
+    "        words = non_letter.sub('', row['title']).split()\n",
+    "\n",
+    "        # increment the count of each word; counting is case-sensitive\n",
+    "        for word in words:\n",
+    "            word_count[word] += 1\n",
+    "\n",
+    "# print the final word count dictionary\n",
+    "print(word_count)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/review_analyse-checkpoint.ipynb b/.ipynb_checkpoints/review_analyse-checkpoint.ipynb
diff --git a/.ipynb_checkpoints/sentiment_analyse-checkpoint.ipynb b/.ipynb_checkpoints/sentiment_analyse-checkpoint.ipynb
diff --git a/data/negative_words.json b/data/negative_words.json
@@ -0,0 +1,76 @@
+[
+    "helaas",
+    "fijn",
+    "verdrietig",
+    "teleurgesteld",
+    "wanhopig",
+    "ontroostbaar",
+    "verslagen",
+    "onvriendelijk",
+    "afstandelijk",
+    "onaangenaam",
+    "bot",
+    "onbeschoft",
+    "lui",
+    "onproductief",
+    "traag",
+    "slordig",
+    "niet-gefocust",
+    "ongeïnspireerd",
+    "ongemotiveerd",
+    "verveeld",
+    "lusteloos",
+    "onverschillig",
+    "egoïstisch",
+    "onverantwoordelijk",
+    "onbetrouwbaar",
+    "zelfzuchtig",
+    "onethisch",
+    "slecht",
+    "kut",
+    "fuck",
+    "erg slecht",
+    "waardeloos",
+    "rampzalig",
+    "verschrikkelijk",
+    "afschuwelijk",
+    "gruwelijk",
+    "vreselijk",
+    "bedroevend",
+    "schandalig",
+    "onzinnig",
+    "belachelijk",
+    "idioot",
+    "dom",
+    "stom",
+    "onacceptabel",
+    "onvergeeflijk",
+    "weerzinwekkend",
+    "verachtelijk",
+    "walgelijk",
+    "misselijkmakend",
+    "ondraaglijk",
+    "storend",
+    "irritant",
+    "vervelend",
+    "frustrerend",
+    "teleurstellend",
+    "onzin",
+    "nutteloos",
+    "zonde van het geld",
+    "verspilling van tijd en moeite",
+    "bedrieglijk",
+    "misleidend",
+    "vals",
+    "bedrieglijk",
+    "nep",
+    "stuk",
+    "slechte",
+    "Niet",
+    "klein",
+    "kort",
+    "scheur",
+    "flink",
+    "dunne",
+    "jammer"
+  ]
diff --git a/data/neutrale_words.json b/data/neutrale_words.json
@@ -0,0 +1,54 @@
+
+[
+          "rood",
+          "oranje",
+          "geel",
+          "groen",
+          "blauw",
+          "paars",
+          "roze",
+          "zwart",
+          "wit",
+          "grijs",
+          "hond",
+          "kat",
+          "vis",
+          "vogel",
+          "paard",
+          "koe",
+          "schaap",
+          "varken",
+          "kikker",
+          "spin",
+          "appel",
+          "banaan",
+          "sinaasappel",
+          "peer",
+          "aardbei",
+          "druif",
+          "ananas",
+          "wortel",
+          "sla",
+          "tomaat",
+          "piano",
+          "gitaar",
+          "drumstel",
+          "viool",
+          "trompet",
+          "saxofoon",
+          "harp",
+          "accordeon",
+          "fluit",
+          "xylofoon",
+          "arts",
+          "leraar",
+          "verpleegkundige",
+          "monteur",
+          "kok",
+          "receptionist",
+          "advocaat",
+          "bankier",
+          "accountant",
+          "architect"
+
+        ]