Skip to content

Commit

Permalink
added recommendations and content loader
Browse files Browse the repository at this point in the history
  • Loading branch information
samay-rgb committed Dec 26, 2021
1 parent b31a29c commit d6879ca
Show file tree
Hide file tree
Showing 17 changed files with 19,306 additions and 21,343 deletions.
550 changes: 550 additions & 0 deletions backend/Data_Analysis (1).ipynb

Large diffs are not rendered by default.

1,357 changes: 1,357 additions & 0 deletions backend/Model_Preparation.ipynb

Large diffs are not rendered by default.

302 changes: 302 additions & 0 deletions backend/Predictions.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0b7b123f",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import difflib\n",
"import pickle \n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4d9e252b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>imdb_title_id</th>\n",
" <th>original_title</th>\n",
" <th>year</th>\n",
" <th>genre</th>\n",
" <th>director</th>\n",
" <th>actors</th>\n",
" <th>avg_vote</th>\n",
" <th>cast_size</th>\n",
" <th>comb</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>tt0035423</td>\n",
" <td>kate &amp; leopold</td>\n",
" <td>2001</td>\n",
" <td>Comedy, Fantasy, Romance</td>\n",
" <td>James Mangold</td>\n",
" <td>Meg Ryan Hugh Jackman Liev Schreiber Breckin M...</td>\n",
" <td>6.4</td>\n",
" <td>15</td>\n",
" <td>James Mangold Meg Ryan Hugh Jackman Liev Schre...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>tt0069049</td>\n",
" <td>the other side of the wind</td>\n",
" <td>2018</td>\n",
" <td>Drama</td>\n",
" <td>Orson Welles</td>\n",
" <td>John Huston Oja Kodar Peter Bogdanovich Susan ...</td>\n",
" <td>6.8</td>\n",
" <td>15</td>\n",
" <td>Orson Welles John Huston Oja Kodar Peter Bogda...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>tt0088751</td>\n",
" <td>the naked monster</td>\n",
" <td>2005</td>\n",
" <td>Comedy, Horror, Sci-Fi</td>\n",
" <td>Wayne Berwick, Ted Newsom</td>\n",
" <td>Kenneth Tobey Brinke Stevens R.G. Wilson John ...</td>\n",
" <td>5.4</td>\n",
" <td>15</td>\n",
" <td>Wayne Berwick, Ted Newsom Kenneth Tobey Brinke...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>tt0096056</td>\n",
" <td>crime and punishment</td>\n",
" <td>2002</td>\n",
" <td>Drama</td>\n",
" <td>Menahem Golan</td>\n",
" <td>Crispin Glover Vanessa Redgrave John Hurt Marg...</td>\n",
" <td>5.8</td>\n",
" <td>15</td>\n",
" <td>Menahem Golan Crispin Glover Vanessa Redgrave ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>tt0113026</td>\n",
" <td>the fantasticks</td>\n",
" <td>2000</td>\n",
" <td>Musical, Romance</td>\n",
" <td>Michael Ritchie</td>\n",
" <td>Joel Grey Barnard Hughes Jean Louisa Kelly Joe...</td>\n",
" <td>5.6</td>\n",
" <td>15</td>\n",
" <td>Michael Ritchie Joel Grey Barnard Hughes Jean ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 imdb_title_id original_title year \\\n",
"0 0 tt0035423 kate & leopold 2001 \n",
"1 1 tt0069049 the other side of the wind 2018 \n",
"2 2 tt0088751 the naked monster 2005 \n",
"3 3 tt0096056 crime and punishment 2002 \n",
"4 4 tt0113026 the fantasticks 2000 \n",
"\n",
" genre director \\\n",
"0 Comedy, Fantasy, Romance James Mangold \n",
"1 Drama Orson Welles \n",
"2 Comedy, Horror, Sci-Fi Wayne Berwick, Ted Newsom \n",
"3 Drama Menahem Golan \n",
"4 Musical, Romance Michael Ritchie \n",
"\n",
" actors avg_vote cast_size \\\n",
"0 Meg Ryan Hugh Jackman Liev Schreiber Breckin M... 6.4 15 \n",
"1 John Huston Oja Kodar Peter Bogdanovich Susan ... 6.8 15 \n",
"2 Kenneth Tobey Brinke Stevens R.G. Wilson John ... 5.4 15 \n",
"3 Crispin Glover Vanessa Redgrave John Hurt Marg... 5.8 15 \n",
"4 Joel Grey Barnard Hughes Jean Louisa Kelly Joe... 5.6 15 \n",
"\n",
" comb \n",
"0 James Mangold Meg Ryan Hugh Jackman Liev Schre... \n",
"1 Orson Welles John Huston Oja Kodar Peter Bogda... \n",
"2 Wayne Berwick, Ted Newsom Kenneth Tobey Brinke... \n",
"3 Menahem Golan Crispin Glover Vanessa Redgrave ... \n",
"4 Michael Ritchie Joel Grey Barnard Hughes Jean ... "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movieslist = pickle.load(open('movieslist.pkl','rb'))\n",
"movieslist.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b670471f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\LEGION\\AppData\\Roaming\\Python\\Python39\\site-packages\\IPython\\core\\interactiveshell.py:3441: DtypeWarning: Columns (3) have mixed types.Specify dtype option on import or set low_memory=False.\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n"
]
}
],
"source": [
"similarity = pickle.load(open('similarity.pkl','rb'))\n",
"df = pd.read_csv('final_movies.csv')\n",
"movies_titles = df['original_title'].tolist()\n",
"big_df = pd.read_csv('IMDb movies.csv')\n",
"mt = big_df['original_title'].tolist()\n",
"def closest(arr,s):\n",
" l = s.split()\n",
" l = set(l)\n",
" match = arr[0]\n",
" wrd_match = -1\n",
" if(len(s.split())==1):\n",
" for i in arr:\n",
" i1 = set(i.split())\n",
" if wrd_match<len(list(l&i1)):\n",
" wrd_match = max(wrd_match,len(list(l&i1)))\n",
" match = i\n",
" return match"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "253e6beb",
"metadata": {},
"outputs": [],
"source": [
"similarity_arr=similarity.tolist()\n",
"with open(\"sample.json\", \"w\") as outfile:\n",
" json.dump(similarity_arr, outfile)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c7e7474d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"the avengers\n",
"avengers: age of ultron\n",
"avengers: infinity war\n",
"iron man 2\n",
"avengers: endgame\n",
"captain america: civil war\n",
"captain america: the winter soldier\n",
"iron man\n",
"much ado about nothing\n",
"thor\n",
"iron man three\n"
]
}
],
"source": [
"name = input(\"Enter a movie name: \")\n",
"name = name.lower()\n",
"close_match = difflib.get_close_matches(name,movies_titles)\n",
"# close_match[0]\n",
"closest_match = closest(close_match,name).lower()\n",
"print(closest_match)\n",
"closest_index = df[df['original_title'] == closest_match].index.values[0]\n",
"similarity_score = list(enumerate(similarity[closest_index]))\n",
"# print(similarity_score)\n",
"sorted_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)\n",
"# print(sorted_score)\n",
"for i in range(1, 11):\n",
" idx = sorted_score[i][0]\n",
" # print(idx)de\n",
" print(df['original_title'][idx])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4d4e9a9e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['finding nemo', 'finding joy', 'finding neighbors']\n"
]
}
],
"source": [
"print(difflib.get_close_matches('finding nemo',movies_titles))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "49521031",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit d6879ca

Please sign in to comment.