-
Notifications
You must be signed in to change notification settings - Fork 0
/
recommend.py
116 lines (101 loc) · 4.15 KB
/
recommend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Content-based movie recommender system
# References and credits at bottom
# imports
import pandas as pd
import numpy as np
import time
import sys
import os
import itertools
from tqdm import tqdm
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# function definitions
def get_title(index):
    """Return the title of the first row whose DataFrame index label equals *index*.

    Reads the module-level ``df``. Raises IndexError when no row matches,
    because element 0 of an empty selection is requested.
    """
    row_mask = df.index == index
    matching_titles = df.loc[row_mask, "title"]
    return matching_titles.values[0]
def get_index(title):
    """Return the "index" column value of the first row whose title equals *title*.

    Reads the module-level ``df``. The comparison is exact, so the caller must
    pass the title in the same case as stored in ``df``. Raises IndexError
    when no row matches, because element 0 of an empty selection is requested.
    """
    title_mask = df.title == title
    matching_indices = df.loc[title_mask, "index"]
    return matching_indices.values[0]
def combine(row):
    """Join a row's keywords, genres, director and cast into one string.

    Args:
        row: Mapping-like object (the script passes DataFrame rows) holding
            string values under the "keywords", "genres", "director" and
            "cast" keys.

    Returns:
        The four values, in that order, separated by single spaces.
    """
    feature_columns = ("keywords", "genres", "director", "cast")
    return " ".join(row[column] for column in feature_columns)
# Number of recommendations printed for each query.
_TOP_N = 10


def _print_recommendations(usrmovie, similarity, limit=_TOP_N):
    """Print the *limit* titles most similar to *usrmovie*.

    Args:
        usrmovie: Upper-cased movie title to look up with get_index().
        similarity: Square similarity matrix; row ``i`` holds the scores of
            movie ``i`` against every movie.
        limit: Maximum number of titles to print.

    Raises:
        IndexError: If *usrmovie* is not present in the dataset (propagated
            from get_index()).
    """
    # NOTE(review): the value returned by get_index() comes from the CSV's
    # "index" column and is used as a row position in the matrix; this
    # assumes that column matches the row order -- confirm against the data.
    index = get_index(usrmovie)
    # Pair every movie position with its similarity score for the chosen movie
    scored_movies = enumerate(similarity[index])
    # Highest score first
    ranked_movies = sorted(scored_movies, key=lambda pair: pair[1], reverse=True)
    print(f"\nHere are the top {limit} movies we recommend based on your choice: ")
    shown = 0
    for movie_position, _score in ranked_movies:
        title = get_title(movie_position)
        # The chosen movie is always most similar to itself; skip it
        if title == usrmovie:
            continue
        print(title)
        shown += 1
        if shown == limit:
            break


def _ask_to_continue():
    """Ask whether to look up another movie; return True for yes, False for no.

    Keeps prompting until the answer is y/yes or n/no. Case and surrounding
    whitespace are ignored.
    """
    while True:
        answer = input("Do you want to continue finding recommendations? (y/n): ")
        # str.casefold() returns a new string, so the result must be kept;
        # calling it as a bare statement left answers like "Y" unrecognised.
        answer = answer.strip().casefold()
        if answer in ("n", "no"):
            print("User selected \"no\", Terminating!.....")
            return False
        if answer in ("y", "yes"):
            return True
        print("Invalid input please enter y or n.")


try:
    # Loading CSV and doing some string operations.
    # abspath() guards against __file__ being a bare relative name such as
    # "recommend.py" (scripts on interpreters older than Python 3.9): there
    # dirname() is "" and plain concatenation produced the root-level path
    # "/dataset/movie_dataset.csv".
    dataset_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "dataset",
        "movie_dataset.csv",
    )
    df = pd.read_csv(dataset_path)
    # Titles are stored upper-cased so lookups can be case-insensitive
    df["title"] = df["title"].str.upper()

    # Cleaning up the data: missing feature values become a blank string so
    # that combine() can concatenate them
    features = ["keywords", "genres", "director", "cast"]
    for feature in features:
        df[feature] = df[feature].fillna(" ")

    # Adding a column that merges the selected relevant features
    df["combined"] = df.apply(combine, axis=1)

    # Creating the count matrix and using fit and transform method
    cv = CountVectorizer()
    count_matrix = cv.fit_transform(df["combined"])

    # Using cosine similarity as the basis of recommendations
    cosine_sim = cosine_similarity(count_matrix)

    print("Starting Program.....", end=" ")
    # Create a spinning loader (purely cosmetic, about 2.5 seconds)
    spinner = itertools.cycle(["|", "/", "-", "\\"])
    for _ in range(25):
        sys.stdout.write(next(spinner))
        sys.stdout.flush()
        time.sleep(0.1)
        # Backspace so the next frame overwrites the current one
        sys.stdout.write("\b")
    print()

    # Menu for selecting movies and getting recommendations
    print("\nMovie Recommendation Engine (Content Based)")
    while True:
        usrmovie = input("\nPlease enter a movie that you like: ")
        # Match the upper-cased titles in df; ignore stray whitespace
        usrmovie = usrmovie.strip().upper()

        # Create loading bar using tqdm (purely cosmetic)
        for _ in tqdm(range(100), desc="Loading recommendations for you"):
            time.sleep(0.015)

        try:
            _print_recommendations(usrmovie, cosine_sim)
        except (IndexError, KeyError):
            # Only lookup failures mean "movie not in the csv file". A bare
            # except here also swallowed KeyboardInterrupt and hid real bugs.
            print("\nSorry we couldn't find recommendations for {}, please try another movie.".format(usrmovie))

        # Option to continue going through the program or quit
        if not _ask_to_continue():
            break
# Exception for keyboard interrupt
except KeyboardInterrupt:
    # KeyboardInterrupt is raised for Ctrl-C, i.e. SIGINT, not SIGTERM
    print("\nSIGINT received terminating...")
# References:
# Code Heroku YouTube Channel
# Documentation of libraries and modules used
# Dataset downloaded from https://notebooks.azure.com/hello-codeheroku/projects/recommendation-systems/html/movie_dataset.csv