0% found this document useful (0 votes)
13 views8 pages

Code - Recommender System

Uploaded by

f2022332007
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
13 views8 pages

Code - Recommender System

Uploaded by

f2022332007
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd

#!/usr/bin/env python
# coding: utf-8

# In[1]:

import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns

# # Data Analysis

# In[2]:

# load data
# Read the three CSV files (paths are relative to the current working
# directory) into DataFrames used by every later cell.
books = pd.read_csv('Books.csv')
users = pd.read_csv('Users.csv')
ratings = pd.read_csv('Ratings.csv')

# In[3]:

# Preview the first rows of each table. NOTE(review): these bare expressions
# only render output when run as the last statement of a notebook cell; in a
# plain script they are evaluated and discarded.
books.head()

# In[4]:

users.head()

# In[5]:

ratings.head()

# In[6]:

# Shape of the books table after keeping one row per distinct 'Book-Title'.
books.drop_duplicates(subset=['Book-Title']).shape

# In[7]:
# Get shapes of dataframes
# Printed as (rows, columns) tuples, in the order books, ratings, users.
print(books.shape)
print(ratings.shape)
print(users.shape)

# In[8]:

# check for null values
# Count of missing (null) entries per column of the books table.


books.isnull().sum()

# In[9]:

# check for null values
# Count of missing (null) entries per column of the users table.


users.isnull().sum()

# In[10]:

# check for null values
# Count of missing (null) entries per column of the ratings table.


ratings.isnull().sum()

# In[11]:

# check for any duplicates in dataframes
# Number of fully duplicated rows in each table, as one
# (books, ratings, users) tuple. The expression is kept inside a single pair
# of parentheses so that it stays ONE value; previously a line wrap split it
# into a 2-tuple followed by a separate, unrelated expression.
(
    books.duplicated().sum(),
    ratings.duplicated().sum(),
    users.duplicated().sum(),
)

# In[12]:

# Ratings Stats
# Smallest and largest rating present, followed by the distinct rating values.
rating_values = ratings['Book-Rating']
print('min rating:', rating_values.min())
print('max rating:', rating_values.max())
rating_values.unique()

# In[13]:

# Merge dataframes
# Attach book metadata to every rating by joining on the shared 'ISBN' column.
merged_df = ratings.merge(books, on="ISBN")
# In[14]:

# Display basic statistics
summary_stats = merged_df.describe()
print(summary_stats)
print()

# Check for missing values
missing_per_column = merged_df.isnull().sum()
print(missing_per_column)

# In[15]:

# Visualize rating distribution
# One bar per distinct 'Book-Rating' value, bar height = number of rows.
rating_axes = sns.countplot(data=merged_df, x='Book-Rating')
rating_axes.set_title('Distribution of Book Ratings')
plt.show()

# In[16]:

# Per-title rating statistics.
# The 'Book-Rating' column is selected BEFORE aggregating: calling .mean() on
# the whole grouped frame tries to average the text columns as well, which
# raises on recent pandas versions. The column name is also kept on one line
# (it had been split in the middle of the string literal).
ratings_by_title = merged_df.groupby('Book-Title')['Book-Rating']

# Number of ratings each title received.
num_rating_df = ratings_by_title.count().reset_index()
num_rating_df.rename(columns={'Book-Rating': 'num_ratings'}, inplace=True)

# Mean rating of each title.
avg_rating_df = ratings_by_title.mean().reset_index()
avg_rating_df.rename(columns={'Book-Rating': 'avg_rating'}, inplace=True)

# In[ ]:

# Visualize rating distribution
# Same count plot of 'Book-Rating' as drawn in the earlier cell In[15].
distribution_axes = sns.countplot(data=merged_df, x='Book-Rating')
distribution_axes.set_title('Distribution of Book Ratings')
plt.show()

# In[ ]:

# Calculate average rating for each book
# Result has one row per 'Book-Title' with its mean 'Book-Rating', ordered
# from highest to lowest mean. The chain is wrapped in parentheses so it can
# span lines without breaking inside a string literal.
average_ratings = (
    merged_df.groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index()
    .sort_values(by='Book-Rating', ascending=False)
)

# Display top-rated books
print("Top Rated Books:")
print(average_ratings.head(10))

# In[ ]:

average_ratings.head()

# In[ ]:

import matplotlib.pyplot as plt

# Select the top N books for visualization
top_n = 10
top_rated_books = average_ratings.head(top_n)

# Plotting
# DataFrame.plot opens its own figure, so the size is passed to it directly
# instead of calling plt.figure() first (which only left an empty figure).
# x/y are named explicitly so the bars are labelled with the book titles
# rather than with the frame's integer index.
top_books_axes = top_rated_books.plot(
    kind='barh',
    x='Book-Title',
    y='Book-Rating',
    color='skyblue',
    legend=False,
    figsize=(10, 6),
)
top_books_axes.set_xlabel('Average Rating')
top_books_axes.set_title(f'Top {top_n} Books by Average Rating')
top_books_axes.invert_yaxis()  # Highest-rated book at the top
plt.show()

# # Recommender System

# In[ ]:

class RecommendationSystem:
    """Popularity-based and item-similarity book recommender.

    The books table must provide the columns 'ISBN', 'Book-Title' and
    'Book-Author'; the ratings table must provide 'User-ID', 'ISBN' and
    'Book-Rating'.

    After :meth:`train` has run:

    * ``books`` holds one row per title, enriched with ``num_ratings`` and
      ``avg_rating``.
    * ``popular_df`` holds the best-rated titles among the frequently rated.
    * ``interaction_matrix`` is a title x user rating table (0 = no rating).
    * ``similarity_scores_matrix`` is the title x title cosine similarity.
    """

    def __init__(self, books_path: str = 'Books.csv',
                 ratings_path: str = 'Ratings.csv'):
        """Load the books and ratings CSV files.

        Args:
            books_path: Path of the books CSV file.
            ratings_path: Path of the ratings CSV file.
        """
        # Load books and ratings data
        self.books = pd.read_csv(books_path)
        self.ratings = pd.read_csv(ratings_path)
        # Matrices and dataframes filled in by train()
        self.interaction_matrix = None
        self.popular_df = None
        self.similarity_scores_matrix = None
        # Bookkeeping that lets train() be called more than once.
        self._source_books = None
        self._trained_books = None

    def _require(self, attribute: str):
        """Return ``self.<attribute>``, raising if train() has not set it."""
        value = getattr(self, attribute, None)
        if value is None:
            raise RuntimeError(
                f"'{attribute}' is not available; call train() first."
            )
        return value

    def train(self, min_book_ratings: int = 250, min_user_ratings: int = 200,
              min_famous_ratings: int = 50, top_n_popular: int = 50):
        """Compute rating statistics, the popular list and the similarities.

        Args:
            min_book_ratings: A title needs at least this many ratings to be
                eligible for ``popular_df``.
            min_user_ratings: Only users with MORE than this many ratings
                contribute to the interaction matrix.
            min_famous_ratings: Only titles with at least this many ratings
                from those users are kept in the interaction matrix.
            top_n_popular: Maximum number of rows kept in ``popular_df``.

        The defaults reproduce the previously hard-coded thresholds.
        """
        # train() replaces self.books with an enriched, de-duplicated frame.
        # When it is called again, start from the frame that was used the
        # first time instead of the enriched one; otherwise the statistics
        # columns are merged twice and the per-title counts change.
        if getattr(self, '_trained_books', None) is self.books:
            source_books = self._source_books
        else:
            source_books = self.books

        # Merge ratings with book information
        ratings_with_name = self.ratings.merge(source_books, on='ISBN')

        # Number of ratings and average rating per title. The rating column
        # is selected before aggregating so that text columns are never
        # averaged (which raises on recent pandas versions).
        ratings_by_title = ratings_with_name.groupby('Book-Title')['Book-Rating']
        stats_df = pd.concat(
            [
                ratings_by_title.count().rename('num_ratings'),
                ratings_by_title.mean().rename('avg_rating'),
            ],
            axis=1,
        ).reset_index()

        # Books enriched with their statistics, one row per title
        enriched_books = source_books.merge(stats_df, on='Book-Title')
        enriched_books = enriched_books.drop_duplicates(subset=['Book-Title'])

        # Popular list: frequently rated titles, best average first
        frequently_rated = stats_df[stats_df['num_ratings'] >= min_book_ratings]
        popular = frequently_rated.sort_values(
            'avg_rating', ascending=False
        ).head(top_n_popular)
        popular = popular.merge(
            enriched_books[['Book-Title', 'Book-Author']], on='Book-Title'
        )
        self.popular_df = popular[
            ['Book-Title', 'Book-Author', 'num_ratings', 'avg_rating']
        ]

        # Identify active users and filter ratings
        ratings_per_user = ratings_with_name.groupby('User-ID')['Book-Rating'].count()
        active_users = ratings_per_user[ratings_per_user > min_user_ratings].index
        filtered_rating = ratings_with_name[
            ratings_with_name['User-ID'].isin(active_users)
        ]

        # Identify famous books and filter ratings
        ratings_per_title = filtered_rating.groupby('Book-Title')['Book-Rating'].count()
        famous_books = ratings_per_title[
            ratings_per_title >= min_famous_ratings
        ].index
        final_ratings = filtered_rating[
            filtered_rating['Book-Title'].isin(famous_books)
        ]

        # Title x user table; a missing rating becomes 0
        self.interaction_matrix = final_ratings.pivot_table(
            index='Book-Title', columns='User-ID', values='Book-Rating'
        ).fillna(0)

        self._source_books = source_books
        self.books = enriched_books
        self._trained_books = enriched_books

        # Compute cosine similarity matrix
        self.similarity_scores_matrix = self.cosine_similarity()

        print("--> Training Complete <--")

    def cosine_similarity(self):
        """Return the title x title cosine similarity, computed with numpy.

        Also stores the result in ``similarity_scores_matrix``. A row whose
        ratings are all zero has no direction, so its similarity to every
        row (itself included) is defined as 0 instead of NaN.

        Raises:
            RuntimeError: If the interaction matrix has not been built.
        """
        matrix = np.asarray(self._require('interaction_matrix'), dtype=float)
        dot_product = matrix @ matrix.T
        norm = np.linalg.norm(matrix, axis=1)
        denominator = np.outer(norm, norm)
        # Divide only where the denominator is non-zero; other cells stay 0.
        self.similarity_scores_matrix = np.divide(
            dot_product,
            denominator,
            out=np.zeros_like(dot_product),
            where=denominator != 0,
        )
        return self.similarity_scores_matrix

    def cosine_similarity_manual(self):
        """Return the same similarity matrix using plain Python loops.

        Also stores the result in ``similarity_scores_matrix``. Row norms are
        computed once up front, and only the upper triangle is evaluated
        because the matrix is symmetric. Zero-norm rows get similarity 0.

        Raises:
            RuntimeError: If the interaction matrix has not been built.
        """
        matrix = np.asarray(self._require('interaction_matrix'), dtype=float)
        rows = matrix.tolist()
        num_of_books = len(rows)
        norms = [math.sqrt(sum(val ** 2 for val in row)) for row in rows]

        scores = np.zeros((num_of_books, num_of_books))
        for i in range(num_of_books):
            for j in range(i, num_of_books):
                denominator = norms[i] * norms[j]
                if denominator == 0:
                    continue
                dot_product = sum(a * b for a, b in zip(rows[i], rows[j]))
                scores[i][j] = scores[j][i] = dot_product / denominator

        self.similarity_scores_matrix = scores
        return self.similarity_scores_matrix

    def get_popular_recommendations(self):
        """Print every row of ``popular_df`` as ``column: value`` lines.

        A blank line separates consecutive books.

        Raises:
            RuntimeError: If train() has not been called.
        """
        for _, row in self._require('popular_df').iterrows():
            for col, value in row.items():
                print(f"{col}: {value}")
            print()

    def get_recommendations(self, book_name, top_n: int = 5):
        """Print and return the titles most similar to ``book_name``.

        Args:
            book_name: Title to look up; it must be a row of the interaction
                matrix (i.e. have survived the filters applied in train()).
            top_n: Maximum number of recommendations.

        Returns:
            List of ``(title, author)`` tuples, most similar first.

        Raises:
            RuntimeError: If train() has not been called.
            IndexError: If ``book_name`` is not in the interaction matrix.
        """
        titles = self._require('interaction_matrix').index
        similarity = self._require('similarity_scores_matrix')

        positions = np.flatnonzero(titles == book_name)
        if positions.size == 0:
            raise IndexError(
                f"Book {book_name!r} is not in the interaction matrix."
            )
        index = int(positions[0])

        # Stable descending sort; the queried book is removed explicitly
        # because it is not guaranteed to sort first (ties, all-zero rows).
        scores = np.asarray(similarity[index], dtype=float)
        ranked = np.argsort(-scores, kind='stable')
        similar_items = [int(i) for i in ranked if i != index][:top_n]

        recommendations = []
        print(" --> RECOMMENDATIONS <--\n")
        for position in similar_items:
            book_rows = self.books[self.books['Book-Title'] == titles[position]]
            book_to_recommend = book_rows.iloc[0]
            title = book_to_recommend['Book-Title']
            print("Book Title:", title)
            author = book_to_recommend['Book-Author']
            print("Author:", author)
            print("Number of ratings received:", book_to_recommend['num_ratings'])
            print("Average rating:", book_to_recommend['avg_rating'])
            print()
            recommendations.append((title, author))

        return recommendations

# In[ ]:

# Initialize an object of RecommendationSystem
# With no arguments the constructor reads 'Books.csv' and 'Ratings.csv'.


recommender = RecommendationSystem()

# In[ ]:

# Train the recommender
# Builds the popular list, the interaction matrix and the similarity matrix,
# then prints a completion message.


recommender.train()

# In[ ]:

# Get top 5 recommendations for a book
# NOTE(review): assumes the title '1984' survives the filters applied in
# train(); otherwise the lookup raises.


recommended_books = recommender.get_recommendations('1984')

# In[ ]:
# Print the popular books selected during training (at most 50)
recommender.get_popular_recommendations()

# In[ ]:

You might also like