Zomato EDA
Zomato EDA
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [2]: Zomato_path='FlightData/zomato.csv'
In [3]: Zomato_data=pd.read_csv(Zomato_path)
In [4]: Zomato_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 url 51717 non-null object
1 address 51717 non-null object
2 name 51717 non-null object
3 online_order 51717 non-null object
4 book_table 51717 non-null object
5 rate 43942 non-null object
6 votes 51717 non-null int64
7 phone 50509 non-null object
8 location 51696 non-null object
9 rest_type 51490 non-null object
10 dish_liked 23639 non-null object
11 cuisines 51672 non-null object
12 approx_cost(for two people) 51371 non-null object
13 reviews_list 51717 non-null object
14 menu_item 51717 non-null object
15 listed_in(type) 51717 non-null object
16 listed_in(city) 51717 non-null object
dtypes: int64(1), object(16)
memory usage: 6.7+ MB
In [5]: Zomato_data.head()
Out[5]:
url address name online_order book_table rate votes
942, 21st
Main Road,
https://www.zomato.com/bangalore/jalsa-
0 2nd Stage, Jalsa Yes Yes 4.1/5 775 4
banasha...
Banashankari,
...
2nd Floor, 80
https://www.zomato.com/bangalore/spice- Feet Road, Spice
1 Yes No 4.1/5 787
elephan... Near Big Elephant
Bazaar, 6th ...
1112, Next to
San
https://www.zomato.com/SanchurroBangalore? KIMS Medical
2 Churro Yes No 3.8/5 918 +
cont... College, 17th
Cafe
Cross...
3 https://www.zomato.com/bangalore/addhuri- 1st Floor, Addhuri No No 3.7/5 88 +
udupi... Annakuteera, Udupi
3rd Stage, Bhojana
Banashankar...
In [6]: Zomato_data.isna().sum()
Out[6]:
url 0
address 0
name 0
online_order 0
book_table 0
rate 7775
votes 0
phone 1208
location 21
rest_type 227
dish_liked 28078
cuisines 45
approx_cost(for two people) 346
reviews_list 0
menu_item 0
listed_in(type) 0
listed_in(city) 0
dtype: int64
In [7]: Zomato_data.columns
In [9]: Zomato_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 online_order 51717 non-null object
1 book_table 51717 non-null object
2 rate 43942 non-null object
3 votes 51717 non-null int64
4 phone 50509 non-null object
5 location 51696 non-null object
6 rest_type 51490 non-null object
7 dish_liked 23639 non-null object
8 cuisines 51672 non-null object
9 approx_cost(for two people) 51371 non-null object
10 reviews_list 51717 non-null object
11 menu_item 51717 non-null object
12 listed_in(type) 51717 non-null object
13 listed_in(city) 51717 non-null object
dtypes: int64(1), object(13)
memory usage: 5.5+ MB
In [10]: Zomato_data.duplicated().sum()
Out[10]:
63
In [12]: Zomato_data.duplicated().sum()
Out[12]:
0
def get_null_data(data):
    """Print the percentage of null values for each column that has any.

    Args:
        data: pandas DataFrame to inspect.

    Columns without null values are skipped. Nothing is returned; one line
    per affected column is written to standard output.
    """
    total_rows = data.shape[0]
    for column in data.columns:
        null_values = data[column].isnull().sum()
        if null_values > 0:
            # Share of rows in this column that are null, as a percentage.
            per_null_value = float(null_values) * 100 / total_rows
            print(f'{column} column has {round(per_null_value, 2)}% null values')
In [14]: get_null_data(Zomato_data)
In [15]: #Checking what percentage of rows would be lost by dropping rows that contain null values.
rows_after_dropping_null = Zomato_data.dropna(axis=0).shape[0]
rows_before_dropping_null = Zomato_data.shape[0]
In [16]: Zomato_data.drop('dish_liked',axis=1,inplace=True)
In [18]: Zomato_data[['rate']]
Out[18]: rate
0 4.1/5
1 4.1/5
2 3.8/5
3 3.7/5
4 3.8/5
... ...
51712 3.6 /5
51713 NaN
51714 NaN
51715 4.3 /5
51716 3.4 /5
In [19]: Zomato_data.rate.unique()
Out[19]:
array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
'3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
'4.3/5', 'NEW', '2.9/5', '3.5/5', nan, '2.6/5', '3.8 /5', '3.4/5',
'4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
'3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
'4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
'3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
'4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
'4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
'2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)
In [21]: Zomato_data.rate.unique()
In [23]: Zomato_data.rate.unique()
In [25]: Zomato_data.info()
<class 'pandas.core.frame.DataFrame'>
Index: 51654 entries, 0 to 51716
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 online_order 51654 non-null object
1 book_table 51654 non-null object
2 rate 51654 non-null float64
3 votes 51654 non-null int64
4 phone 50450 non-null object
5 location 51635 non-null object
6 rest_type 51429 non-null object
7 cuisines 51611 non-null object
8 approx_cost(for two people) 51312 non-null object
9 reviews_list 51654 non-null object
10 menu_item 51654 non-null object
11 listed_in(type) 51654 non-null object
12 listed_in(city) 51654 non-null object
dtypes: float64(1), int64(1), object(11)
memory usage: 5.5+ MB
In [26]: #Filling missing ratings with the mean of the rate column (the replace below targets 0 values)
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the chained in-place form is deprecated in pandas and does
# not update the DataFrame under copy-on-write.
# NOTE(review): the mean is taken while the 0 values are still in the column,
# so they pull it down — confirm this is intended.
Zomato_data['rate'] = Zomato_data['rate'].replace(0, Zomato_data['rate'].mean())
In [27]: get_null_data(Zomato_data)
In [29]: Zomato_data.info()
<class 'pandas.core.frame.DataFrame'>
Index: 51654 entries, 0 to 51716
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 online_order 51654 non-null object
1 book_table 51654 non-null object
2 rate 51654 non-null float64
3 votes 51654 non-null int64
4 phone 50450 non-null object
5 location 51635 non-null object
6 rest_type 51429 non-null object
7 cuisines 51611 non-null object
8 approx_cost(for two people) 51312 non-null object
9 reviews_list 51654 non-null object
10 menu_item 51654 non-null object
11 listed_in(type) 51654 non-null object
12 listed_in(city) 51654 non-null object
dtypes: float64(1), int64(1), object(11)
memory usage: 5.5+ MB
In [30]: Zomato_data.dropna(inplace=True)
In [32]: Zomato_data.columns
Out[32]:
Index(['online_order', 'book_table', 'rate', 'votes', 'phone', 'location',
'rest_type', 'cuisines', 'approx_cost(for two people)', 'reviews_list',
'menu_item', 'listed_in(type)', 'listed_in(city)'],
dtype='object')
In [40]: Zomato_data.info()
<class 'pandas.core.frame.DataFrame'>
Index: 50220 entries, 0 to 51716
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 online_order 50220 non-null bool
1 book_table 50220 non-null bool
2 rate 50220 non-null float64
3 votes 50220 non-null int64
4 phone 50220 non-null object
5 location 50220 non-null object
6 rest_type 50220 non-null object
7 cuisines 50220 non-null object
8 cost 50220 non-null int32
9 reviews_list 50220 non-null object
10 menu_item 50220 non-null object
11 type 50220 non-null object
12 city 50220 non-null object
dtypes: bool(2), float64(1), int32(1), int64(1), object(8)
memory usage: 4.5+ MB
In [35]: Zomato_data.cost.unique()
In [36]: #Removing comma from cost and converting from object to int.
Zomato_data['cost'] = Zomato_data['cost'].str.replace(',', '').astype(int)
In [37]: Zomato_data.cost.unique()
Out[37]:
array([ 800, 300, 600, 700, 550, 500, 450, 650, 400, 900, 200,
750, 150, 850, 100, 1200, 350, 250, 950, 1000, 1500, 1300,
199, 80, 1100, 160, 1600, 230, 130, 50, 190, 1700, 1400,
180, 1350, 2200, 2000, 1800, 1900, 330, 2500, 2100, 3000, 2800,
3400, 40, 1250, 3500, 4000, 2400, 2600, 120, 1450, 469, 70,
3200, 60, 560, 240, 360, 6000, 1050, 2300, 4100, 5000, 3700,
1650, 2700, 4500, 140])
In [48]: #Converting the online_order and book_table values from Yes/No to boolean True/False.
# Map the 'Yes'/'No' strings to True/False, then cast so both columns end up
# with a bool dtype.
# NOTE(review): both statements were cut off in the source listing; the
# trailing .astype(bool) is reconstructed from the visible ".a" and the bool
# dtypes reported by info() — confirm against the original notebook.
Zomato_data['online_order'] = Zomato_data['online_order'].replace({'Yes': True, 'No': False}).astype(bool)
Zomato_data['book_table'] = Zomato_data['book_table'].replace({'Yes': True, 'No': False}).astype(bool)
In [49]: Zomato_data.head(5)
Out[49]: online_order book_table rate votes phone location rest_type cuisines cost reviews_list
[('Rated 4.0',
Chinese,
'RATED\n
Casual North
1 True True 4.1 787 080 41714161 Banashankari 800 Had been
Dining Indian,
here for
Thai
din..
[('Rated 3.0',
Cafe, Cafe,
"RATED\n
2 True True 3.8 918 +91 9663487993 Banashankari Casual Mexican, 800
Ambience is
Dining Italian
not that ..
[('Rated 4.0',
South
"RATED\n
Quick Indian,
3 True True 3.7 88 +91 9620009302 Banashankari 300 Great food
Bites North
and
Indian
proper..
[('Rated 4.0',
+91 North
Casual 'RATED\n
4 True True 3.8 166 8026612447\r\n+91 Basavanagudi Indian, 600
Dining Very good
9901210005 Rajasthani
restaurant ..
In [53]: a=Zomato_data.reviews_list
In [60]: a[1]
Out[60]:
'[(\'Rated 4.0\', \'RATED\\n Had been here for dinner with family. Turned out to be a g
ood choose suitable for all ages of people. Can try this place. We liked the most was th
eir starters. Service is good. Prices are affordable. Will recommend this restaurant for
early dinner. The place is little noisy.\'), (\'Rated 3.0\', \'RATED\\n The ambience is
really nice, staff is courteous. The price is pretty high for the quantity, but overall
the experience was fine. The quality of food is nice but nothing extraordinary. They als
o have buffet(only veg)\'), (\'Rated 3.0\', \'RATED\\n I felt good is little expensive
for the quantity they serve and In terms of taste is decent. There is nothing much to ta
lk about the ambience, regular casual dining restaurant where you can take your family f
or dinner or lunch. If they improve on that quantity or may be reduce the price a bit or
may be improve the presentation of the food it might Manage to get more repeat customer
s.\'), (\'Rated 4.0\', \'RATED\\n I was looking for a quite place to spend some time wi
th family and as well wanted to try some new place. Since I was at Banashankari I though
t of trying this place. The place had good rating and was part of Zomato gold. So I deci
ded to try this place. It was a delite to see a very friendly staff and food we ordered
was very tasty as well.\\n\\nFood : 4/5\\nAmbience :3/5\\nFriendly staff : 4/5\\nPocket
friendly : 4/5\\n\\nWill definitely visit again ??\'), (\'Rated 4.0\', "RATED\\n Nice p
lace to dine and has a good ambiance... Food is good and the serving time is also good..
neat restrooms and we\'ll arranged tables....only thing is we went at 12.30 for lunch...
and we noticed that they kept on playing one music back to back which was a little annoy
ing...\\n\\n1. Chicken biriyani was so good and the chicken was fresh and tender ,rice w
as well cooked and overall was great\\n\\n2. Mutton biriyani was very very good and tast
y and It had plenty of mutton pieces..."), (\'Rated 5.0\', \'RATED\\n This place just c
ool ? with good ambience and slow music and having delicious food is where you find peac
e. Staff very friendly and they have maintained the place so clean. The price is average
for what the quantity of food they serve.\\nThom yum Thai soup was best and was treat to
mouth, roti was soft with that vilaythi paneer was perfect to have for veggie foodies, i
n rice we tried burnt garlic fried rice with vegetables and it was the perfect thing to
end.\'), (\'Rated 4.0\', "RATED\\n Quiet a good family type of place.. too calm and usu
ally we don\'t find crowd here.. panner curry and the deserts is what we had tasted.. th
ey wer really good but we found it a little expensive"), (\'Rated 2.0\', "RATED\\n I ha
d a very bad experience here.\\nI don\'t know about a la carte, but the buffet was the w
orst. They gave us complementary drink and momos before the buffet. The momos were reall
y good.\\nThe number of varieties first of all was very disappointing. The service was v
ery slow. They refilled the food very slowly. The starters were okay. The main course al
so was so so. There was two gravies with roti and some rice with raitha. They had chats,
sev puri and pan puri, which was average. But the desert was disappointing. They had gul
ab Jamun and chocolate cake. The jamun was not cooked inside. There was a cold blob of r
aw dough inside. The chocolate cake also was really hard and not that good.\\nOverall th
e buffet was a bad experience for me."), (\'Rated 4.0\', "RATED\\n Food: 8/10\\nAmbienc
e:8/10\\nStaff:8/10\\nOne of the good places to try north Indian food...but depends on u
r taste buds. Not everyone will like all the items here. Specially when u r particular a
bt sweet and spicy food.\\nThere\'s buffet available too.\\nWe had ordered paneer uttar
dakshin and paneer kurchan..was amazing. The Gobi hara pyaz and mix veg were average."),
(\'Rated 3.0\', \'RATED\\n A decent place for a family lunch or dinner.. well arranged
in a simple manner. Food was tasty and the crew was very helpful and understanding..\'),
(\'Rated 4.0\', "RATED\\n Great place to have a heavy lunch. Good service.\\nThe chicke
n biryani was undoubtedly one of the best I\'ve had. Biriyani and Lassi would be the sug
gested combo. Buffet is the talk of the place, so try according to your appetite. A nice
place."), (\'Rated 4.0\', \'RATED\\n Its the one restaurant near katriguppe that i foun
d was really good. Good variety of Chinese and thai dishes. Service is good and good pla
ce to hangout with family as its a peaceful place where noise is really less and good vi
ew.\'), (\'Rated 2.0\', "RATED\\n Spice elephant soup SPL: almost manchow flavour sou
p.. Just above medium spicy\\n\\nLasooni fish tikka was awesome\\n\\nI don\'t remember t
he dessert name but I have attached the photo .. It had vanilla ice inside wafers... Waf
er was hell hard, egg smell chewy ... Nightmare dessert !\\n\\nTable leg space was very
bad... I was so uncomfortable, the whole time kept on adjusting my legs\\n\\nNo parking
\\n\\nFor the taste felt this is too costly"), (\'Rated 4.0\', \'RATED\\n Zomato gold p
artner at this price. It was insane. They have really nice food. small place with very c
ourteous staff and very cheap food for this ambience. Cost of soups is 80-100. Starters
from 150-250. Main course 200-300. Cost for two was 800 for us.\')]'
Note: you may need to restart the kernel to use updated packages.
Usage:
C:\Users\sathv\anaconda3\python.exe -m pip install [options] <requirement specifier>
[package-index-options] ...
C:\Users\sathv\anaconda3\python.exe -m pip install [options] -r <requirements file> [p
ackage-index-options] ...
C:\Users\sathv\anaconda3\python.exe -m pip install [options] [-e] <vcs project url>
...
C:\Users\sathv\anaconda3\python.exe -m pip install [options] [-e] <local project path>
...
C:\Users\sathv\anaconda3\python.exe -m pip install [options] <archive url/path> ...
no such option: -a
In [ ]: