0% found this document useful (0 votes)
50 views3 pages

DAT p3 EDA - Ipynb - Colaboratory

Uploaded by

FRAG Gamer
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
50 views3 pages

DAT p3 EDA - Ipynb - Colaboratory

Uploaded by

FRAG Gamer
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

#CHETAN AWARI

#ROLL NO.:25
from google.colab import drive
drive.mount('/content/drive')

!pip install pandas

import pandas as pd
df=pd.read_csv('/content/drive/MyDrive/dataset/jordan_career.csv')
df.head(5)

game date age team opp result mp fg fga fgp ... orb drb trb ast stl blk tov pts game_score plus_minus

0 1 1984-10-26 21-252 CHI WSB W (+16) 40 5 16 0.313 ... 1 5 6 7 2 4 5 16 12.5 NaN

1 2 1984-10-27 21-253 CHI MIL L (-2) 34 8 13 0.615 ... 3 2 5 5 2 1 3 21 19.4 NaN

2 3 1984-10-29 21-255 CHI MIL W (+6) 34 13 24 0.542 ... 2 2 4 5 6 2 3 37 32.9 NaN

3 4 1984-10-30 21-256 CHI KCK W (+5) 36 8 21 0.381 ... 2 2 4 5 3 1 6 25 14.7 NaN

4 5 1984-11-01 21-258 CHI DEN L (-16) 33 7 15 0.467 ... 3 2 5 5 1 1 2 17 13.2 NaN

5 rows × 26 columns

df=pd.read_csv('/content/drive/MyDrive/dataset/jordan_career.csv')
print(f'The mean of game_score is : {df["game_score"].mean()}')
print(f'The median of game_score is :{df["game_score"].median()}')
print(f'The median of game_score is :{df["game_score"].mode()}')
print(f'The median of game_score is :{df["game_score"].max()-ds["game_score"].min()}')
print(f'The median of game_score is :{df["game_score"].var()}')
print(f'The median of game_score is :{df["game_score"].std()}')

The mean of game_score is : 23.440951492537312


The median of game_score is :23.45
The median of game_score is :0 20.8
Name: game_score, dtype: float64
The median of game_score is :66.0
The median of game_score is :90.00544559241608
The median of game_score is :9.487119984084531

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set(color_codes=True)

df.dtypes

game int64
date object
age object
team object
opp object
result object
mp int64
fg int64
fga int64
fgp float64
three int64
threeatt int64
threep float64
ft int64
fta int64
ftp float64
orb int64
drb int64
trb int64
ast int64
stl int64
blk int64
tov int64
pts int64
game_score float64
plus_minus float64
dtype: object
df.shape
duplicate_rows_df = df[df.duplicated()]
print("number of duplicate rows: ", duplicate_rows_df.shape)
df.count()

number of duplicate rows: (0, 26)


game 1072
date 1072
age 1072
team 1072
opp 1072
result 1072
mp 1072
fg 1072
fga 1072
fgp 1072
three 1072
threeatt 1072
threep 742
ft 1072
fta 1072
ftp 1042
orb 1072
drb 1072
trb 1072
ast 1072
stl 1072
blk 1072
tov 1072
pts 1072
game_score 1072
plus_minus 0
dtype: int64

df = df.drop_duplicates()
df.head(5)

game date age team opp result mp fg fga fgp ... orb drb trb ast stl blk

1984- 21- W
0 1 CHI WSB 40 5 16 0.313 ... 1 5 6 7 2 4
10-26 252 (+16)

1984- 21-
1 2 CHI MIL L (-2) 34 8 13 0.615 ... 3 2 5 5 2 1
10-27 253

1984- 21-
2 3 CHI MIL W (+6) 34 13 24 0.542 ... 2 2 4 5 6 2
10-29 255

1984- 21-
3 4 CHI KCK W (+5) 36 8 21 0 381 2 2 4 5 3 1

df = df.drop(['game', 'team', 'date', 'age'], axis=1)


df.head(5)

opp result mp fg fga fgp three threeatt threep ft ... orb drb trb ast st

W
0 WSB 40 5 16 0.313 0 0 NaN 6 ... 1 5 6 7
(+16)

1 MIL L (-2) 34 8 13 0.615 0 0 NaN 5 ... 3 2 5 5

2 MIL W (+6) 34 13 24 0.542 0 0 NaN 11 ... 2 2 4 5

3 KCK W (+5) 36 8 21 0.381 0 0 NaN 9 ... 2 2 4 5

4 DEN L (-16) 33 7 15 0.467 0 0 NaN 3 ... 3 2 5 5

print(df.isnull().sum())

game 0
date 0
age 0
team 0
opp 0
result 0
mp 0
fg 0
fga 0
fgp 0
three 0
threeatt 0
threep 330
ft 0
fta 0
ftp 30
orb 0
drb 0
trb 0
ast 0
stl 0
blk 0
tov 0
pts 0
game_score 0
plus_minus 1072
dtype: int64

df=df.dropna()
df.count()

game 0
date 0
age 0
team 0
opp 0
result 0
mp 0
fg 0
fga 0
fgp 0
three 0
threeatt 0
threep 0
ft 0
fta 0
ftp 0
orb 0
drb 0
trb 0
ast 0
stl 0
blk 0
tov 0
pts 0
game_score 0
plus_minus 0
dtype: int64

print(df.isnull().sum())

game 0.0
date 0.0
age 0.0
team 0.0
opp 0.0
result 0.0
mp 0.0
fg 0.0
fga 0.0
fgp 0.0
three 0.0
threeatt 0.0
threep 0.0
ft 0.0
fta 0.0
ftp 0.0
orb 0.0
drb 0.0
trb 0.0
ast 0.0
stl 0.0
blk 0.0
tov 0.0
pts 0.0
game_score 0.0
plus_minus 0.0
dtype: float64

You might also like