pandas-1
April 21, 2025
[1]: import pandas as pd
[ ]: # a Series is created from a list / tuple
[2]: sr1 = pd.Series([10,20,30,40,50])
sr1
[2]: 0 10
1 20
2 30
3 40
4 50
dtype: int64
[3]: type(sr1)
[3]: pandas.core.series.Series
[4]: sr1[3]
[4]: 40
[ ]: # pandas.core.series.Series
[ ]: # pd.Series --> Series is a class (called like a function to build a Series object) which is available in the pandas module
[6]: list1 = [10,20,30,40,50]
list1[3]
[6]: 40
[5]: sr2 = pd.Series([10,20,30,40,50],index = ['a','b','c','d','e'], dtype = 'float')
sr2
[5]: a 10.0
b 20.0
c 30.0
d 40.0
e 50.0
dtype: float64
[7]: sr2['d']
[7]: 40.0
[ ]:
DataFrame
[10]:
[10]: 0 1
1 2
2 3
3 4
dtype: int64
[8]: # creating a dataframe from a dictionary is the easiest way
# data frame --> table --> rows and columns
# columns can be any ordered 1D collection: list, tuple, array (1D), Series (a set is rejected because it is unordered)
dict1 = {
'col1': [10,20,30,40,50],
'col2': pd.Series(['a','b','c','d','e'])
}
df1 = pd.DataFrame(dict1)
df1
# each column has a single data type
# col1 holds only int data --> a mixed list like [1,'a',2.0] is not kept as int; it falls back to the generic object dtype
# each column can have its own data type (a single data type per column)
[8]: col1 col2
0 10 a
1 20 b
2 30 c
3 40 d
4 50 e
[12]: df1.dtypes
# int --> int64/32
# float --> float64/float32
# string --> object
[12]: col1 int64
col2 object
dtype: object
[ ]:
Merge
[13]: df1 = pd.DataFrame({'id': [1,2,3,4,5], 'ml_marks': [94,95,98,92,90]})
df1
[13]: id ml_marks
0 1 94
1 2 95
2 3 98
3 4 92
4 5 90
[14]: df2 = pd.DataFrame({'id': [3,4,5,6,7], 'stat_marks': [98,84,88,96,94]})
df2
[14]: id stat_marks
0 3 98
1 4 84
2 5 88
3 6 96
4 7 94
[15]: df1.merge(df2, on = 'id',how = 'inner')
[15]: id ml_marks stat_marks
0 3 98 98
1 4 92 84
2 5 90 88
[16]: df1.merge(df2, on = 'id',how = 'left')
[16]: id ml_marks stat_marks
0 1 94 NaN
1 2 95 NaN
2 3 98 98.0
3 4 92 84.0
4 5 90 88.0
[17]: df1.merge(df2, on = 'id',how = 'right')
[17]: id ml_marks stat_marks
0 3 98.0 98
1 4 92.0 84
2 5 90.0 88
3 6 NaN 96
4 7 NaN 94
[18]: df1.merge(df2, on = 'id',how = 'outer') # it will return every record
[18]: id ml_marks stat_marks
0 1 94.0 NaN
1 2 95.0 NaN
2 3 98.0 98.0
3 4 92.0 84.0
4 5 90.0 88.0
5 6 NaN 96.0
6 7 NaN 94.0
[ ]: # df1.merge(df2,how = 'cross')
# df1.merge(df1, on = 'id')
[ ]:
Concat
[ ]: # combine two data frames at the row level (increase the no. of rows by appending the second data frame)
[19]: df1 = pd.DataFrame({'id': [1,2,3,4,5], 'ml_marks': [94,95,98,92,90]})
df2 = pd.DataFrame({'id': [6,7,8,9,10], 'ml_marks': [94,95,98,92,90]})
[20]: df2
[20]: id ml_marks
0 6 94
1 7 95
2 8 98
3 9 92
4 10 90
[21]: pd.concat([df1,df2])
[21]: id ml_marks
0 1 94
1 2 95
2 3 98
3 4 92
4 5 90
0 6 94
1 7 95
2 8 98
3 9 92
4 10 90
[ ]: # in reality data is not stored in a sequential manner, so joining on the index usually doesn't make sense
[22]: df1 = pd.DataFrame({'id': [1,2,3,4,5], 'ml_marks': [94,95,98,92,90]})
df2 = pd.DataFrame({'id': [3,4,5,6,7], 'stat_marks': [98,84,88,96,94]})
[ ]: # Join --> it combines data frames like Merge, but matches rows on index values
# join is performed on index values
# the lsuffix / rsuffix parameters are needed to differentiate common column names
[23]: # snake_case : separate the words of a longer name with _ --> new_df
# PascalCase --> NewDf
# camelCase --> newDf
df1.join(df2, lsuffix = '_df1',rsuffix='_df2')
[23]: id_df1 ml_marks id_df2 stat_marks
0 1 94 3 98
1 2 95 4 84
2 3 98 5 88
3 4 92 6 96
4 5 90 7 94
[ ]: # download Auto.csv from drive folder
[24]: pwd # folder where your current code is available
# the pandas-1 notebook is stored in the 2802 folder
[24]: 'C:\\Users\\admin\\2802'
[ ]: # copy and paste Auto.csv into the folder 'C:\\Users\\admin\\2802'
# ensure the code and the data set are in the same folder
[ ]: # pd.read_excel('filename.xlsx')
[25]: df1 = pd.read_csv('Auto.csv')
df1.head()
[25]: mpg cylinders displacement Horse Power weight acceleration year \
0 18.0 8.0 307.0 130 3504 12.0 70
1 15.0 8.0 350.0 165 3693 11.5 70
2 NaN 8.0 318.0 150 3436 11.0 70
3 NaN 8.0 NaN 150 3433 12.0 70
4 NaN 8.0 NaN 140 3449 10.5 70
origin name
0 1 chevrolet chevelle malibu
1 1 buick skylark 320
2 1 plymouth satellite
3 1 amc rebel sst
4 1 ford torino