Pandas
import numpy as np
import pandas as pd
Series
Creating series
# Sample inputs shared by the Series examples: index labels, a plain list,
# the same values as a NumPy array, and a dict whose keys map to values.
My_index = list("abcd")
my_list = [1, 2, 3, 4]
arr = np.asarray(my_list)
dic = {key: 10 * key for key in my_list}
# Series from a plain list; with no index given, the labels default to 0..n-1.
pd.Series(data=my_list)
0 1
1 2
2 3
3 4
dtype: int64
# Same list, but labelled with the custom index instead of 0..n-1.
pd.Series(data=my_list, index=My_index)
a 1
b 2
c 3
d 4
dtype: int64
# Series from a NumPy array; the result matches the list-based Series.
pd.Series(data=arr)
0 1
1 2
2 3
3 4
dtype: int64
# NumPy array values labelled with the custom index.
pd.Series(data=arr, index=My_index)
a 1
b 2
c 3
d 4
dtype: int64
# Series from a dict: the keys become the index and the values the data.
pd.Series(data=dic)
1 10
2 20
3 30
4 40
dtype: int64
DataFrames
Creating DataFrames
# Column-oriented input: each key becomes a column label and each list
# supplies that column's values, one entry per row.
data = {
    "Name": ["Bala", "sanu", "subbu", "mark"],
    "Age": [25, 34, 43, 18],
    "City": ["bengaluru", "mumbai", "delhi", "Pune"],
    "Salary": [30000, 89000, 56000, 2300],
}
df1 = pd.DataFrame(data=data)
df1
Name Age City Salary
0 Bala 25 bengaluru 30000
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000
3 mark 18 Pune 2300
# Row-oriented input: each inner list is one record. No column names are
# supplied, so the frame gets the default integer column labels.
list1 = [
    ["Bala", 25, "bengaluru", 30000],
    ["sanu", 34, "mumbai", 89000],
    ["subbu", 43, "delhi", 56000],
    ["mark", 18, "Pune", 2300],
]
df2 = pd.DataFrame(data=list1)
df2
0 1 2 3
0 Bala 25 bengaluru 30000
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000
3 mark 18 Pune 2300
# Attach names to the positional columns of list1 so that they can be
# selected by label in the examples that follow.
col = ["Name", "Age", "city", "Salary"]
df3 = pd.DataFrame(data=list1, columns=col)
df3
Name Age city Salary
0 Bala 25 bengaluru 30000
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000
3 mark 18 Pune 2300
Selecting columns in a DataFrame
df3
Name Age city Salary
0 Bala 25 bengaluru 30000
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000
3 mark 18 Pune 2300
# Indexing with a single column label returns that column as a Series.
df3["Name"]
0 Bala
1 sanu
2 subbu
3 mark
Name: Name, dtype: object
# Indexing with a list of labels returns a DataFrame holding only those columns.
df3[["Name","city"]]
Name city
0 Bala bengaluru
1 sanu mumbai
2 subbu delhi
3 mark Pune
Creating a new column
# Assigning a list to a new column label adds that column to df3 itself;
# the list supplies one value per existing row.
df3["Gender"] = ["M","M","M","F"]
df3
Name Age city Salary Gender
0 Bala 25 bengaluru 30000 M
1 sanu 34 mumbai 89000 M
2 subbu 43 delhi 56000 M
3 mark 18 Pune 2300 F
Removing columns
# drop returns a new frame without "Name"; df3 itself is left unchanged.
df3.drop(columns="Name")
Age city Salary Gender
0 25 bengaluru 30000 M
1 34 mumbai 89000 M
2 43 delhi 56000 M
3 18 Pune 2300 F
df3
Name Age city Salary Gender
0 Bala 25 bengaluru 30000 M
1 sanu 34 mumbai 89000 M
2 subbu 43 delhi 56000 M
3 mark 18 Pune 2300 F
# inplace=True removes "Gender" from df3 itself instead of returning a new frame.
df3.drop(columns="Gender", inplace=True)
df3
Name Age city Salary
0 Bala 25 bengaluru 30000
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000
3 mark 18 Pune 2300
Removing rows
# Drop the row labelled 0; the result is a new frame and df3 keeps every row.
df3.drop(index=0)
Name Age city Salary
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000
3 mark 18 Pune 2300
df3
Name Age city Salary
0 Bala 25 bengaluru 30000
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000
3 mark 18 Pune 2300
Selecting rows in a DataFrame
# .loc selects by index label; a single label returns that row as a Series.
df3.loc[0]
Name Bala
Age 25
city bengaluru
Salary 30000
Name: 0, dtype: object
# A list of labels returns the matching rows as a DataFrame.
df3.loc[[0,2]]
Name Age city Salary
0 Bala 25 bengaluru 30000
2 subbu 43 delhi 56000
# .iloc selects by integer position rather than by index label.
df3.iloc[[0,1]]
Name Age city Salary
0 Bala 25 bengaluru 30000
1 sanu 34 mumbai 89000
Selecting both rows & columns
# Select rows and columns in a single .loc call instead of chaining
# .loc[...][...]: one indexer avoids building an intermediate frame and
# remains correct if the same selection is later used for assignment.
df3.loc[[0, 1], ["Name", "city"]]
Name city
0 Bala bengaluru
1 sanu mumbai
Conditional selection
# Boolean mask: keep only the rows whose Age is at least 30.
df3.loc[df3["Age"] >= 30]
Name Age city Salary
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000
# Combine two masks element-wise with | (OR). Each comparison needs its own
# parentheses because | binds more tightly than >=.
df3.loc[(df3["Age"] >= 30) | (df3["Salary"] >= 10000)]
Name Age city Salary
0 Bala 25 bengaluru 30000
1 sanu 34 mumbai 89000
2 subbu 43 delhi 56000