Pandas is a versatile and widely-used Python library specifically designed for data
manipulation and analysis. It provides high-performance, easy-to-use data structures
and data analysis tools that make working with structured data efficient and intuitive.
import pandas as pd

# Build a small demo DataFrame from a dict: each key becomes a column label
# and each list supplies that column's values (all lists have length 3).
df = pd.DataFrame(
    {
        "Name": [
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth",
        ],
        "Age": [22, 35, 58],
        "Sex": ["male", "male", "female"],
    }
)
df
# Load the training data from CSV and preview the first rows.
df = pd.read_csv("/content/train.csv")
df.head()
# Work on an independent deep copy so `df` keeps every original column.
# `deep` is a boolean flag; the string 'Deep' only worked because any
# non-empty string is truthy, so pass the boolean explicitly.
dfx = df.copy(deep=True)
# Remove the PassengerId, Ticket and Name columns from the working copy.
dfx = dfx.drop(columns=['PassengerId', 'Ticket', 'Name'])
dfx.head()
# Replacing Values/Names in a Column:
df1 = dfx.copy(deep=True)
# Assign the result back to the column. Calling replace(..., inplace=True) on
# the selection df1["Survived"] is a chained in-place operation: it acts on an
# intermediate object, so df1 is not reliably updated (and is never updated
# when pandas Copy-on-Write is enabled).
df1["Survived"] = df1["Survived"].replace({0: "Died", 1: "Saved"})
df1.head(3)
Learn Step by Step Code with Explanation: https://sites.google.com/view/aiml-deepthought/machine-learning/python-pandas
# Drop Rows
# Remove the rows whose index labels are 1, 3, 5 and 7.
rows_to_remove = [1, 3, 5, 7]
df1 = dfx.drop(index=rows_to_remove)
df1.head()
# Column labels of the full dataset as a plain Python list.
df.columns.to_list()
Download Code: https://t.me/AIMLDeepThaught
# Missing Value check
print('Method 1:')
# Percentage of missing entries per column.
missing_counts = df.isna().sum()
missing_counts / len(df) * 100
print('Method 2:')
# Names of the columns that contain at least one missing entry,
# followed by the missing count for just those columns.
var1 = [name for name, n_missing in missing_counts.items() if n_missing != 0]
print(df[var1].isna().sum())
Download Code: https://t.me/AIMLDeepThaught
# Missing Value check 3
import missingno as msno

print('Method 3:')
# Draw missingno's matrix plot for the whole frame, then render the figure.
# NOTE(review): `plt` is not imported in the visible part of the file;
# presumably matplotlib.pyplot is imported elsewhere -- confirm.
msno.matrix(df)
plt.show()
# Find the null rows in a particular feature
embarked_missing = df['Embarked'].isna()
df.loc[embarked_missing]
# Find rows with missing values (first five rows that have any null cell)
has_missing = df.isna().any(axis="columns")
sample_incomplete_rows = df.loc[has_missing].head()
sample_incomplete_rows
Download Code: https://t.me/AIMLDeepThaught
# Describe dataset
df.describe()
# Aggregate Function
# Apply several reductions at once to the selected numeric columns; the result
# has one row per statistic and one column per selected feature.
summary_stats = ['sum', 'max', 'mean', 'std', 'skew', 'kurt']
df[['Age', 'Fare', 'Pclass']].agg(summary_stats)
# value_counts
df['Embarked'].value_counts().to_frame()
# value_counts for Multiple Columns
# Print the frequency table of each listed column, followed by a separator.
for col in ['Survived', 'Sex', 'Embarked']:
    print(df[col].value_counts().to_frame())
    print("****" * 7)
# Count
# Number of non-null entries in each of the selected columns.
counted_cols = ['Age', 'Embarked', 'Sex']
df[counted_cols].count()
# Shuffling the data
# Sampling the whole frame (frac=1) reorders the rows; the fixed seed makes
# the order reproducible.
df2 = df.sample(random_state=3, frac=1)
df2.head()
# Correlation of Data
import seaborn as sns

# Restrict to numeric columns before computing pairwise correlations.
numeric_cols = df.select_dtypes(include='number')
corr = numeric_cols.corr()
display(corr)
# Annotated heatmap of the correlation matrix.
sns.heatmap(data=corr, annot=True, cmap='viridis')
plt.title('Correlation Heatmap')
plt.xlabel('Features')
plt.ylabel('Features')
plt.show()
Download Code: https://t.me/AIMLDeepThaught
# Keep only the rows whose Cabin value is present (not null)
cabin_known = df1['Cabin'].notna()
df1 = df1[cabin_known]
df1.head()
# Dropna Method
# Discard every row that still has at least one missing value.
df1 = df1.dropna(axis=0, how='any')
df1.head()
# Fillna (ffill) Method
# Forward-fill: each missing value takes the last valid value above it in the
# same column. DataFrame.ffill() replaces the deprecated
# fillna(method="ffill") spelling.
df1 = df1.ffill()
df1.head()
# Fill Null Values by Mean Value
# Compute the column mean once, then use it for every missing Age.
mean_age = df1["Age"].mean()
df1["Age"] = df1["Age"].fillna(value=mean_age)
df1.head()
# Fill Null Values by Desired Value
# Fill missing Embarked entries with the literal value 'Q'. The previous code
# passed the boolean Series (df1['Embarked'] == 'Q') as the fill value, which
# would insert True/False flags rather than the desired value.
df1['Embarked'] = df1['Embarked'].fillna('Q')
df1.head(5)
# Find all null values in the dataframe
# Drop the Cabin column, then collect every row that still has a null cell.
df1 = df1.drop(columns='Cabin')
rows_with_nulls = df1.isna().any(axis='columns')
sample_incomplete_rows = df1[rows_with_nulls]
display(sample_incomplete_rows.shape)
sample_incomplete_rows.head()
# Find Selected Values
# Name and Embarked of passengers whose fare exceeds 500.
# Select from `df`: at this point `df1` derives from `dfx`, from which the
# 'Name' column was dropped, so df1[...][['Name', 'Embarked']] raises KeyError.
titanic_Fare500 = df.loc[df['Fare'] > 500, ['Name', 'Embarked']]
display(titanic_Fare500.shape)
titanic_Fare500
# Find Value in a Range
# Rows for male passengers older than 50, built from two named boolean masks.
is_male = df["Sex"] == "male"
over_fifty = df["Age"] > 50.00
titanic_age_selection = df[is_male & over_fifty]
display(titanic_age_selection.shape)
titanic_age_selection.head()
# Select particular value in a feature
# Keep the rows whose Pclass is one of the listed values.
wanted_classes = [1, 2]
in_wanted_class = df1["Pclass"].isin(wanted_classes)
titanic_Pclass = df1[in_wanted_class]
display(titanic_Pclass.shape)
titanic_Pclass.head()
Download Code: https://t.me/AIMLDeepThaught
# Select with Multiple Conditions
# Combine three element-wise masks with &: first class, female, older than 50.
first_class = df1["Pclass"] == 1
is_female = df1["Sex"] == 'female'
older_than_50 = df1["Age"] > 50
titanic_Pclass = df1[first_class & is_female & older_than_50]
display(titanic_Pclass.shape)
titanic_Pclass.head()
# Sort_Values
# Start again from a copy of the full dataset, order it by Age from highest
# to lowest, and show a few columns of the top rows.
df1 = df.copy()
shown_cols = ['Name', 'Ticket', 'Survived', 'Pclass', 'Age']
oldest_first = df1.sort_values(by='Age', ascending=False)
oldest_first[shown_cols].head()