Pandas Cheat Sheet (Most Used Functions)
1. Data Loading & Saving
- pd.read_csv("file.csv")
- pd.read_excel("file.xlsx")
- pd.read_json("file.json")
- pd.read_sql(query, conn)
- df.to_csv("file.csv")
- df.to_excel("file.xlsx")
2. Data Overview
- df.head(), df.tail()
- df.shape
- df.info()
- df.describe()
- df.dtypes
- df.columns
- df.index
3. Selection & Indexing
- df['col']
- df[['col1','col2']]
- df.loc[2, 'col']
- df.iloc[2, 3]
- df.iloc[0:5]
- df.at[2, 'col']
- df.iat[2, 3]
4. Filtering & Conditions
- df[df['col'] > 10]
- df[(df['col1'] > 10) & (df['col2'] < 20)]
- df.query("col1 > 10 and col2 < 20")
5. Missing Data Handling
- df.isnull().sum()
- df.dropna()
- df.fillna(0)
- df.fillna(df.mean())
6. Column Operations
- df['new'] = df['col1'] + df['col2']
- df.rename(columns={'old':'new'})
- df.drop('col', axis=1)
- df.astype(int)
7. Sorting
- df.sort_values('col')
- df.sort_values('col', ascending=False)
- df.sort_index()
8. Aggregation
- df['col'].sum(), df['col'].mean()
- df['col'].min(), df['col'].max()
- df['col'].value_counts()
- df['col'].unique(), df['col'].nunique()
9. GroupBy
- df.groupby('col')['val'].mean()
- df.groupby('col').agg({'val':'sum', 'col2':'max'})
- df.pivot_table(values='val', index='col1', columns='col2', aggfunc='mean')
10. Merging & Joining
- pd.concat([df1, df2])
- pd.merge(df1, df2, on='key')
- df1.join(df2)
11. String Operations
- df['col'].str.lower()
- df['col'].str.upper()
- df['col'].str.contains('text')
- df['col'].str.replace('old','new')
12. Datetime Handling
- df['date'] = pd.to_datetime(df['date'])
- df['date'].dt.year, df['date'].dt.month, df['date'].dt.day
- df['date'].dt.day_name()
13. Apply & Lambda
- df['col'].apply(lambda x: x**2)
- df.applymap(lambda x: str(x))
14. Window Functions
- df['col'].rolling(3).mean()
- df['col'].expanding().sum()
15. Export Final Dataset
- df.to_csv("cleaned.csv", index=False)
- df.to_excel("cleaned.xlsx", index=False)