import pandas as pd
# DataFrame 1: people with their country and role.
data1 = {
    'Name': ['Pankaj', 'Meghna', 'Lisa'],
    'Country': ['India', 'India', 'USA'],
    'Role': ['CEO', 'CTO', 'CTO'],
}
df1 = pd.DataFrame(data1)

# DataFrame 2: IDs keyed by name. Only 'Pankaj' also appears in df1, so
# joins on 'Name' have exactly one matching row.
data2 = {
    'ID': [1, 2, 3],
    'Name': ['Pankaj', 'Anupam', 'Amit'],
}
df2 = pd.DataFrame(data2)

print("DataFrame 1:")
print(df1)
print("\nDataFrame 2:")
print(df2)
DataFrame 1:
Name Country Role
0 Pankaj India CEO
1 Meghna India CTO
2 Lisa USA CTO
DataFrame 2:
ID Name
0 1 Pankaj
1 2 Anupam
2 3 Amit
# Default merge is an inner join: only rows whose 'Name' occurs in both
# frames are kept (here just 'Pankaj').
result_row = pd.merge(df1, df2, on='Name')
print(result_row)
Name Country Role ID
0 Pankaj India CEO 1
# Left Join: keep every row of df1; 'ID' is NaN where df2 has no match
# (which is why the ID column is shown as float in the output).
result_left = pd.merge(df1, df2, on='Name', how='left')
print("\nResult Left Join:")
print(result_left)

# Right Join: keep every row of df2; 'Country'/'Role' are NaN where df1
# has no match.
result_right = pd.merge(df1, df2, on='Name', how='right')
print("\nResult Right Join:")
print(result_right)

# Outer Join: keep the union of names from both frames.
result_outer = pd.merge(df1, df2, on='Name', how='outer')
print("\nResult Outer Join:")
print(result_outer)
Result Left Join:
Name Country Role ID
0 Pankaj India CEO 1.0
1 Meghna India CTO NaN
2 Lisa USA CTO NaN
Result Right Join:
Name Country Role ID
0 Pankaj India CEO 1
1 Anupam NaN NaN 2
2 Amit NaN NaN 3
Result Outer Join:
Name Country Role ID
0 Amit NaN NaN 3.0
1 Anupam NaN NaN 2.0
2 Lisa USA CTO NaN
3 Meghna India CTO NaN
4 Pankaj India CEO 1.0
# Left Join: keep every row of df1; 'ID' is NaN where df2 has no match.
result_left = pd.merge(df1, df2, on='Name', how='left')
print("\nResult Left Join:")
print(result_left)

# Right Join: keep every row of df2; df1-only columns are NaN where
# there is no match.
result_right = pd.merge(df1, df2, on='Name', how='right')
print("\nResult Right Join:")
print(result_right)

# Outer Join: keep the union of names from both frames.
result_outer = pd.merge(df1, df2, on='Name', how='outer')
print("\nResult Outer Join:")
print(result_outer)
Result Left Join:
Name Country Role ID
0 Pankaj India CEO 1.0
1 Meghna India CTO NaN
2 Lisa USA CTO NaN
Result Right Join:
Name Country Role ID
0 Pankaj India CEO 1
1 Anupam NaN NaN 2
2 Amit NaN NaN 3
Result Outer Join:
Name Country Role ID
0 Amit NaN NaN 3.0
1 Anupam NaN NaN 2.0
2 Lisa USA CTO NaN
3 Meghna India CTO NaN
4 Pankaj India CEO 1.0
# Sales Dictionary and Region Dictionary.
# IDs 1-3 are shared; ID 4 exists only in sales and ID 5 only in regions,
# so each join type below yields a different row set.
sales_dict = {
    'ID': [1, 2, 3, 4],
    'Amount': [100, 200, 300, 400],
}
region_dict = {
    'ID': [1, 2, 3, 5],
    'Region': ['East', 'West', 'North', 'South'],
}

# Create DataFrames (dict keys become column names).
sales_df = pd.DataFrame.from_dict(sales_dict)
region_df = pd.DataFrame.from_dict(region_dict)

print("Sales DataFrame:")
print(sales_df)
print("\nRegion DataFrame:")
print(region_df)
Sales DataFrame:
ID Amount
0 1 100
1 2 200
2 3 300
3 4 400
Region DataFrame:
ID Region
0 1 East
1 2 West
2 3 North
3 5 South
# b) Merging with Inner Join: only IDs present in both frames.
result_inner = pd.merge(sales_df, region_df, on='ID', how='inner')
print("\nInner Join:")
print(result_inner)

# c) Merging with Left Join: every sales row; 'Region' is NaN when the
# ID has no region entry.
result_left = pd.merge(sales_df, region_df, on='ID', how='left')
print("\nLeft Join:")
print(result_left)

# d) Merging with Right Join: every region row; 'Amount' is NaN when the
# ID has no sales entry.
result_right = pd.merge(sales_df, region_df, on='ID', how='right')
print("\nRight Join:")
print(result_right)

# e) Merging with Outer Join: union of IDs from both frames.
result_outer = pd.merge(sales_df, region_df, on='ID', how='outer')
print("\nOuter Join:")
print(result_outer)
Inner Join:
ID Amount Region
0 1 100 East
1 2 200 West
2 3 300 North
Left Join:
ID Amount Region
0 1 100 East
1 2 200 West
2 3 300 North
3 4 400 NaN
Right Join:
ID Amount Region
0 1 100.0 East
1 2 200.0 West
2 3 300.0 North
3 5 NaN South
Outer Join:
ID Amount Region
0 1 100.0 East
1 2 200.0 West
2 3 300.0 North
3 4 400.0 NaN
4 5 NaN South
import numpy as np
import pandas as pd
# Data with Missing Values: every column contains at least one NaN and
# only the last row is fully populated.
data = {
    'A': [1, np.nan, 3, 4],
    'B': [5, 6, np.nan, 8],
    'C': [np.nan, np.nan, 9, 10],
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# None of the calls below modify df; each returns a new DataFrame.

# 1. Drop rows with any missing value
print("\nDrop rows with any missing values:")
print(df.dropna())

# 2. Drop columns with at least one missing value
print("\nDrop columns with at least one missing value:")
print(df.dropna(axis=1))

# 3. Drop rows/columns with all missing values
#    (default axis=0, so this checks rows only)
print("\nDrop rows/columns with all missing values:")
print(df.dropna(how='all'))

# 4. Drop rows/columns based on threshold (at least 2 non-NaN values)
print("\nDrop rows/columns based on threshold:")
print(df.dropna(thresh=2))

# 5. Replace NaN with the previous value (Forward Fill)
print("\nReplace NaN with the previous value:")
print(df.ffill())  # Using ffill() instead of fillna(method='pad')

# 6. Replace NaN with the previous value, limit=1 (Forward Fill with Limit)
print("\nReplace NaN with the previous value, limit=1:")
print(df.ffill(limit=1))  # Using ffill() with limit

# 7. Replace NaN with the next value (Backward Fill)
print("\nReplace NaN with the forward value:")
print(df.bfill())  # Using bfill() instead of fillna(method='bfill')
Original DataFrame:
A B C
0 1.0 5.0 NaN
1 NaN 6.0 NaN
2 3.0 NaN 9.0
3 4.0 8.0 10.0
Drop rows with any missing values:
A B C
3 4.0 8.0 10.0
Drop columns with at least one missing value:
Empty DataFrame
Columns: []
Index: [0, 1, 2, 3]
Drop rows/columns with all missing values:
A B C
0 1.0 5.0 NaN
1 NaN 6.0 NaN
2 3.0 NaN 9.0
3 4.0 8.0 10.0
Drop rows/columns based on threshold:
A B C
0 1.0 5.0 NaN
2 3.0 NaN 9.0
3 4.0 8.0 10.0
Replace NaN with the previous value:
A B C
0 1.0 5.0 NaN
1 1.0 6.0 NaN
2 3.0 6.0 9.0
3 4.0 8.0 10.0
Replace NaN with the previous value, limit=1:
A B C
0 1.0 5.0 NaN
1 1.0 6.0 NaN
2 3.0 6.0 9.0
3 4.0 8.0 10.0
Replace NaN with the forward value:
A B C
0 1.0 5.0 9.0
1 3.0 6.0 9.0
2 3.0 8.0 9.0
3 4.0 8.0 10.0
import pandas as pd
# First fruit table: four rows with columns orange, apple and grapes.
fruit = {
    'orange': [3, 2, 0, 1],
    'apple': [0, 3, 7, 2],
    'grapes': [7, 14, 6, 15],
}
df1 = pd.DataFrame(fruit)
df1  # bare expression: renders the frame as the notebook cell output
orange apple grapes
0 3 0 7
1 2 3 14
2 0 7 6
3 1 2 15
Next steps: Generate code with df1
toggle_off View recommended plots New interactive sheet
# Second fruit table: shares only the 'grapes' column with df1.
# 'banana' is padded with np.nan so all columns have the same length (6),
# which also makes that column float.
fruit = {
    'grapes': [13, 12, 10, 2, 55, 98],
    'mango': [10, 13, 17, 2, 9, 76],
    'banana': [20, 23, 27, 4, np.nan, np.nan],  # Added np.nan
}
df2 = pd.DataFrame(fruit)
df2  # bare expression: renders the frame as the notebook cell output
grapes mango banana
0 13 10 20.0
1 12 13 23.0
2 10 17 27.0
3 2 2 4.0
4 55 9 NaN
5 98 76 NaN
Next steps: Generate code with df2
toggle_off View recommended plots New interactive sheet
# Remove the third row (index label 2). drop() returns a new frame and
# keeps the remaining labels, so the index becomes 0, 1, 3, 4, 5.
df2 = df2.drop(df2.index[2])
df2  # bare expression: renders the frame as the notebook cell output
grapes mango banana
0 13 10 20.0
1 12 13 23.0
3 2 2 4.0
4 55 9 NaN
5 98 76 NaN
Next steps: Generate code with df2
toggle_off View recommended plots New interactive sheet
# Stack df2 under df1 (axis=0). Columns are unioned and filled with NaN
# where a frame lacks them; original index labels are kept, so labels
# can repeat in the result.
pd.concat((df1, df2), axis=0)
orange apple grapes mango banana
0 3.0 0.0 7 NaN NaN
1 2.0 3.0 14 NaN NaN
2 0.0 7.0 6 NaN NaN
3 1.0 2.0 15 NaN NaN
0 NaN NaN 13 10.0 20.0
1 NaN NaN 12 13.0 23.0
3 NaN NaN 2 2.0 4.0
4 NaN NaN 55 9.0 NaN
5 NaN NaN 98 76.0 NaN
# Display df1 again (notebook cell); its rendered table follows below.
df1
orange apple grapes
0 3 0 7
1 2 3 14
2 0 7 6
3 1 2 15
Next steps: Generate code with df1
toggle_off View recommended plots New interactive sheet
# Stack df2 under df1, discarding the original labels: ignore_index=True
# renumbers the result rows 0..n-1.
pd.concat([df1, df2], ignore_index=True)
orange apple grapes mango banana
0 3.0 0.0 7 NaN NaN
1 2.0 3.0 14 NaN NaN
2 0.0 7.0 6 NaN NaN
3 1.0 2.0 15 NaN NaN
4 NaN NaN 13 10.0 20.0
5 NaN NaN 12 13.0 23.0
6 NaN NaN 2 2.0 4.0
7 NaN NaN 55 9.0 NaN
8 NaN NaN 98 76.0 NaN
%%time
# Timing baseline: grow the frame one row at a time. Each iteration
# builds a brand-new DataFrame via concat, which is what makes this
# pattern slow compared with a single concat over a list of pieces.
df = pd.DataFrame(columns=['A'])
for i in range(30):
    # Instead of append, use concat to add rows
    df = pd.concat([df, pd.DataFrame([{'A': i*2}])], ignore_index=True)
CPU times: user 17.4 ms, sys: 0 ns, total: 17.4 ms
Wall time: 16.7 ms
%%time
# Build all 30 single-row frames first, then concatenate them in one
# call instead of re-concatenating inside a loop.
df = pd.concat(
    [pd.DataFrame([i*2], columns=['A']) for i in range(30)],
    ignore_index=True,
)
CPU times: user 11.4 ms, sys: 1.04 ms, total: 12.5 ms
Wall time: 39.6 ms
Start coding or generate with AI.