Q1: Create a tabular structure for user profiles
python
import pandas as pd
# Dataset
# NOTE(review): every address reads "[email protected]" in this copy, which looks
# like an e-mail redaction artifact rather than real sample data; kept as-is.
users = [
    {"user_id": 1, "name": "Alice", "email": "[email protected]",
     "signup_date": "2024-01-01"},
    {"user_id": 2, "name": "Bob", "email": "[email protected]",
     "signup_date": "2024-01-15"},
    {"user_id": 3, "name": "Charlie", "email": "[email protected]",
     "signup_date": "2024-02-01"},
]

# Code
# One row per user; the dict keys become the column names.
df = pd.DataFrame(users)
# Parse the date strings so the column holds datetimes instead of text.
df['signup_date'] = pd.to_datetime(df['signup_date'])
print(df)
print(df.dtypes)
---
Q2: Extract first 5 transactions
python
import pandas as pd
# Dataset
sales_data = [
    {"transaction_id": 101, "customer_id": 1, "product_name": "iPhone 14",
     "purchase_amount": 999},
    {"transaction_id": 102, "customer_id": 2, "product_name": "Galaxy S22",
     "purchase_amount": 899},
    {"transaction_id": 103, "customer_id": 3, "product_name": "Pixel 6",
     "purchase_amount": 799},
    {"transaction_id": 104, "customer_id": 4, "product_name": "iPhone 14",
     "purchase_amount": 999},
    {"transaction_id": 105, "customer_id": 5, "product_name": "OnePlus",
     "purchase_amount": 749},
    {"transaction_id": 106, "customer_id": 6, "product_name": "iPhone 14",
     "purchase_amount": 999},
]

# Code
sales_df = pd.DataFrame(sales_data)
# head() defaults to the first 5 rows, so transaction 106 is not shown.
print(sales_df.head())
print(sales_df.dtypes)
---
Q3: Display last 5 employee entries
python
import pandas as pd
# Dataset
employee_df = pd.DataFrame({
    "employee_id": [101, 102, 103, 104, 105, 106],
    "name": ["John", "Emma", "Liam", "Olivia", "Noah", "Ava"],
    "salary": [90000, 95000, 100000, 97000, 110000, 120000],
})

# Code
# tail() defaults to the last 5 rows, so employee 101 is not shown.
print(employee_df.tail())
print(employee_df.dtypes)
---
Q4: Check for missing values in feedback
python
import pandas as pd
# Dataset
feedback_df = pd.DataFrame({
    "customer_id": [1, 2, 3, 4],
    "customer_feedback": ["Great", None, "", "Loved it"],
})

# Code
# isnull() flags None/NaN only; the empty string "" in row 3 is a regular
# value and is NOT counted as missing, so this prints 1.
print(feedback_df['customer_feedback'].isnull().sum())
print(feedback_df.dtypes)
---
Q5: Check data types for ID and date
python
import pandas as pd
# Dataset reused from Q1
# NOTE(review): the address reads "[email protected]" in this copy, which looks
# like an e-mail redaction artifact rather than real sample data; kept as-is.
df = pd.DataFrame([
    {"user_id": 1, "name": "Alice", "email": "[email protected]",
     "signup_date": "2024-01-01"},
])
# Convert before inspecting dtypes so the date column reports a datetime type.
df['signup_date'] = pd.to_datetime(df['signup_date'])

# Code
print(df.dtypes)
---
Q6: Product rating count and shape
python
import pandas as pd
# Dataset
ratings_df = pd.DataFrame({
    "product_id": [1, 2, 3, 4, 5],
    "rating": [5, 4, None, 3, 5],
})

# Code
# notnull() is True for every present rating; summing the booleans counts them.
print("Rated products:", ratings_df['rating'].notnull().sum())
print("Shape:", ratings_df.shape)
print(ratings_df.dtypes)
---
Q7: Extract customer ID and amount
python
import pandas as pd
# Dataset
sales_df = pd.DataFrame({
    "customer_id": [1, 2, 3],
    "purchase_amount": [999, 899, 799],
    "product": ["iPhone", "Samsung", "Pixel"],
})

# Code
# A list of column names selects a sub-DataFrame with just those columns.
print(sales_df[['customer_id', 'purchase_amount']])
---
Q8: Filter iPhone 14 sales
python
import pandas as pd
# Dataset
sales_df = pd.DataFrame({
    "product_name": ["iPhone 14", "Samsung", "iPhone 14", "Pixel"],
    "purchase_amount": [999, 899, 999, 799],
})

# Code
# Boolean mask: keep only the rows whose product name matches exactly.
print(sales_df[sales_df['product_name'] == "iPhone 14"])
---
Q9: Value at 3rd row and 2nd column
python
import pandas as pd
# Dataset
marketing_df = pd.DataFrame({
    "campaign_id": [1, 2, 3],
    "channel": ["Email", "Social Media", "SEO"],
    "budget": [500, 1000, 750],
})

# Code
# iloc is zero-based and positional: [2, 1] is the 3rd row, 2nd column.
print(marketing_df.iloc[2, 1])  # 3rd row, 2nd column
---
Q10: Random feedback row
python
import pandas as pd
# Dataset
feedback_df = pd.DataFrame({
    "review_id": [1, 2, 3],
    "review": ["Excellent", "Poor", "Average"],
})

# Code
# sample(1) draws one row at random, so the printed row varies between runs.
print(feedback_df.sample(1))
---
Q11: Sort orders by delivery date
python
import pandas as pd
# Dataset
orders_df = pd.DataFrame({
    "order_id": [101, 102, 103],
    "delivery_date": ["2024-03-01", "2024-02-15", "2024-03-10"],
})
# Parse to datetimes so sorting is chronological rather than textual.
orders_df["delivery_date"] = pd.to_datetime(orders_df["delivery_date"])

# Code
# sort_values returns a new, ascending-sorted frame; orders_df is unchanged.
print(orders_df.sort_values("delivery_date"))
---
Q12: Show index values
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "week": ["W1", "W2", "W3"],
    "sales": [1000, 1500, 2000],
})

# Code
# No index was supplied, so this is the default 0..2 integer index.
print(df.index)
---
Q13: Memory usage of each column
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "A": [1, 2, 3],
    "B": ["x", "y", "z"],
})

# Code
# deep=True also measures the string objects in "B", not just their pointers.
print(df.memory_usage(deep=True))
---
Q14: Check duplicate records
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "customer_id": [1, 1, 2],
    "name": ["Amy", "Amy", "Bob"],
})

# Code
# duplicated() marks every repeat of an earlier full row; the first
# occurrence is not flagged, so this prints 1.
print(df.duplicated().sum())
---
Q15: Remove rows with missing income or credit score
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "id": [1, 2, 3],
    "income": [50000, None, 60000],
    "credit_score": [700, 650, None],
})

# Code
# Drop a row when EITHER listed column is missing; only id 1 survives.
print(df.dropna(subset=["income", "credit_score"]))
---
Q16: Replace negative profit with 0
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "item": ["A", "B", "C"],
    "profit": [100, -20, 200],
})

# Code
# clip(lower=0) raises every negative profit to 0 in one vectorized call,
# replacing the slower per-element apply(lambda x: max(x, 0)).
df["profit"] = df["profit"].clip(lower=0)
print(df)
---
Q17: Replace 'Not Available' with 'Unknown'
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "Location": ["Delhi", "Not Available", "Mumbai"],
})

# Code
# Series.replace swaps cells that equal the whole value, not substrings.
df["Location"] = df["Location"].replace("Not Available", "Unknown")
print(df)
---
Q18: Filter NY customers with purchase > 500
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "customer": ["A", "B", "C"],
    "location": ["NY", "LA", "NY"],
    "purchase": [600, 300, 800],
})

# Code
# Each comparison is parenthesized because & binds tighter than == and >.
print(df[(df["location"] == "NY") & (df["purchase"] > 500)])
---
Q19: Drop duplicate transactions
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "customer_id": [1, 1, 2],
    "product": ["Laptop", "Laptop", "Tablet"],
})

# Code
# drop_duplicates keeps the first copy of each identical row and returns a
# new frame; df itself is left unchanged.
print(df.drop_duplicates())
---
Q20: Employees earning above 100k
python
import pandas as pd
# Dataset
df = pd.DataFrame({
    "name": ["Amy", "Ben", "Clara"],
    "salary": [95000, 110000, 120000],
})

# Code
# A single .loc call filters rows and picks columns together, avoiding the
# chained df[...][...] indexing of the original.
print(df.loc[df["salary"] > 100000, ["name", "salary"]])