Project 5 E-commerce Return Rate Reduction Analysis
Data Cleaning by Using SQL
Step 1: Firstly, I Create a database by using command
Create database Ecommerce_Data;
Step 2: populate database for using purpose by using “use command”
use Ecommerce_data;
step 3: Create table by using “Create command”
Create table data_new(Order_ID VARCHAR(50), Product_ID VARCHAR(50), User_ID VARCHAR(50),
Order_Date date, Return_Date date, Product_Category VARCHAR(50),
Product_Price FLOAT, Order_Quantity INT, Return_Reason VARCHAR(50), Return_Status VARCHAR(50),
Days_to_Return int, User_Age int, User_Gender VARCHAR(50), User_Location VARCHAR(50),
Payment_Method VARCHAR(50), Shipping_Method VARCHAR(50), Discount_Applied float
);
select * from data_new;
Step 4: Import the dataset by using “Imort data” tool in the sql
Step 5: Run the select query to fetch the data
select * from data_new;
Step 6: Then we use multiple select query to count the number of null that are present in the column
-- Data imported By CSV file
SELECT COUNT(*) FROM Data_new WHERE Order_ID IS NULL;
SELECT COUNT(*) FROM Data_new WHERE product_id IS NULL;
SELECT COUNT(*) FROM Data_new WHERE user_id IS NULL;
SELECT COUNT(*) FROM Data_new WHERE order_date IS NULL;
SELECT COUNT(*) FROM Data_new WHERE return_date IS NULL;
SELECT COUNT(*) FROM Data_new WHERE product_category IS NULL;
SELECT COUNT(*) FROM Data_new WHERE product_price IS NULL;
SELECT COUNT(*) FROM Data_new WHERE order_quantity IS NULL;
SELECT COUNT(*) FROM Data_new WHERE return_reason IS NULL;
SELECT COUNT(*) FROM Data_new WHERE return_status IS NULL;
SELECT COUNT(*) FROM Data_new WHERE days_to_return IS NULL;
SELECT COUNT(*) FROM Data_new WHERE user_age IS NULL;
SELECT COUNT(*) FROM Data_new WHERE user_gender IS NULL;
SELECT COUNT(*) FROM Data_new WHERE user_location IS NULL;
SELECT COUNT(*) FROM Data_new WHERE payment_method IS NULL;
SELECT COUNT(*) FROM Data_new WHERE shipping_method IS NULL;
SELECT COUNT(*) FROM Data_new WHERE discount_method IS NULL;
Step 8: Delete null values if it present
-- Delete null cell row
Delete FROM Data_new WHERE Order_ID IS NULL;
Delete FROM Data_new WHERE product_id IS NULL;
Delete FROM Data_new WHERE user_id IS NULL;
Delete FROM Data_new WHERE order_date IS NULL;
Delete FROM Data_new WHERE return_date IS NULL;
Delete FROM Data_new WHERE product_category IS NULL;
Delete FROM Data_new WHERE product_price IS NULL;
Delete FROM Data_new WHERE order_quantity IS NULL;
Delete FROM Data_new WHERE return_reason IS NULL;
Delete FROM Data_new WHERE return_status IS NULL;
Delete FROM Data_new WHERE days_to_return IS NULL;
Delete FROM Data_new WHERE user_age IS NULL;
Delete FROM Data_new WHERE user_gender IS NULL;
Delete FROM Data_new WHERE user_location IS NULL;
Delete FROM Data_new WHERE payment_method IS NULL;
Delete FROM Data_new WHERE shipping_method IS NULL;
Delete FROM Data_new WHERE discount_method IS NULL;
Step 9: -- Check for duplicates based on order_id
SELECT order_id, COUNT(*)
FROM data_new
GROUP BY order_id
HAVING COUNT(*) > 1;
Step 10 : -- Delete duplicates based on order_id
DELETE FROM data_new
WHERE id NOT IN (
SELECT MIN(id)
FROM data_new
GROUP BY order_id);
Step 11: -- Negative price check
SELECT * FROM data_new WHERE product_price < 0;
Step 12: delete data for < 0