import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# -----------------------------
# Step 1: Create a dataset
# -----------------------------
# Ten paired observations of advertising budget and resulting sales.
data = {
    'Ad_Budget': [10, 15, 20, 25, 30, 35, 40, 45, 50, 60],  # in ₹000s
    'Sales': [25, 30, 35, 45, 48, 52, 60, 63, 67, 80],  # in ₹000s
}
df = pd.DataFrame(data)
# Convert Sales into a binary classification label: 1 if Sales >= 50, else 0.
# The vectorised comparison yields the same 0/1 integers as a row-wise apply.
df['High_Sales'] = (df['Sales'] >= 50).astype(int)
# -----------------------------
# Step 2: Prepare features and target
# -----------------------------
X = df[['Ad_Budget']]  # feature matrix; double brackets keep it a one-column DataFrame
y = df['High_Sales']  # binary target column
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# -----------------------------
# Step 3: Train Logistic Regression model
# -----------------------------
# fit() returns the estimator itself, so construction and training can be chained
model = LogisticRegression().fit(X_train, y_train)
# -----------------------------
# Step 4: Predict
# -----------------------------
y_pred = model.predict(X_test)
# -----------------------------
# Step 5: Evaluate the model
# -----------------------------
# Each metric takes (true labels, predicted labels); print them in a fixed order
for heading, metric in (
    (" Accuracy Score:", accuracy_score),
    ("\n Confusion Matrix:\n", confusion_matrix),
    ("\n Classification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))
# -----------------------------
# Step 6: Predict new sample
# -----------------------------
new_budget = [[55]]  # one sample with one feature: ad budget in ₹000s
# The model was fitted on a DataFrame, so wrap the sample in a DataFrame with
# the same column name; a bare list makes scikit-learn warn that the input
# lacks valid feature names.
new_sample = pd.DataFrame(new_budget, columns=['Ad_Budget'])
prediction = model.predict(new_sample)
# Resolve the label first so the f-string stays on a single line (a
# single-quoted f-string cannot span physical lines).
predicted_label = 'High Sales' if prediction[0] == 1 else 'Low Sales'
print(f"\n Predicted class for ₹{new_budget[0][0]}000 ad budget: {predicted_label}")
# -----------------------------
# Step 7: Visualize
# -----------------------------
# Column 1 of predict_proba is the probability of the second class, i.e. the
# positive class (High_Sales == 1) for this 0/1 target.
high_sales_proba = model.predict_proba(df[['Ad_Budget']])[:, 1]
# Actual 0/1 labels as points, with the fitted probability curve drawn over them
plt.scatter(df['Ad_Budget'], df['High_Sales'], color='blue', label='Actual')
plt.plot(df['Ad_Budget'], high_sales_proba, color='red', label='Predicted Probability')
plt.title('Logistic Regression: Ad Budget vs High Sales')
plt.xlabel('Advertising Budget (₹000s)')
plt.ylabel('Probability of High Sales')
plt.legend()
plt.grid()
plt.show()