1. Write a Python program to compute Central Tendency Measures (Mean, Median, Mode) and Measures of Dispersion (Variance, Standard Deviation)
Code:
import statistics
n = int(input("Enter the number of elements: "))
data = []
for i in range(n):
    num = float(input(f"Enter number {i + 1}: "))
    data.append(num)
mean = statistics.mean(data)
median = statistics.median(data)
mode = statistics.mode(data)
variance = statistics.variance(data)
std_deviation = statistics.stdev(data)
print("Mean:", mean)
print("Median:", median)
print("Mode:", mode)
print("Variance:", variance)
print("Standard Deviation:", std_deviation)
OUTPUT:
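As a cross-check, the same measures can be computed without the statistics module. A minimal sketch (sample data assumed; the variance uses the n - 1 denominator to match statistics.variance):

# Manual cross-check of the statistics-module results (sample data assumed)
data = [10.0, 20.0, 20.0, 30.0, 40.0]
n = len(data)
mean = sum(data) / n
s = sorted(data)
median = s[n // 2] if n % 2 else (s[n // 2 - 1] + s[n // 2]) / 2
mode = max(set(data), key=data.count)  # a most-frequent value (ties broken arbitrarily)
variance = sum((x - mean) ** 2 for x in data) / (n - 1)  # sample variance
std_deviation = variance ** 0.5
print(mean, median, mode, variance, std_deviation)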
2. Study of Python Basic Libraries such as Statistics, Math, NumPy and SciPy
Code:
import statistics as st
import math
import numpy as np
from scipy import stats
data1 = [10, 20, 20, 30, 40]
print("STATISTICS MODULE")
print("Mean:", st.mean(data1))
print("Median:", st.median(data1))
print("Mode:", st.mode(data1))
print("Variance:", st.variance(data1))
print("Standard Deviation:", st.stdev(data1))
num1, num2, base, exp, log_val = 25, 5, 2, 3, 100
print("\nMATH MODULE")
print("Square Root:", math.sqrt(num1))
print("Factorial:", math.factorial(num2))
print("Power:", math.pow(base, exp))
print("Log base 10:", math.log10(log_val))
data2 = np.array([1, 2, 3, 4, 5])
print("\nNUMPY MODULE")
print("Mean:", np.mean(data2))
print("Median:", np.median(data2))
print("Standard Deviation:", np.std(data2))
print("Variance:", np.var(data2))
print("Sorted Array:", np.sort(data2))
data3 = np.array([5, 10, 10, 15, 20])
mode_result = stats.mode(data3, keepdims=True)  # keepdims=True keeps array output on SciPy >= 1.11
print("Mode:", mode_result.mode[0])
print("Mode Count:", mode_result.count[0])
OUTPUT:
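scipy.stats can also summarize a sample in a single call. A brief sketch (stats.describe reports nobs, minmax, mean, variance, skewness and kurtosis; the variance is the sample variance):

import numpy as np
from scipy import stats

data3 = np.array([5, 10, 10, 15, 20])
summary = stats.describe(data3)  # one-call summary of the sample
print("N:", summary.nobs)
print("Min/Max:", summary.minmax)
print("Mean:", summary.mean)
print("Variance:", summary.variance)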
3. Study of Python Libraries for ML applications such as Pandas and Matplotlib
Code:
import pandas as pd
import matplotlib.pyplot as plt
print("PANDAS MODULE")
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Marks': [85, 90, 78, 92]
}
df = pd.DataFrame(data)
print(df)
print("Describe:\n", df.describe())
print("Names:\n", df['Name'])
print("Marks Mean:", df['Marks'].mean())
print("\nMATPLOTLIB MODULE")
x = [1, 2, 3, 4]
y = [10, 20, 25, 30]
plt.plot(x, y, marker='o')
plt.title("Simple Line Plot")
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.grid(True)
plt.show()
OUTPUT:
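Pandas can also draw charts directly through Matplotlib. A small sketch using the same DataFrame (df.plot is the standard pandas plotting entry point):

import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame({'Name': ['Alice', 'Bob', 'Charlie', 'David'],
                   'Marks': [85, 90, 78, 92]})
df.plot(x='Name', y='Marks', kind='bar', legend=False)  # bar chart of marks per student
plt.ylabel('Marks')
plt.tight_layout()
plt.show()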
4. Write a Python program to implement Simple Linear Regression
Code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
n = int(input("Enter the number of data points: "))
x = []
y = []
print("Enter values for X:")
for i in range(n):
    val = float(input(f"X[{i+1}]: "))
    x.append(val)
print("Enter corresponding values for Y:")
for i in range(n):
    val = float(input(f"Y[{i+1}]: "))
    y.append(val)
x = np.array(x).reshape(-1, 1)
y = np.array(y)
model = LinearRegression()
model.fit(x, y)
slope = model.coef_[0]
intercept = model.intercept_
print("\nSimple Linear Regression Model")
print("Slope (m):", slope)
print("Intercept (c):", intercept)
y_pred = model.predict(x)
print("\nPredicted Y values:")
for i in range(n):
    print(f"For X = {x[i][0]}, Predicted Y = {y_pred[i]}")
plt.scatter(x, y, color='blue', label='Actual Data')
plt.plot(x, y_pred, color='red', label='Regression Line')
plt.title('Simple Linear Regression')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(True)
plt.show()
OUTPUT:
Enter the number of data points: 5
Enter values for X:
X[1]: 1
X[2]: 2
X[3]: 3
X[4]: 4
X[5]: 5
Enter corresponding values for Y:
Y[1]: 2
Y[2]: 4
Y[3]: 6
Y[4]: 8
Y[5]: 10
Simple Linear Regression Model
Slope (m): 2.0
Intercept (c): 0.0
Predicted Y values:
For X = 1.0, Predicted Y = 2.0
For X = 2.0, Predicted Y = 4.0
For X = 3.0, Predicted Y = 6.0
For X = 4.0, Predicted Y = 8.0
For X = 5.0, Predicted Y = 10.0
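The sklearn fit can be verified against NumPy's built-in least-squares fit. A sketch using the sample run above (np.polyfit with degree 1 returns the slope followed by the intercept):

import numpy as np

x = np.array([1, 2, 3, 4, 5], dtype=float)
y = np.array([2, 4, 6, 8, 10], dtype=float)
slope, intercept = np.polyfit(x, y, 1)  # degree-1 least-squares fit
print("Slope (m):", slope)          # 2.0 for this data
print("Intercept (c):", intercept)  # 0.0 for this data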
5. Implementation of Multiple Linear Regression for House Price Prediction using sklearn
Code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Features per house: size (sqft), number of bedrooms, age of the house (years)
x_train = np.array([
    [1400, 3, 2],
    [1600, 3, 2],
    [1700, 3, 3],
    [1875, 4, 4],
    [1100, 2, 1],
    [1550, 3, 3],
    [2350, 4, 3],
    [2450, 4, 4],
    [1425, 3, 2],
    [1700, 3, 3]
])
y_train = np.array([245000, 312000, 279000, 308000, 199000, 219000, 405000, 324000, 319000, 255000])
model = LinearRegression()
model.fit(x_train, y_train)
coefficients = model.coef_
intercept = model.intercept_
print("\nMultiple Linear Regression Model for House Price Prediction")
print("Coefficients:", coefficients)
print("Intercept:", intercept)
print("\nEnter features for new house (size in sqft, number of bedrooms, and age of the house):")
new_features = list(map(float, input("Enter size, bedrooms, and age separated by space: ").split()))
predicted_price = model.predict([new_features])
print(f"Predicted Price for new house: ${predicted_price[0]:,.2f}")
predicted_prices_train = model.predict(x_train)
plt.figure(figsize=(10, 6))
plt.scatter(range(1, len(y_train) + 1), y_train, color='blue', label='Actual Prices', marker='o')
plt.scatter(range(1, len(y_train) + 1), predicted_prices_train, color='red', label='Predicted Prices', marker='x')
plt.title('Actual vs Predicted House Prices (Training Data)')
plt.xlabel('House Number')
plt.ylabel('Price ($)')
plt.legend()
plt.grid(True)
plt.show()
OUTPUT:
Multiple Linear Regression Model for House Price Prediction
Coefficients: [ 7.02856369e+01 1.05581291e+05 -5.77946392e+04]
Intercept: -15854.474075709237
Enter features for new house (size in sqft, number of bedrooms, and age of the house):
Enter size, bedrooms, and age separated by space: 1500 2 4
Predicted Price for new house: $69,558.01
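To gauge how well the model fits, the coefficient of determination on the training data can also be printed. A minimal sketch, assuming model, x_train and y_train from the program above are still in scope (model.score is sklearn's built-in R² for regressors):

# R^2 on the training data (1.0 would be a perfect fit)
r2 = model.score(x_train, y_train)
print("Training R^2:", r2)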
6. Implementation of Decision Tree using sklearn and its parameter tuning
Code:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
X, y = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
model = DecisionTreeRegressor(random_state=42)
param_grid = {
    'max_depth': [3, 5, 10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X, y)
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print("\nBest Parameters:", best_params)
y_pred = best_model.predict(X)
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='blue', label='Actual Data', marker='o')
order = X[:, 0].argsort()  # sort by feature so the prediction line draws left to right
plt.plot(X[order], y_pred[order], color='red', label='Predicted Data')
plt.title('Decision Tree Regression - Actual vs Predicted')
plt.xlabel('Feature')
plt.ylabel('Target')
plt.legend()
plt.grid(True)
plt.show()
OUTPUT:
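GridSearchCV also records the cross-validated score achieved by the winning parameters. A short sketch, assuming grid_search and best_model from the program above are in scope (for regressors the default scoring is R²):

print("Best CV score (mean R^2):", grid_search.best_score_)
print("Depth of the tuned tree:", best_model.get_depth())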
7. Implementation of KNN using sklearn
Code:
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
n = int(input("Enter the number of data points: "))
X = []
y = []
print("\nEnter the feature values (one feature per data point):")
for i in range(n):
    feature = float(input(f"Feature value for data point {i+1}: "))
    X.append([feature])
print("\nEnter the target values:")
for i in range(n):
    target = float(input(f"Target value for data point {i+1}: "))
    y.append(target)
X = np.array(X)
y = np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
k = int(input("\nEnter the number of neighbors (k) for KNN: "))
model = KNeighborsRegressor(n_neighbors=k)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("\nMean Squared Error:", mse)
plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='blue', label='Actual Data', marker='o')
plt.scatter(X_test, y_pred, color='red', label='Predicted Data', marker='x')
plt.title('KNN Regression - Actual vs Predicted')
plt.xlabel('Feature')
plt.ylabel('Target')
plt.legend()
plt.grid(True)
plt.show()
OUTPUT:
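Rather than fixing k by hand, several values can be scanned and the one with the lowest test MSE kept. A sketch assuming the X_train/X_test split and imports from the program above:

# Scan candidate k values; k must not exceed the number of training points
for k in range(1, min(6, len(X_train) + 1)):
    knn = KNeighborsRegressor(n_neighbors=k)
    knn.fit(X_train, y_train)
    mse_k = mean_squared_error(y_test, knn.predict(X_test))
    print(f"k = {k}: MSE = {mse_k:.4f}")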
8. Implementation of Logistic Regression using sklearn
Code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
n = int(input("Enter number of data points: "))
X = []
y = []
print("\nEnter feature values:")
for i in range(n):
    X.append(float(input(f"Feature {i+1}: ")))
print("\nEnter target values (0 or 1):")
for i in range(n):
    y.append(int(input(f"Target {i+1}: ")))
X = np.array(X).reshape(-1, 1)
y = np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)
x_min = X.min() - 1
x_max = X.max() + 1
x_range = np.linspace(x_min, x_max, 300).reshape(-1, 1)
y_prob = model.predict_proba(x_range)[:, 1]
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='blue', label='Actual Data', marker='o')
plt.plot(x_range, y_prob, color='red', label='Logistic Regression Curve')
plt.title('Logistic Regression using sklearn')
plt.xlabel('Feature')
plt.ylabel('Probability')
plt.legend()
plt.grid(True)
plt.show()
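Classification accuracy on the held-out split gives a quick quality check. A minimal sketch to append after model.fit, assuming model, X_test and y_test are in scope (for classifiers, model.score returns mean accuracy):

accuracy = model.score(X_test, y_test)  # fraction of correct test predictions
print("Test accuracy:", accuracy)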
9. Implementation of K-Means Clustering
Code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
n = int(input("Enter number of data points: "))
X = []
print("\nEnter coordinates for each point (x y):")
for i in range(n):
    point = list(map(float, input(f"Point {i+1}: ").split()))
    X.append(point)
X = np.array(X)
k = int(input("\nEnter number of clusters (k): "))
model = KMeans(n_clusters=k, random_state=42, n_init=10)  # explicit n_init avoids a FutureWarning on newer sklearn
model.fit(X)
labels = model.labels_
centroids = model.cluster_centers_
print("\nCluster Centers:")
for idx, center in enumerate(centroids):
    print(f"Cluster {idx+1}: {center}")
colors = ['red', 'blue', 'green', 'purple', 'orange', 'cyan', 'magenta', 'yellow']
plt.figure(figsize=(10, 6))
for i in range(k):
    cluster_points = X[labels == i]
    plt.scatter(cluster_points[:, 0], cluster_points[:, 1], color=colors[i % len(colors)], label=f'Cluster {i+1}')
plt.scatter(centroids[:, 0], centroids[:, 1], color='black', marker='x', s=200, label='Centroids')
plt.title('K-Means Clustering')
plt.xlabel('X Coordinate')
plt.ylabel('Y Coordinate')
plt.legend()
plt.grid(True)
plt.show()
OUTPUT:
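A common way to choose k is the elbow method, which plots the within-cluster sum of squares (KMeans.inertia_) against k. A sketch assuming the X array entered above:

# Elbow method: inertia for k = 1 .. 6 (k cannot exceed the number of points)
inertias = []
k_values = list(range(1, min(7, len(X) + 1)))
for k in k_values:
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    km.fit(X)
    inertias.append(km.inertia_)
plt.plot(k_values, inertias, marker='o')
plt.title('Elbow Method for Choosing k')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.grid(True)
plt.show()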