# Union and intersection of two lists
def union_intersection(lst1, lst2):
    """Return the union and the intersection of two lists.

    Each distinct element appears once in either result, in order of first
    appearance (items of ``lst1`` first).  This keeps the output stable
    between runs, which a plain ``list(set(...))`` does not guarantee.

    Args:
        lst1: First list of hashable items.
        lst2: Second list of hashable items.

    Returns:
        A ``(union, intersection)`` tuple of lists.
    """
    in_second = set(lst2)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    union = list(dict.fromkeys([*lst1, *lst2]))
    intersection = [item for item in dict.fromkeys(lst1) if item in in_second]
    return union, intersection


# Test with numeric lists
nums1 = [1, 2, 3, 4, 5]
nums2 = [3, 4, 5, 6, 7, 8]
print("Original lists:")
print(nums1)
print(nums2)
result = union_intersection(nums1, nums2)
print("\nUnion of said two lists:")
print(result[0])
print("\nIntersection of said two lists:")
print(result[1])

# Test with color lists
colors1 = ["Red", "Green", "Blue"]
colors2 = ["Red", "White", "Pink", "Black"]
print("\nOriginal lists:")
print(colors1)
print(colors2)
result = union_intersection(colors1, colors2)
print("\nUnion of said two lists:")
print(result[0])
print("\nIntersection of said two lists:")
print(result[1])


# Matrix Multiplication
def multiply_matrices(matrix1, matrix2):
    """Multiply two matrices given as lists of rows.

    Both matrices are expected to be rectangular and ``matrix1`` must have at
    least one row.

    Args:
        matrix1: Left operand, ``rows1 x cols1``.
        matrix2: Right operand, ``rows2 x cols2``.

    Returns:
        The ``rows1 x cols2`` product as a list of lists, or an explanatory
        string when ``cols1 != rows2``.  Callers tell the two cases apart
        with ``isinstance(result, str)``.
    """
    cols1 = len(matrix1[0])
    rows2 = len(matrix2)
    if cols1 != rows2:
        return ("Matrix multiplication not possible. The number of columns in "
                "the first matrix must be equal to the number of rows in the "
                "second matrix.")

    # Transpose once so every column of matrix2 is available as a tuple.
    columns2 = list(zip(*matrix2))
    return [
        [sum(a * b for a, b in zip(row, column)) for column in columns2]
        for row in matrix1
    ]


matrix1 = [
    [1, 2, 3],
    [4, 5, 6],
]
matrix2 = [
    [7, 8],
    [9, 10],
    [11, 12],
]
result_matrix = multiply_matrices(matrix1, matrix2)
if isinstance(result_matrix, str):
    print(result_matrix)
else:
    for row in result_matrix:
        print(row)

# Occurrence of words in a given sentence
from collections import Counter


def word_count(str):
    """Count how often each whitespace-separated word occurs in a sentence.

    Args:
        str: The sentence to analyse.  The parameter name shadows the
            builtin ``str``; it is kept so existing keyword callers still
            work.

    Returns:
        A dict mapping each word to its number of occurrences, in order of
        first appearance.  Punctuation is not stripped.
    """
    return dict(Counter(str.split()))


print(word_count('the quick brown fox jumps over the lazy dog.'))


# Frequent words in a text file
def most_frequent_word(lines):
    """Find the most repeated word in an iterable of text lines.

    Words are lower-cased and stripped of commas and periods.  Splitting on
    any whitespace keeps trailing newlines and repeated spaces from producing
    bogus "words".

    Args:
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        A ``(word, frequency)`` tuple; ``("", 0)`` when there are no words.
        On ties the word that appeared first wins.
    """
    strip_punctuation = str.maketrans("", "", ",.")
    counts = Counter()
    for line in lines:
        counts.update(line.lower().translate(strip_punctuation).split())
    if not counts:
        return "", 0
    return counts.most_common(1)[0]


if __name__ == "__main__":
    # Guarded so that importing this module does not require gfg.txt.
    with open("gfg.txt", "r") as file:
        frequent_word, frequency = most_frequent_word(file)
    print("Most repeated word: " + frequent_word)
    print("Frequency: " + str(frequency))
# Single Linear Regression
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the dataset; the script reads the 'X' and 'Y' columns of the CSV.
data = pd.read_csv('training_data.csv')
X = data['X'].values.reshape(-1, 1)  # single feature as an (n, 1) column
Y = data['Y'].values

# Initialize and fit the model
model = LinearRegression()
model.fit(X, Y)

# Make predictions (on the training data itself)
Y_pred = model.predict(X)

# Calculate performance metrics
mse = mean_squared_error(Y, Y_pred)
r2 = r2_score(Y, Y_pred)

# Plot the data and the regression line
plt.scatter(X, Y, label='Data')
plt.plot(X, Y_pred, color='red', label='Regression Line')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Single Linear Regression')
plt.legend()
plt.show()

# Print performance metrics
print(f"Mean Squared Error: {mse}")
print(f"R-squared (Accuracy): {r2}")

# Polynomial Regression
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv('training_data.csv')
X = data['X'].values.reshape(-1, 1)
Y = data['Y'].values

# Transform the data for polynomial regression
poly = PolynomialFeatures(degree=2)  # You can adjust the degree
X_poly = poly.fit_transform(X)

# Fit the model on the expanded features
model = LinearRegression()
model.fit(X_poly, Y)

# Make predictions
Y_pred = model.predict(X_poly)

# Sort X and Y_pred together so the fitted curve is drawn left to right.
# argsort avoids comparing 1-element arrays inside tuples.
order = np.argsort(X[:, 0])
X_sort = X[order, 0]
Y_sort = Y_pred[order]

# Calculate performance metrics
mse = mean_squared_error(Y, Y_pred)
r2 = r2_score(Y, Y_pred)

# Plot the data and polynomial regression line
plt.scatter(X, Y, label='Data')
plt.plot(X_sort, Y_sort, color='red', label='Polynomial Regression')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Polynomial Regression')
plt.legend()
plt.show()

# Print performance metrics
print(f"Mean Squared Error: {mse}")
print(f"R-squared (Accuracy): {r2}")

# Multivariable Regression
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset; every column except 'Y' is used as a feature.
data = pd.read_csv('training_data.csv')
X = data.drop('Y', axis=1)
Y = data['Y']

# Initialize and fit the model
model = LinearRegression()
model.fit(X, Y)

# Make predictions (on the training data itself)
Y_pred = model.predict(X)

# Calculate performance metrics
mse = mean_squared_error(Y, Y_pred)
r2 = r2_score(Y, Y_pred)

# Print performance metrics
print(f"Mean Squared Error: {mse}")
print(f"R-squared (Accuracy): {r2}")

# Naive Bayes
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.datasets import load_iris

# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the data: 30% held out for testing, fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Initialize and fit the model
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Make predictions
y_pred = nb_classifier.predict(X_test)

# Calculate performance metrics (weighted average across the classes)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

# Print performance metrics
print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
# Decision Trees
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Load the dataset; the CSV has no header row, so column names are supplied.
colnames = ['Buying_price', 'maint_cost', 'doors', 'persons', 'lug_boot', 'safety', 'decision']
data = pd.read_csv('car_evaluation.csv', names=colnames, header=None)

# Plot the distribution of the 'decision' column
plt.figure(figsize=(5, 5))
sns.countplot(x='decision', data=data)
plt.title('Count plot for decision')

# Simplify the categories in 'decision': fold 'vgood' and 'good' into 'acc'.
# Assigning the result back avoids a chained in-place replace on a column.
data['decision'] = data['decision'].replace({'vgood': 'acc', 'good': 'acc'})

# Encode categorical features (each column gets its own integer encoding)
new_data = data.apply(LabelEncoder().fit_transform)

# Separate features and target
x = new_data.drop(['decision'], axis=1)
y = new_data['decision']

# Split the data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Initialize and fit the Decision Tree model
dt = DecisionTreeClassifier(criterion="entropy")
dt.fit(x_train, y_train)

# Make predictions
dt_pred = dt.predict(x_test)

# Display the confusion matrix.  It is not named `cm`, because the K-Means
# section below uses `cm` as the alias for matplotlib.cm.
conf_matrix = confusion_matrix(y_test, dt_pred)
cm_display = ConfusionMatrixDisplay(confusion_matrix=conf_matrix)
cm_display.plot()
plt.show()

# K-Means Clustering
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

# Load the dataset
X, y = load_iris(return_X_y=True)

# Initialize and fit KMeans
kmeans = KMeans(n_clusters=3, random_state=2)
kmeans.fit(X)
pred = kmeans.predict(X)
centers = kmeans.cluster_centers_

# Plot the results
plt.figure(figsize=(12, 5))

# Plot for the first two features (iris columns 0 and 1: sepal measurements)
plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], c=pred, cmap=cm.Accent)
plt.grid(True)
# Plot cluster centers
plt.scatter(centers[:, 0], centers[:, 1], marker='^', c='red')
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")

# Plot for the last two features (iris columns 2 and 3: petal measurements)
plt.subplot(1, 2, 2)
plt.scatter(X[:, 2], X[:, 3], c=pred, cmap=cm.Accent)
plt.grid(True)
# Plot cluster centers
plt.scatter(centers[:, 2], centers[:, 3], marker='^', c='red')
plt.xlabel("Petal Length (cm)")
plt.ylabel("Petal Width (cm)")
plt.show()

# Support Vector Machine
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the dataset
dataset = load_digits()
x_train, x_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.30, random_state=4
)

# Initialize and train the SVM classifier
classifier = SVC(kernel="linear")
# Train on the training split, then label the held-out split.
# (fit returns the estimator itself, so the calls can be chained.)
y_pred = classifier.fit(x_train, y_train).predict(x_test)

# Score the predictions: confusion matrix and accuracy as a percentage.
confusion_mat = confusion_matrix(y_test, y_pred)
accuracy = 100 * accuracy_score(y_test, y_pred)

# Print the results
print("Accuracy for SVM is:", accuracy)
print("Confusion Matrix:", confusion_mat, sep="\n")