Slip:1
Q1
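# Element-wise arithmetic (+, -, *, /) on two integer vectors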
v1<-c(1L,2L,3L,4L,5L)
v2<-c(10L,20L,30L,40L,50L)
v1+v2
v1-v2
v1*v2
v1/v2
Q2
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
df = pd.read_csv('/Users/TusharDighe/Desktop/DMSLIPS/studentscores.csv')
X = df[['Hours']]
y = df['Scores']
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)
mae = mean_absolute_error(y, y_pred)
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
Slip:2
Q1
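# Print the multiplication table of n (n*1 through n*10)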
t_function <- function(n) {
  for (x in 1:10) {
    print(n * x)
  }
}
t_function(10)  # function call
Q2
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
# Generating a synthetic dataset with 2 features and 3 centers (clusters)
X, y = make_blobs(n_samples=300, centers=3, random_state=42,
cluster_std=1.0)
# Plotting the synthetic dataset (before clustering)
plt.scatter(X[:, 0], X[:, 1], s=50, c='gray', label='Data points')
plt.title("Synthetic Dataset (Before Clustering)")
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()
# Implementing the K-Means algorithm
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X) # Fit the model on the data
# Getting the cluster centers and labels
centers = kmeans.cluster_centers_ # Coordinates of cluster centers
labels = kmeans.labels_ # Predicted labels for each point
# Plotting the clustered dataset
plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='viridis')  # Plot data points colored by cluster
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.75,
marker='X', label='Cluster Centers') # Plot cluster centers
plt.title("K-Means Clustering")
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()
Slip:3
Q1
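# Reverse a numeric vector and compute the sum of its elements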
x<-c(3,7,2,7)
rev(x)
sum(x)
Q2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
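# Toy data: x = size, y = cost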
x = np.array([[0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[11],[13]])
y = np.array([1,3,2,5,7,8,8,9,10,12,16,18])
model = LinearRegression()
model.fit(x, y)
b0, b1 = model.intercept_, model.coef_[0]
print('Estimated coefficients:\nb0 = {} \nb1 = {}'.format(b0, b1))
plt.scatter(x, y, color='blue', marker='*')
y_predicted = b0 + b1 * x
plt.plot(x, y_predicted, color='green')
plt.xlabel('size')
plt.ylabel('cost')
plt.show()
Slip:4
Q1
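# Element-wise addition of two 3x2 matrices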
mat1<-matrix(c(1,2,3,4,5,6),nrow=3,ncol=2,byrow=TRUE)
mat2<-matrix(c(3,4,5,6,7,8),nrow=3,ncol=2,byrow=TRUE)
add=mat1+mat2
print(add)
Q2
import numpy as np
import matplotlib.pyplot as plt
weather = ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast',
           'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Overcast', 'Overcast', 'Rainy']
temp = ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool',
        'Mild', 'Mild', 'Mild', 'Hot', 'Mild']
play = ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'Yes', 'No']
from sklearn import preprocessing
le=preprocessing.LabelEncoder()
w=le.fit_transform(weather)
t=le.fit_transform(temp)
p=le.fit_transform(play)
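# Pair the encoded weather and temperature values to form the feature matrix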
features=list(zip(w,t))
from sklearn.naive_bayes import GaussianNB
model=GaussianNB()
model.fit(features,p)
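# Predict play for an encoded (weather, temp) pair; LabelEncoder assigns codes
# in alphabetical order, so [0, 2] corresponds to Overcast weather and Mild temperature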
predicted=model.predict([[0,2]])
print("predicted value:",predicted)
Slip:5
Q1
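# Combine two factors into one by unlisting a list of factors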
fact1=factor(1:5)
fact2=factor(5:10)
concat=unlist(list(fact1,fact2))
print(concat)
Q2
import numpy as np
import pandas as pd
from sklearn import tree
import matplotlib.pyplot as plt
dataset = pd.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/diabetes.csv")
# Select features (all except the 'Outcome' column) and target (the 'Outcome' column)
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
# Train the decision tree model
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(X, y)
# Plot the decision tree
plt.figure(figsize=(15,10)) # Adjust the plot size for better visualization
tree.plot_tree(clf, filled=True)
plt.show()
Slip:6
Q1
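# Find the elements that appear in both vectors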
vector1 <- c(1, 2, 3, 4, 5, 2, 10,12,3727)
vector2 <- c(7, 8, 9, 10, 11, 12, 8, 9,3727)
df <- data.frame(Column1 = vector1, Column2 = vector2)
print("duplicates elements")
print(df)
duplicates<-intersect(vecotr1,vector2)
print(duplicates)
Q2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster
from sklearn.preprocessing import StandardScaler
df = pd.read_csv('/Users/Tushardighe/Desktop/DMSLIPS/Mall_Customers.csv')
print("First few rows of the dataset:")
print(df.head())
# Select features for clustering (Annual Income and Spending Score)
X = df[['Annual Income (k$)', 'Spending Score (1-100)']].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Perform hierarchical clustering using 'ward' method
linked = linkage(X_scaled, method='ward')
# Plot the dendrogram
plt.figure(figsize=(10, 7))
dendrogram(linked,
orientation='top',
distance_sort='descending',
show_leaf_counts=True)
plt.title('Dendrogram for Mall Customers')
plt.xlabel('Sample Index or Cluster Size')
plt.ylabel('Distance')
plt.show()
# Set the number of clusters
max_clusters = 5
clusters = fcluster(linked, max_clusters, criterion='maxclust')
# Add the cluster assignments to the original dataframe
df['Cluster'] = clusters
# Print the first few rows of the dataset with cluster assignments
print("\nDataset with cluster assignments:")
print(df.head())
# Visualize the clusters
plt.figure(figsize=(10, 7))
plt.scatter(X[:, 0], X[:, 1], c=clusters, cmap='rainbow')
plt.title('Mall Customer Segments')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.show()
Slip:7
Q1
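# Sequence from 20 to 50, mean of 20:60, and sum of 51:91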
sequence <- seq(20, 50)
print("Sequence from 20 to 50:")
print(sequence)
mean_val <- mean(20:60)
print(paste("Mean of numbers from 20 to 60:", mean_val))
sum_val <- sum(51:91)
print(paste("Sum of numbers from 51 to 91:", sum_val))
Q2
import numpy as np
from sklearn.linear_model import LinearRegression
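# Fit a simple linear regression and report the intercept (b0) and slope (b1)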
x = np.array([1, 2, 3, 4, 5, 6, 7, 8]).reshape(-1, 1)
y = np.array([7, 14, 15, 18, 19, 21, 26, 23])
model = LinearRegression().fit(x, y)
b0, b1 = model.intercept_, model.coef_[0]
print(f"b0: {b0}, b1: {b1}")
Slip:8
Q1
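# Return the first n Fibonacci numbers, starting from 0, 1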
fibonacci <- function(n) {
fibseq <- numeric(n)
fibseq[1] <- 0
fibseq[2] <- 1
for (i in 3:n) {
fibseq[i] <- fibseq[i - 1] + fibseq[i - 2]
}
return(fibseq)
}
n <- 10
fib_numbers <- fibonacci(n)
print(fib_numbers)
Q2
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
df = pd.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/CC GENERAL.csv")
# Preprocess data
df.drop('CUST_ID', axis=1, inplace=True)
df.fillna(df.mean(), inplace=True)
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)
# Fit K-Means
kmeans = KMeans(n_clusters=3, random_state=42)
df['Cluster'] = kmeans.fit_predict(df_scaled)
# Plot results
plt.scatter(df_scaled[:, 0], df_scaled[:, 1], c=df['Cluster'], cmap='viridis')
plt.title('K-Means Clustering')
plt.xlabel('Feature 1 (Standardized)')
plt.ylabel('Feature 2 (Standardized)')
plt.show()
# Print cluster counts
print(df['Cluster'].value_counts())
Slip:9
Q1
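# Create an employee data frame and display per-column summary statistics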
employees <- data.frame(
EmployeeID = 1:5,
Name = c("Tushar", "Rohit", "Rachit", "Don", "TD"),
Age = c(21, 21, 21, 28, 40),
Department = c("HR", "Finance", "IT", "Marketing", "Sales"),
Salary = c(50000, 60000, 55000, 65000, 70000)
)
print("Employee Details:")
print(employees)
print("Summary of the Data:")
summary(employees)
Q2
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
cancer = datasets.load_breast_cancer()
X, y = cancer.data, cancer.target
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=42)
# Train SVM model
model = SVC(kernel='linear')
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Print accuracy, precision, and recall
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print(classification_report(y_test, y_pred))
Slip:10
Q1
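# Maximum and minimum of a numeric vector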
vector1<-c(3,7,2,7)
max(vector1)
min(vector1)
Q2
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
df = pd.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/Iris.csv")
# One-hot encode the 'Species' column
df_onehot = pd.get_dummies(df['Species'])
# Apply the Apriori algorithm
frequent_itemsets = apriori(df_onehot, min_support=0.1,
use_colnames=True)
# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
# Print the results
print("Frequent Itemsets:")
print(frequent_itemsets)
print("\nAssociation Rules:")
print(rules)
Slip:11
Q1
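# Elements of list1 that are not present in list2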
list1 <- list("x", "y", "z","T","S")
list2 <- list("X", "Y", "Z", "x", "y", "z")
result <- setdiff(unlist(list1), unlist(list2))
print(result)
[1] "T" "S"
Q2
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
data = pd.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/Wholesale customers data.csv")
# Drop non-numeric columns
data = data.drop(columns=['Channel', 'Region'])
# Standardize the data
data_scaled = StandardScaler().fit_transform(data)
# Hierarchical clustering
linked = linkage(data_scaled, method='ward')
# Dendrogram
plt.figure(figsize=(10, 7))
dendrogram(linked)
plt.title("Dendrogram")
plt.xlabel("Customers")
plt.ylabel("Euclidean distances")
plt.show()
# Form clusters (e.g., 3 clusters)
clusters = fcluster(linked, 3, criterion='maxclust')
data['Cluster'] = clusters
# Display the data with cluster labels
print(data.head())
Slip:12
Q1: Same as Slip 9 Q1
Q2
import pandas as pd
from sklearn import linear_model
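# Multiple linear regression: predict CO2 emissions from Weight and Volume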
df = pd.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/Cardataset.csv")
x=df[['Weight','Volume']]
y=df['CO2']
regr=linear_model.LinearRegression()
regr.fit(x,y)
# Predict CO2 for a car weighing 2500 kg with a 1300 ccm engine volume
predictedCO2 = regr.predict(pd.DataFrame([[2500, 1300]], columns=['Weight', 'Volume']))
print(predictedCO2)
Note: this program was not run.
Slip:13
Q1
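# Pie chart of dice-roll frequencies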
dice_numbers <- c(1, 2, 3, 4, 5, 6)
frequency <- c(7, 2, 6, 3, 4, 8)
pie(frequency, labels = dice_numbers, main = "Frequency of Dice Rolls",
col = rainbow(length(dice_numbers)))
Q2
import pandas as pd
data = pd.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/StudentsPerformance.csv")
# Display the shape of the dataset
print("Shape of the dataset:", data.shape)
# Display the top rows of the dataset with their columns
print("Top 5 rows of the dataset:")
print(data.head())
Slip:14
Q1
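# Build a list of names, append one element, then remove the third element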
employees <- list("Tushar", “Dighe”,"Rohit",”Gadade”, “Rachit")
print(employees)
employees <- append(employees, "Sharma")
print(employees)
employees <- employees[-3]
print(employees)
Q2
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
data = pd.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/Groceries_dataset.csv")
# Create a basket of transactions
basket = data.groupby(['Member_number', 'Date'])['itemDescription'].apply(set).reset_index()
# One-hot encode the transactions
basket_encoded = basket['itemDescription'].str.join('|').str.get_dummies()
# Apply Apriori algorithm
frequent_itemsets = apriori(basket_encoded, min_support=0.01,
use_colnames=True)
# Generate and display association rules with support and confidence
rules = association_rules(frequent_itemsets, metric='confidence',
min_threshold=0.2)
print(rules[['antecedents', 'consequents', 'support', 'confidence']])
Slip:15
Q1: Same as Slip 1 Q1
Q2 (note: did not get the expected output)
import pandas
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
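# Map the categorical columns ('Nationality', 'Go') to numeric codes before fitting the decision tree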
dataset = pandas.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/data2.csv")
d={'UK':0,'USA':1,'N':2}
dataset['Nationality']=dataset['Nationality'].map(d)
d={'YES':1,'NO':0}
dataset['Go']=dataset['Go'].map(d)
x=dataset.iloc[:,:-1].values
y=dataset.iloc[:,4].values
dtree=DecisionTreeClassifier()
dtree=dtree.fit(x,y)
print(dtree.predict([[40,10,7,1]]))
Slip:16
Q1
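# Grouped bar chart of exports and imports by year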
year <- c(2001, 2002, 2003)
export <- c(26, 32, 35)
import <- c(35, 40, 50)
data <- data.frame(year, export, import)
barplot(t(as.matrix(data[, 2:3])), beside = TRUE, names.arg = data$year,
        col = c("blue", "red"), legend = c("Export", "Import"),
        main = "Export and Import over Years", ylab = "Amount")
Q2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plt
data = pd.read_csv("/Users/Tushardighe/Desktop/DMSLIPS/diabetes (1).csv")
# Define features (X) and target (y)
X = data.drop('Outcome', axis=1)
y = data['Outcome']
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
random_state=42)
# Create a Decision Tree Classifier
clf = DecisionTreeClassifier()
# Train the model
clf.fit(X_train, y_train)
# Make predictions on the test data
y_pred = clf.predict(X_test)
# Calculate and print the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
# Plot the Decision Tree
plt.figure(figsize=(12,8))
tree.plot_tree(clf, feature_names=X.columns, class_names=['No Diabetes',
'Diabetes'], filled=True)
plt.show()
Slip:17
Q1
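# Generate the first 20 Fibonacci numbers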
fibonacci <- function(n) {
fibseq <- numeric(n)
fibseq[1] <- 0
fibseq[2] <- 1
for (i in 3:n) {
fibseq[i] <- fibseq[i - 1] + fibseq[i - 2]
}
return(fibseq)
}
n <- 20
fib_numbers <- fibonacci(n)
print(fib_numbers)
Q2
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
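# Hard-coded stock market data: interest rate and unemployment rate as predictors of the stock index price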
Stock_Market = {
    'Year': [2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
             2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016],
    'Month': [12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
              12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
    'Interest_Rate': [2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
                      2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75],
    'Unemployment_Rate': [5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
                          6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1],
    'Stock_Index_Price': [1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
                          1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719]
}
# Convert dictionary to DataFrame
df = pd.DataFrame(Stock_Market)
# Define the independent variables (Interest Rate, Unemployment Rate) and dependent variable (Stock Index Price)
X = df[['Interest_Rate', 'Unemployment_Rate']]
y = df['Stock_Index_Price']
# Create and train the Multiple Linear Regression model
model = LinearRegression()
model.fit(X, y)
# Print model coefficients (optional)
print(f'Intercept: {model.intercept_}')
print(f'Coefficients: {model.coef_}')
# Predict Stock Index Price using the model
y_pred = model.predict(X)
# Plot Stock Index Price vs. Interest Rate
plt.scatter(df['Interest_Rate'], df['Stock_Index_Price'], color='blue')
plt.plot(df['Interest_Rate'], y_pred, color='red')
plt.xlabel('Interest Rate')
plt.ylabel('Stock Index Price')
plt.title('Stock Index Price vs. Interest Rate')
plt.show()
Slip:18
Q1: Same as Slip 10 Q1
Q2: Same as Slip 7 Q2
Slip:19
Q1
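# Student data frame with roll number, name, address, and marks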
students <- data.frame(
  Rollno = c(101, 102, 103, 104, 105),
  Studname = c("John", "Emma", "Liam", "Olivia", "Noah"),
  Address = c("NY", "LA", "Chicago", "Houston", "Boston"),
  Marks = c(85, 90, 78, 92, 88)
)
print("Student Details:")
print(students)
Q2: Same as Slip 12 Q2
Slip:20
Q1
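# Build a student data frame from separate vectors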
rollno <- c(35, 27, 29)
names <- c("TUSHAR", "Rohit", "Rachit")
age <- c(21, 21, 21)
marks <- c(80, 100, 100)
students <- data.frame(Roll_No = rollno, Name = names, Age = age, Marks = marks)
print("Student Data Frame:")
print(students)
Q2: Same as Slip 6 Q2