0% found this document useful (0 votes)
24 views3 pages

Assignment 2

The document contains Python code for regression and classification tasks using machine learning libraries like scikit-learn. It includes functions for polynomial regression, calculating R² scores, and feature importance in decision trees, as well as validation curves for SVM models. The code also involves data preprocessing and visualization using matplotlib.

Uploaded by

ajieiengq
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views3 pages

Assignment 2

The document contains Python code for regression and classification tasks using machine learning libraries like scikit-learn. It includes functions for polynomial regression, calculating R² scores, and feature importance in decision trees, as well as validation curves for SVM models. The code also involves data preprocessing and visualization using matplotlib.

Uploaded by

ajieiengq
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 3

# Part 1 - Regression
#
# Build a small noisy sine-shaped dataset (15 points on [0, 10]) and split
# it into train/test sets used by all of the Part 1 questions below.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

np.random.seed(0)  # fixed seed so the assignment answers are reproducible
n = 15
x = np.linspace(0, 10, n) + np.random.randn(n) / 5
y = np.sin(x) + x / 6 + np.random.randn(n) / 10

X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=0)

def intro():
    """Scatter-plot the training and test points of the synthetic dataset."""
    # NOTE: the original had `%matplotlib notebook` inside this function;
    # that is an IPython magic and a SyntaxError in plain Python. Run the
    # magic in a notebook cell instead.
    plt.figure()
    plt.scatter(X_train, y_train, label='training data')
    plt.scatter(X_test, y_test, label='test data')
    plt.legend(loc=4)

intro()

# Question 1
def answer_one():
    """Fit polynomial regressions of degree 1, 3, 6 and 9 on the training
    data and return their predictions over np.linspace(0, 10, 100).

    Returns
    -------
    numpy.ndarray of shape (4, 100)
        Row i holds the predictions for degree ``[1, 3, 6, 9][i]``.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures

    degrees = [1, 3, 6, 9]            # required degrees
    X_input = np.linspace(0, 10, 100)  # required prediction grid
    preds = np.zeros((len(degrees), 100))

    for row, degree in enumerate(degrees):
        # PolynomialFeatures expands x into [1, x, x**2, ..., x**degree];
        # scikit-learn transformers require a 2-D (n_samples, 1) input.
        poly = PolynomialFeatures(degree)
        X_train_poly = poly.fit_transform(X_train.reshape(-1, 1))
        # transform (not fit_transform): reuse the expansion fitted on train.
        X_input_poly = poly.transform(X_input.reshape(-1, 1))

        # Still a weighted linear combination of features, so it remains a
        # linear model and uses ordinary least squares for w and b.
        linreg = LinearRegression().fit(X_train_poly, y_train)
        preds[row, :] = linreg.predict(X_input_poly)

    return preds

answer_one()

# Question 2
def answer_two():
    """Compute R² scores of linear regressions on polynomial features of
    degree 0 through 9.

    Returns
    -------
    tuple of (list, list)
        ``(r2_train, r2_test)``, each holding 10 R² scores indexed by degree.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures

    r2_train, r2_test = [], []

    for degree in range(10):
        poly = PolynomialFeatures(degree)
        X_train_poly = poly.fit_transform(X_train.reshape(-1, 1))
        # transform (not fit_transform): the expansion is fitted on train
        # and merely applied to the test set.
        X_test_poly = poly.transform(X_test.reshape(-1, 1))

        linreg = LinearRegression().fit(X_train_poly, y_train)
        # LinearRegression.score returns the R² coefficient of determination.
        r2_train.append(linreg.score(X_train_poly, y_train))
        r2_test.append(linreg.score(X_test_poly, y_test))

    return (r2_train, r2_test)

answer_two()

# Question 3
def answer_three():
    """Return the degrees judged from Question 2's curves as
    (underfitting, overfitting, good generalization)."""
    underfitting, overfitting, good_generalization = 0, 9, 6
    return (underfitting, overfitting, good_generalization)

answer_three()

# Question 4
def answer_four():
    """Compare degree-12 polynomial regression with and without Lasso
    regularization.

    Returns
    -------
    tuple of (float, float)
        ``(LinearRegression_R2_test_score, Lasso_R2_test_score)``.
    """
    from sklearn.linear_model import Lasso, LinearRegression
    from sklearn.preprocessing import PolynomialFeatures

    poly = PolynomialFeatures(12)
    X_train_poly = poly.fit_transform(X_train.reshape(-1, 1))
    X_test_poly = poly.transform(X_test.reshape(-1, 1))

    linreg = LinearRegression().fit(X_train_poly, y_train)
    # Loose tol / high max_iter so coordinate descent converges on the
    # unscaled polynomial features.
    linlasso = Lasso(alpha=0.01, max_iter=10000, tol=0.1).fit(X_train_poly,
                                                              y_train)

    # The question asks for scores on the TEST set.
    return (linreg.score(X_test_poly, y_test),
            linlasso.score(X_test_poly, y_test))

answer_four()

# Part 2 - Classification
#
# Load the mushroom dataset, one-hot encode its categorical columns, and
# split into train/test sets used by the Part 2 questions below.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

mush_df = pd.read_csv('assets/mushrooms.csv')
# get_dummies one-hot encodes every categorical column.
mush_df2 = pd.get_dummies(mush_df)

# NOTE(review): columns 0-1 of the dummy frame are assumed to be the encoded
# class label; column 1 is used as the target — confirm against the CSV layout.
X_mush = mush_df2.iloc[:, 2:]
y_mush = mush_df2.iloc[:, 1]

X_train2, X_test2, y_train2, y_test2 = train_test_split(X_mush, y_mush,
                                                        random_state=0)
# Question 5
def answer_five():
    """Return the names of the 5 most important features of a decision tree
    trained on the mushroom training data.

    Returns
    -------
    list of str
        Feature names sorted by descending ``feature_importances_``.
    """
    from sklearn.tree import DecisionTreeClassifier

    clf = DecisionTreeClassifier(random_state=0).fit(X_train2, y_train2)

    df = pd.DataFrame({'feature': X_train2.columns.values,
                       'feature importance': clf.feature_importances_})

    # Sort descending by importance, then keep only the top-5 feature names.
    # (The original assigned the sorted DataFrame and lost the
    # ['feature'].head(5).tolist() selection, returning a DataFrame instead
    # of the required list.)
    top_features = (df.sort_values(by=['feature importance'],
                                   ascending=False)
                    ['feature'].head(5).tolist())

    return top_features

# Question 6
#
# Load the iris dataset and take a subset used by answer_six below.

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import validation_curve, train_test_split
from sklearn.datasets import load_iris

data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# NOTE(review): iris rows are ordered by class, so the first 50 samples are
# all one class — confirm this subset is intended for the validation curve.
X_subset, y_subset = X.iloc[:50], y[:50]

def answer_six():
    """Compute mean train/test scores of an RBF-kernel SVM as gamma varies.

    Uses ``validation_curve`` with 3-fold cross-validation over
    ``gamma in np.logspace(-4, 1, 6)``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(mean_train_scores, mean_test_scores)``, each of length 6.
    """
    from sklearn.svm import SVC
    from sklearn.model_selection import validation_curve
    import numpy as np

    param_range = np.logspace(-4, 1, 6)  # [0.0001, 0.001, 0.01, 0.1, 1, 10]

    train_scores, test_scores = validation_curve(
        SVC(kernel='rbf', C=1, random_state=0),
        X_subset, y_subset,
        param_name='gamma',
        param_range=param_range,
        cv=3,
        n_jobs=2,
    )

    # validation_curve returns shape (n_params, n_folds); average over the
    # folds (axis=1) to get one score per gamma value.
    mean_train_scores = np.mean(train_scores, axis=1)
    mean_test_scores = np.mean(test_scores, axis=1)

    return (mean_train_scores, mean_test_scores)

print(answer_six())

# Question 7
def answer_seven():
    """Return the gamma values judged from Question 6's curves as
    (underfitting, overfitting, good generalization)."""
    gamma_underfit = 0.0001
    gamma_overfit = 10
    gamma_good = 0.1
    return (gamma_underfit, gamma_overfit, gamma_good)

answer_seven()

You might also like