Reg no: 211722118054
PROGRAM:
import numpy as np
# Seed NumPy's legacy global RNG so the arrays below are reproducible.
np.random.seed(0)
x1 = np.random.randint(10, size=6)  # One-dimensional array
x2 = np.random.randint(10, size=(3, 4))  # Two-dimensional array
x3 = np.random.randint(10, size=(3, 4, 5))  # Three-dimensional array

# Accessing elements in a one-dimensional array
print(x1)
print("First element:", x1[0])
print("Fifth element:", x1[4])
print("Last element using negative index:", x1[-1])
print("Second-to-last element using negative index:", x1[-2])
print("\n")

# Accessing elements in a two-dimensional array (row, column)
print(x2)
print("Element at (0, 0):", x2[0, 0])
print("Element at (2, 0):", x2[2, 0])
print("Element at (2, -1):", x2[2, -1])
print("\n")

# Modifying values using index notation
x2[0, 0] = 12
print("Modified array:")
print(x2)
print("\n")

# Modifying values in a one-dimensional array.
# x1 has an integer dtype, so the float is silently truncated to 3.
x1[0] = 3.14159
print("Modified one-dimensional array:")
print(x1)
2
Reg no: 211722118054
OUTPUT:
[5 0 3 3 7 9]
First element: 5
Fifth element: 7
Last element using negative index: 9
Second-to-last element using negative index: 7
[[3 5 2 4]
[7 6 8 8]
[1 6 7 7]]
Element at (0, 0): 3
Element at (2, 0): 1
Element at (2, -1): 7
Modified array:
[[12 5 2 4]
[ 7 6 8 8]
[ 1 6 7 7]]
Modified one-dimensional array:
[3 0 3 3 7 9]
3
Reg no: 211722118054
PROGRAM:
import numpy as np
# One-dimensional array holding 0..9
x = np.arange(10)
print(x)
print(x[:5])  # first five elements
print(x[5:])  # elements from index 5 onward
print(x[4:7])  # middle subarray (indices 4, 5, 6)
print(x[::2])  # every other element
print(x[1::2])  # every other element, starting at index 1
print(x[::-1])  # all elements, reversed
print(x[5::-2])  # reversed every other element, starting from index 5

# Two-dimensional array of random integers in [0, 10)
x2 = np.random.randint(10, size=(3, 4))
print(x2)
print(x2[:2, :3])  # two rows, three columns
print(x2[:3, ::2])  # all rows, every other column
print(x2[::-1, ::-1])  # rows and columns both reversed
print(x2[:, 0])  # first column of x2
print(x2[0, :])  # first row of x2
print(x2[0])  # equivalent to x2[0, :]
5
Reg no: 211722118054
OUTPUT:
[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4]
[5 6 7 8 9]
[4 5 6]
[0 2 4 6 8]
[1 3 5 7 9]
[9 8 7 6 5 4 3 2 1 0]
[5 3 1]
[[2 5 7 5]
[4 5 3 3]
[6 1 0 6]]
[[2 5 7]
[4 5 3]]
[[2 7]
[4 3]
[6 0]]
[[6 0 1 6]
[3 3 5 4]
[5 7 5 2]]
[2 4 6]
[2 5 7 5]
[2 5 7 5]
6
Reg no: 211722118054
PROGRAM:
import numpy as np
# Creating a one-dimensional array holding 0..9
x = np.arange(10)
print(x)

# Slicing operations on the one-dimensional array
print(x[:5])  # First five elements
print(x[5:])  # Elements from index 5 onward
print(x[4:7])  # Middle subarray from index 4 to 6
print(x[::2])  # Every other element
print(x[1::2])  # Every other element, starting at index 1
print(x[::-1])  # All elements, reversed
print(x[5::-2])  # Reversed every other element, starting at index 5
print("\n")

# Creating a two-dimensional array of random integers in [0, 10)
x2 = np.random.randint(10, size=(3, 4))
print(x2)

# Slicing operations on the two-dimensional array
print(x2[:2, :3])  # Two rows, three columns
print(x2[:3, ::2])  # All rows, every other column
print(x2[::-1, ::-1])  # Rows and columns both reversed
print(x2[:, 0])  # First column of x2
print(x2[0, :])  # First row of x2
print(x2[0])  # Equivalent to x2[0, :]
8
Reg no: 211722118054
OUTPUT:
[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4]
[5 6 7 8 9]
[4 5 6]
[0 2 4 6 8]
[1 3 5 7 9]
[9 8 7 6 5 4 3 2 1 0]
[5 3 1]
[[7 8 2 7]
[9 1 2 9]
[8 4 4 1]]
[[7 8 2]
[9 1 2]]
[[7 2]
[9 2]
[8 4]]
[[1 4 4 8]
[9 2 1 9]
[7 2 8 7]]
[7 9 8]
[7 8 2 7]
[7 8 2 7]
9
Reg no: 211722118054
PROGRAM:
import pandas as pd
# Creating a Series from a dict: keys become the index labels
data = {'A': 10, 'B': 20, 'C': 30, 'D': 40}
s = pd.Series(data)

# Display the Series
print("Original Series:")
print(s)
print()

# 1. Accessing Elements by Label
print("Accessing Element by Label:")
print("Value at index 'B':", s['B'])
print()

# 2. Accessing Elements by Position
print("Accessing Element by Position:")
print("Value at position 2:", s.iloc[2])
print()

# 3. Slicing (positional, so the end position 4 is excluded)
print("Slicing:")
print("Subset from index 1 to 3:")
print(s.iloc[1:4])
print()

# 4. Conditional Selection (Boolean Indexing)
print("Conditional Selection:")
print("Values greater than 20:")
print(s[s > 20])
print()

# 5. Fancy Indexing
print("Fancy Indexing:")
print("Selecting specific indices:")
print(s[['A', 'C']])
print()

# 6. Modifying Elements
print("Modifying Elements:")
s['A'] = 50
print("Modified Series:")
print(s)
11
Reg no: 211722118054
OUTPUT:
Original Series:
A 10
B 20
C 30
D 40
dtype: int64
Accessing Element by Label:
Value at index 'B': 20
Accessing Element by Position:
Value at position 2: 30
Slicing:
Subset from index 1 to 3:
B 20
C 30
D 40
dtype: int64
Conditional Selection:
Values greater than 20:
C 30
D 40
dtype: int64
Fancy Indexing:
Selecting specific indices:
A 10
C 30
dtype: int64
Modifying Elements:
Modified Series:
A 50
B 20
C 30
D 40
dtype: int64
12
Reg no: 211722118054
PROGRAM:
#PANDAS SERIES AS OBJECT
import pandas as pd
# Basic Series Operations
data = pd.Series([0.25, 0.5, 0.75, 1.0])
print(data)
print(data.values)  # underlying NumPy array
print(data.index)  # default RangeIndex
print(data[1])
print(data[1:3])

# Series with Explicit Index
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
print(data)
print(data['b'])

# Series with Non-Contiguous Integer Index (data[5] is a label lookup)
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=[2, 5, 3, 7])
print(data)
print(data[5])

# Series as a Specialized Dictionary
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(population_dict)
print(population)
print(population['California'])
# Label-based slicing includes BOTH endpoints, so 'Florida' is part of the result.
print(population['California':'Florida'])

# Constructing Series Objects
a = pd.Series([2, 4, 6])
print(a)
b = pd.Series(5, index=[100, 200, 300])  # scalar broadcast to every label
print(b)
c = pd.Series({2: 'a', 1: 'b', 3: 'c'})
print(c)

# After Indexing: only the listed keys are kept, in the order given
c = pd.Series({2: 'a', 1: 'b', 3: 'c'}, index=[3, 2])
print(c)
14
Reg no: 211722118054
OUTPUT:
0 0.25
1 0.50
2 0.75
3 1.00
dtype: float64
[0.25 0.5 0.75 1. ]
RangeIndex(start=0, stop=4, step=1)
0.5
1 0.50
2 0.75
dtype: float64
a 0.25
b 0.50
c 0.75
d 1.00
dtype: float64
0.5
2 0.25
5 0.50
3 0.75
7 1.00
dtype: float64
0.5
California 38332521
Texas 26448193
New York 19651127
Florida 19552860
Illinois 12882135
dtype: int64
38332521
California 38332521
Texas 26448193
New York 19651127
Florida 19552860
dtype: int64
0 2
1 4
2 6
dtype: int64
100 5
200 5
300 5
dtype: int64
2 a
1 b
3 c
dtype: object
3 c
2 a
dtype: object
15
Reg no: 211722118054
PROGRAM:
import pandas as pd
# Creating Pandas Series for population and area
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}

# Creating Series with specified names
population = pd.Series(population_dict, name='population')
area = pd.Series(area_dict, name='area')

# Creating a DataFrame using the two Series (aligned on their shared index)
states = pd.DataFrame({'population': population, 'area': area})

# Displaying the DataFrame, index, and columns
print(states)
print("\nIndex:", states.index)
print("Columns:", states.columns)

# Accessing a specific column in the DataFrame
print("\nArea Column:\n", states['area'])

# Creating a single-column DataFrame from only the 'population' Series
a = pd.DataFrame({'population': population})
print("\nDataFrame 'a':\n", a)
18
Reg no: 211722118054
OUTPUT:
population area
California 38332521 423967
Texas 26448193 695662
New York 19651127 141297
Florida 19552860 170312
Illinois 12882135 149995
Index: Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')
Columns: Index(['population', 'area'], dtype='object')
Area Column:
California 423967
Texas 695662
New York 141297
Florida 170312
Illinois 149995
Name: area, dtype: int64
DataFrame 'a':
population
California 38332521
Texas 26448193
New York 19651127
Florida 19552860
Illinois 12882135
19
Reg no: 211722118054
PROGRAM:
from csv import reader
from math import sqrt
# Load a CSV file
def load_csv(filename):
    """Read a CSV file into a list of rows, skipping empty rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows; each row is a list of the raw string fields.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', newline='') as file:
        return [row for row in reader(file) if row]
# Convert string column to float
def str_column_to_float(dataset, column):
    """Convert one column of every row from string to float, in place.

    Args:
        dataset: List of rows (lists); modified in place.
        column: Index of the column to convert.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    for row in dataset:
        # strip() removes surrounding whitespace before parsing.
        row[column] = float(row[column].strip())
# Convert string column to integer
def str_column_to_int(dataset, column):
    """Replace the values of one column with integer codes, in place.

    Each distinct value gets a code 0..k-1 and every assignment is printed.
    Codes are assigned in sorted order of the distinct values so the mapping
    is the same on every run; iterating a bare set of strings would not
    guarantee that.

    Args:
        dataset: List of rows (lists); modified in place.
        column: Index of the column to encode.

    Returns:
        Dict mapping each original value to its integer code.
    """
    class_values = [row[column] for row in dataset]
    lookup = {}
    for i, value in enumerate(sorted(set(class_values))):
        lookup[value] = i
        print('[%s] => %d' % (value, i))
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup
# Find the min and max values for each column
def dataset_minmax(dataset):
    """Return [min, max] for every column of the dataset.

    Args:
        dataset: List of equal-length rows of comparable values.

    Returns:
        A list with one [min, max] pair per column; an empty list when the
        dataset has no rows.
    """
    # zip(*dataset) transposes rows into columns.
    return [[min(col_values), max(col_values)] for col_values in zip(*dataset)]
# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
    """Min-max scale every column of every row to [0, 1], in place.

    Every element of each row is rescaled, so a label column (if present)
    is rescaled too.

    Args:
        dataset: List of numeric rows; modified in place.
        minmax: One [min, max] pair per column, e.g. from dataset_minmax().
    """
    for row in dataset:
        for i in range(len(row)):
            low, high = minmax[i]
            span = high - low
            # A constant column has zero span; map it to 0.0 rather than
            # dividing by zero.
            row[i] = (row[i] - low) / span if span else 0.0
# Calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between two rows, ignoring the last slot.

    The last element of row1 is treated as a class label and skipped, so only
    the first len(row1) - 1 elements of each row are compared.

    Args:
        row1: First row; its final element is not used.
        row2: Second row; must have at least len(row1) - 1 elements.

    Returns:
        The distance as a float.
    """
    distance = 0.0
    for i in range(len(row1) - 1):
        distance += (row1[i] - row2[i]) ** 2
    return sqrt(distance)
# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
    """Return the training rows closest to test_row.

    Args:
        train: List of training rows.
        test_row: Row to compare against; passed to euclidean_distance() as
            its first argument.
        num_neighbors: Maximum number of rows to return.

    Returns:
        Up to num_neighbors training rows, nearest first. If the training
        set is smaller than num_neighbors, all rows are returned.
    """
    # sorted() is stable, so equally distant rows keep their training order.
    ranked = sorted(
        train,
        key=lambda train_row: euclidean_distance(test_row, train_row),
    )
    return ranked[:num_neighbors]
# Make a prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
    """Predict the class of test_row by majority vote of its nearest neighbors.

    Args:
        train: List of training rows whose last element is the class label.
        test_row: Row to classify.
        num_neighbors: Number of nearest neighbors that vote.

    Returns:
        The most frequent label among the neighbors.
    """
    neighbors = get_neighbors(train, test_row, num_neighbors)
    output_values = [row[-1] for row in neighbors]
    # Pick the label with the highest vote count.
    prediction = max(set(output_values), key=output_values.count)
    return prediction
# Make a prediction with KNN on Iris Dataset
# NOTE(review): the CSV file name was lost when this listing was extracted
# ('[Link]' is a placeholder) - replace it with the real path to the Iris CSV.
filename = '[Link]'
dataset = load_csv(filename)
# Convert the feature columns (all but the last) from strings to floats
for i in range(len(dataset[0]) - 1):
    str_column_to_float(dataset, i)
# Convert class column to integers
str_column_to_int(dataset, len(dataset[0]) - 1)
# Define model parameter
num_neighbors = 5
# Define a new record (features only, no class label)
row = [5.1, 3.7, 1.5, 0.4]
# Predict the label.
# euclidean_distance() skips the last element of the test row (it treats it
# as the class label), so pad the unlabeled record with a placeholder label;
# otherwise its fourth feature would be silently ignored.
label = predict_classification(dataset, row + [None], num_neighbors)
print('Data=%s, Predicted: %s' % (row, label))
22
Reg no: 211722118054
OUTPUT:
[Setosa] => 0
[Versicolor] => 1
[Virginica] => 2
Data=[5.1, 3.7, 1.5, 0.4], Predicted: 0
23
Reg no: 211722118054
PROGRAM:
import numpy as np
import matplotlib.pyplot as plt
from csv import DictReader
def estimate_coef(x, y):
    """Estimate the least-squares line y = b_0 + b_1 * x.

    Args:
        x: 1-D NumPy array of predictor observations.
        y: 1-D NumPy array of response observations, same length as x.

    Returns:
        Tuple (b_0, b_1): intercept and slope.
    """
    n = np.size(x)
    m_x, m_y = np.mean(x), np.mean(y)
    # Cross-deviation and deviation about x. The n * mean * mean correction
    # is subtracted once from the total, not once per element.
    SS_xy = np.sum(y * x) - n * m_y * m_x
    SS_xx = np.sum(x * x) - n * m_x * m_x
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x
    return b_0, b_1
def plot_regression_line(x, y, b):
    """Show a scatter plot of the data with the fitted regression line.

    Args:
        x: NumPy array of predictor values (plotted on the x axis).
        y: NumPy array of observed responses.
        b: Pair (b_0, b_1) giving the intercept and slope of the line.
    """
    # Observed points
    plt.scatter(x, y, color="m", marker="o", s=30)
    # Fitted line evaluated at the observed x values
    y_pred = b[0] + b[1] * x
    plt.plot(x, y_pred, color="g")
    plt.xlabel('Glucose')
    plt.ylabel('BloodPressure')
    plt.show()
def main():
    """Fit and plot a simple linear regression of BloodPressure on Glucose.

    Reads the 'Glucose' and 'BloodPressure' columns of the CSV file as
    integers, prints the estimated coefficients and shows the plot.
    """
    # Full path to the CSV file.
    # NOTE(review): the file name was lost when this listing was extracted
    # ('[Link]' is a placeholder) - restore the real name before running.
    file_path = "p:/[Link]"
    # Observations
    X, Y = [], []
    # Opening CSV file from another location
    with open(file_path, 'r') as file:
        for row in DictReader(file):
            X.append(int(row['Glucose']))
            Y.append(int(row['BloodPressure']))
    x = np.array(X)
    y = np.array(Y)
    # Estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    # Plotting regression line
    plot_regression_line(x, y, b)


if __name__ == "__main__":
    main()
Output:
26
Reg no: 211722118054
PROGRAM:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import seaborn as sn
import matplotlib.pyplot as plt
from csv import DictReader

Glucose, BloodPressure, BMI, Outcome = [], [], [], []
# Replace this with the actual full path of the CSV file
file_path = 'p:/diabetes_2.csv'
# Read the needed columns from the CSV file
with open(file_path, 'r') as file:
    for record in DictReader(file):
        Glucose.append(int(record['Glucose']))
        # The CSV header spells this column with an underscore.
        BloodPressure.append(int(record['Blood_Pressure']))
        BMI.append(float(record["BMI"]))
        Outcome.append(int(record["Outcome"]))

candidates = {
    'Glucose': Glucose,
    'BMI': BMI,
    'BloodPressure': BloodPressure,
    'Outcome': Outcome,
}
df = pd.DataFrame(candidates, columns=['Glucose', 'BMI', 'BloodPressure', 'Outcome'])
print(df)
print("Df printed\n")

# Features and target
X = df[['Glucose', 'BMI', 'BloodPressure']]
y = df['Outcome']
# Hold out 25% of the rows for testing; fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
print(X_train)
print(y_train)
print("Train\n")

logistic_regression = LogisticRegression()
logistic_regression.fit(X_train, y_train)
y_pred = logistic_regression.predict(X_test)

# Cross-tabulate actual against predicted labels and draw it as a heatmap
confusion_matrix = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])
sn.heatmap(confusion_matrix, annot=True)
print('Accuracy: ', metrics.accuracy_score(y_test, y_pred))
print('confusion_matrix:', confusion_matrix, sep='\n', end='\n\n')
plt.show()
OUTPUT:
29
Reg no: 211722118054
PROGRAM:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd

# Read data from CSV file.
# NOTE(review): part of the file name was lost when this listing was
# extracted ('[Link]' is a placeholder) - replace with the actual file name.
df = pd.read_csv('multiple [Link]')
# Defining feature matrix (X) and response vector (y)
X = df[['Feature1', 'Feature2', 'Feature3']]
y = df['Target']
# Splitting X and y into training and testing sets (40% held out)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)
# Create linear regression object
reg = LinearRegression()
# Train the model using the training sets
reg.fit(X_train, y_train)
# Regression coefficients
print('Coefficients:', reg.coef_)
# Variance score: 1 means perfect prediction
print('Variance score: {:.2f}'.format(reg.score(X_test, y_test)))

# Plot for residual error
# Setting plot style
plt.style.use('fivethirtyeight')
# Plotting residual errors in training data
train_pred = reg.predict(X_train)
plt.scatter(train_pred, train_pred - y_train, color="green", s=10, label='Train data')
# Plotting residual errors in test data
test_pred = reg.predict(X_test)
plt.scatter(test_pred, test_pred - y_test, color="blue", s=10, label='Test data')
# Plotting line for zero residual error
plt.hlines(y=0, xmin=0, xmax=50, linewidth=2)
# Plotting legend
plt.legend(loc='upper right')
# Plot title
plt.title("Residual errors")
# Method call for showing the plot
plt.show()
OUTPUT:
32
Reg no: 211722118054
PROGRAM:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Create a standard normal distribution with mean 0 and standard deviation 1
mu = 0
std = 1
snd = stats.norm(mu, std)
# Generate 100 evenly spaced values between -5 and 5
x = np.linspace(-5, 5, 100)
# Plot the probability density of the standard normal distribution over
# the range -5 to 5
plt.figure(figsize=(7.5, 7.5))
plt.plot(x, snd.pdf(x))
plt.xlim(-5, 5)
plt.title('Normal Distribution', fontsize=15)
plt.xlabel('Values of Random Variable X', fontsize=15)
plt.ylabel('Probability', fontsize=15)
plt.show()
OUTPUT:
34
Reg no: 211722118054
PROGRAM:
import math
# Function that returns the correlation coefficient.
def correlationCoefficient(X, Y, n):
    """Return the Pearson correlation coefficient of the first n pairs.

    Args:
        X: Sequence of numbers.
        Y: Sequence of numbers paired with X.
        n: Number of leading elements of X and Y to use.

    Returns:
        The correlation coefficient as a float.

    Raises:
        ZeroDivisionError: If either sequence has zero variance over the
            first n elements.
    """
    sum_X = 0
    sum_Y = 0
    sum_XY = 0
    squareSum_X = 0
    squareSum_Y = 0
    for i in range(n):
        # Running sums of X, Y, X*Y and the squares.
        sum_X += X[i]
        sum_Y += Y[i]
        sum_XY += X[i] * Y[i]
        squareSum_X += X[i] * X[i]
        squareSum_Y += Y[i] * Y[i]
    # r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2))
    numerator = float(n * sum_XY - sum_X * sum_Y)
    denominator = math.sqrt(
        (n * squareSum_X - sum_X * sum_X) * (n * squareSum_Y - sum_Y * sum_Y)
    )
    corr = numerator / denominator
    return corr
# Driver code
X = [15, 18, 21, 24, 27]
Y = [25, 25, 27, 31, 32]
print(X)
print(Y)
# Find the size of the array.
n = len(X)
# Function call to correlationCoefficient.
z = correlationCoefficient(X, Y, n)
# Report the strength of the linear relationship by its magnitude.
if abs(z) > 0.5:
    print('{0:.6f}'.format(z), "Highly Correlated")
else:
    print('{0:.6f}'.format(z), "Low Correlated")
37
Reg no: 211722118054
OUTPUT:
[15, 18, 21, 24, 27]
[25, 25, 27, 31, 32]
0.953463 Highly Correlated
38
Reg no: 211722118054
PROGRAM:
import numpy as np
from collections import Counter
from scipy import stats
# Given data
a = [11, 21, 34, 22, 27, 11, 23, 21]

# Finding Mean by simple Computation
mean = sum(a) / len(a)
print("Finding Mean by simple Computation")
print(mean)

# Finding Mean using numpy method
mean_np = np.mean(a)
print("Finding Mean using numpy method ")
print(mean_np)
# Finding Median by simple Computation.
def median(nums):
    """Return the median of a non-empty sequence of numbers.

    For an even number of values the result is the mean of the two middle
    values, not truncated to an integer. The input is left unmodified.

    Args:
        nums: Non-empty sequence of numbers.

    Returns:
        The middle value (odd length) or the average of the two middle
        values as a float (even length).
    """
    ordered = sorted(nums)  # sort a copy so the caller's list is untouched
    mid = len(ordered) // 2
    if len(ordered) % 2 == 0:
        return (ordered[mid - 1] + ordered[mid]) / 2
    return ordered[mid]
print("Finding Median by simple Computation")
print(median(a))

# Finding Median using numpy method
print("Finding Median by numpy method")
print(np.median(a))

# Finding Mode by simple Computation: every value that reaches the top count
data = dict(Counter(a))
highest_count = max(data.values())
mode = [k for k, v in data.items() if v == highest_count]
print("Finding Mode by simple Computation ")
print(mode)

# Finding Mode using scipy.stats.mode
mode_np = stats.mode(a, axis=None)
print("Finding Mode using numpy method")
print(mode_np)

# Find Standard deviation by simple computation (population form, divide by n)
n = len(a)
mean_a = sum(a) / n  # computed once instead of once per element
std = (sum((x - mean_a) ** 2 for x in a) / n) ** 0.5
print("Finding Standard Deviation by simple computation")
print(std)

# Find Standard deviation using numpy method
std_np = np.std(a)
print("Finding Standard Deviation using numpy method")
print(std_np)
OUTPUT:
Finding Mean by simple Computation
21.25
Finding Mean using numpy method
21.25
Finding Median by simple Computation
21
Finding Median by numpy method
21.5
Finding Mode by simple Computation
[11, 21]
Finding Mode using numpy method
ModeResult(mode=11, count=2)
Finding Standard Deviation by simple computation
7.1545440106270926
Finding Standard Deviation using numpy method
7.1545440106270926
41
Reg no: 211722118054
PROGRAM:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): both CSV file names below were lost when this listing was
# extracted ('[Link]' is a placeholder) - restore the real paths before running.

# Read the Iris dataset (the file has no header row, so name the columns)
iris = pd.read_csv('[Link]', names=['sepal_length', 'sepal_width', 'petal_length',
                                    'petal_width', 'class'])
print(iris.head())

# Read the wine dataset
wine_reviews = pd.read_csv('[Link]', index_col=0)
print(wine_reviews.head())

# Create Color Scatter Plotting
colors = {'Iris-setosa': 'r', 'Iris-versicolor': 'g', 'Iris-virginica': 'b'}
# create a figure and axis
fig, ax = plt.subplots()
# plot every data-point in one call, coloured by its class
ax.scatter(iris['sepal_length'], iris['sepal_width'], c=iris['class'].map(colors))
# set a title and labels
ax.set_title('Iris Dataset')
ax.set_xlabel('sepal_length')
ax.set_ylabel('sepal_width')
plt.show()

# Create Line Chart Plotting
columns = iris.columns.drop(['class'])
# create x data
x_data = range(0, iris.shape[0])
# create figure and axis
fig, ax = plt.subplots()
# plot each column
for column in columns:
    ax.plot(x_data, iris[column], label=column)
# set title and legend
ax.set_title('Iris Dataset')
ax.legend()
plt.show()

# Histogram of wine review scores
# create figure and axis
fig, ax = plt.subplots()
# plot histogram
ax.hist(wine_reviews['points'])
# set title and labels
ax.set_title('Wine Review Scores')
ax.set_xlabel('Points')
ax.set_ylabel('Frequency')
plt.show()

# Bar chart of wine review scores
# create a figure and axis
fig, ax = plt.subplots()
# count the occurrence of each score
data = wine_reviews['points'].value_counts()
# get x and y data
points = data.index
frequency = data.values
# create bar chart
ax.bar(points, frequency)
# set title and labels
ax.set_title('Wine Review Scores')
ax.set_xlabel('Points')
ax.set_ylabel('Frequency')
plt.show()

# Iris dataset histogram (one subplot per numeric column)
iris.plot.hist(subplots=True, layout=(2, 2), figsize=(10, 10), bins=20)
plt.show()

# Wine review scores bar chart
wine_reviews['points'].value_counts().sort_index().plot.bar()
plt.show()

# Wine review scores horizontal bar chart
wine_reviews['points'].value_counts().sort_index().plot.barh()
plt.show()

# Mean wine price of the five most expensive countries, as a bar chart
wine_reviews.groupby("country").price.mean().sort_values(ascending=False)[:5].plot.bar()
plt.show()

# Correlation Matrix (numeric columns only; 'class' holds strings)
corr = iris.drop(columns=['class']).corr()
fig, ax = plt.subplots()
# create heatmap
im = ax.imshow(corr.values)
# set labels
ax.set_xticks(np.arange(len(corr.columns)))
ax.set_yticks(np.arange(len(corr.columns)))
ax.set_xticklabels(corr.columns)
ax.set_yticklabels(corr.columns)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
# Loop over data dimensions and create text annotations.
for i in range(len(corr.columns)):
    for j in range(len(corr.columns)):
        ax.text(j, i, np.around(corr.iloc[i, j], decimals=2), ha="center", va="center",
                color="black")
plt.show()
OUTPUT:
Line chart for each attribute of IRIS Dataset
45
Reg no: 211722118054
Bar Chart for Wine Review Score
Multiple histogram for attributes of IRIS Data set
Vertical bar chart for Wine Review Scores
Horizontal bar chart for Wine Review Scores
Bar chart for Wine Reviews with the highest cost in five different countries
Correlation Matrix
46