0% found this document useful (0 votes)
6 views30 pages

Ex. No.: 01 Working With Numpy Arrays

Uploaded by

hulk54495
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views30 pages

Ex. No.: 01 Working With Numpy Arrays

Uploaded by

hulk54495
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 30

Ex. No.: 01 WORKING WITH NUMPY ARRAYS

Program:

# Creating 1D array: wrap a flat Python list in an ndarray.

import numpy as np

values_1d = [1, 2, 3, 4, 5]
arr = np.array(values_1d)
print(arr)

# Creating 2D array: a list of equal-length rows becomes a matrix.

import numpy as np

rows = [[1, 2, 3], [4, 5, 6]]
arr = np.array(rows)
print(arr)

# Creating 3D array: two identical 2x3 matrices stacked along a new axis.

import numpy as np

matrix = [[1, 2, 3], [4, 5, 6]]
arr = np.array([matrix, matrix])
print(arr)

# Accessing 1D array: index 0 is the first element.

import numpy as np

arr = np.array([1, 2, 3, 4])
first_element = arr[0]
print(first_element)

# Accessing 2D array: row 0, column 1.

import numpy as np

arr = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
row0_col1 = arr[0][1]
print('2nd element on 1st dim: ', row0_col1)

# Accessing 3D array: first matrix, second row, third column.

import numpy as np

arr = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr[0][1][2])

# Slicing 1D array: elements at positions 1 to 4 (the stop index is excluded).

import numpy as np

arr = np.array([11, 12, 13, 14, 15, 16, 17])
window = slice(1, 5)
print(arr[window])

# Slicing 2D array: from the second row, take columns 1 to 3.

import numpy as np

arr = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
second_row = arr[1]
print(second_row[1:4])

Output:

#Creating 1D array

[1 2 3 4 5]

#Creating 2D array

[[1 2 3]

[4 5 6]]

#Creating 3D array

[[[1 2 3]

[4 5 6]]

[[1 2 3]

[4 5 6]]]

#Accessing 1D array

1

#Accessing 2D array

2nd element on 1st dim:  2

#Accessing 3D array

6
#Slicing 1D array

[12 13 14 15]

#Slicing 2D array

[7 8 9]
Ex. No: 2 WORKING WITH PANDAS

PROGRAM

# Creating DataFrame from list: each inner list is one row.

import pandas as pd

name_age_rows = [['tom', 10], ['nick', 15], ['juli', 14]]
column_names = ['Name', 'Age']
df = pd.DataFrame(name_age_rows, columns=column_names)
print(df)

# Creating DataFrame from dictionary: keys become column names.

import pandas as pd

columns_by_name = {
    'Name': ['Tom', 'nick', 'krish', 'jack'],
    'Age': [20, 21, 19, 18],
}
df = pd.DataFrame(columns_by_name)
print(df)

# Creating DataFrame with explicit index: row labels replace 0..n-1.

import pandas as pd

marks_by_name = {
    'Name': ['Tom', 'Jack', 'nick', 'juli'],
    'marks': [99, 98, 95, 90],
}
rank_labels = ['rank1', 'rank2', 'rank3', 'rank4']
df = pd.DataFrame(marks_by_name, index=rank_labels)
print(df)

# Creating DataFrame from list of dicts: each dict is one row.

import pandas as pd

records = [
    {'a': 1, 'b': 2, 'c': 3},
    {'a': 10, 'b': 20, 'c': 30},
]
df = pd.DataFrame(records)
print(df)

# Add records to dataframe using the .loc function: assigning to a new
# row label appends that row to the (initially empty) frame.

import pandas as pd

df = pd.DataFrame(columns=['year', 'make', 'model'])
cars = [
    [2014, "toyota", "corolla"],
    [2018, "honda", "civic"],
]
for row_label, car in enumerate(cars):
    df.loc[row_label] = car
print(df)

Output:

# Creating DataFrame from list

Name Age

0 tom 10

1 nick 15

2 juli 14
# Creating DataFrame from dictionary

Name Age

0 Tom 20

1 nick 21

2 krish 19

3 jack 18
# Creating data frame with explicit index

Name marks

rank1 Tom 99

rank2 Jack 98

rank3 nick 95

rank4 juli 90
# Creating DataFrame from list of dicts

a b c

0 1 2 3

1 10 20 30
# Add records to dataframe using the .loc function

year make model

0 2014 toyota corolla

1 2018 honda civic


Ex. No: 3 BASIC PLOTS USING MATPLOTLIB

PROGRAM:
# Line plot: connect the (x, y) pairs and label the axes.

import matplotlib.pyplot as plt

x = [10, 20, 30, 40]
y = [20, 30, 40, 50]
plt.plot(x, y)
plt.title("Simple Plot")
plt.ylabel("y-axis")
plt.xlabel("x-axis")
plt.show()

# Histogram: count how many values fall between consecutive bin edges.

import matplotlib.pyplot as plt

x = [1, 2, 3, 4, 5, 6, 7, 4]
plt.hist(x, bins=[1, 2, 3, 4, 5, 6, 7])
plt.title("Histogram")
plt.legend(["bar"])
plt.show()

# Scatter plot: one marker per (x, y) pair.

import matplotlib.pyplot as plt

x = [3, 1, 3, 12, 2, 4, 4]
y = [3, 2, 1, 4, 5, 6, 7]
plt.scatter(x, y)
# Labels must be passed as a sequence. A bare string is iterated character
# by character, so legend("A") only worked because the label is one letter.
plt.legend(["A"])
plt.title("Scatter chart")
plt.show()

# Pie chart: each wedge is proportional to its share of the total.

import matplotlib.pyplot as plt
import numpy as np

y = np.array([35, 25, 25, 15])
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]
plt.pie(y, labels=mylabels)
plt.show()

# Bar chart: one bar per category, with height taken from y.

import matplotlib.pyplot as plt
import numpy as np

x = np.array(["A", "B", "C", "D"])
y = np.array([3, 8, 1, 10])
plt.bar(x, y)
plt.show()

Output:

#Line plot

#Histogram
#Scatter plot

#Pie chart

#Bar chart
Ex. No: 4.a FREQUENCY DISTRIBUTION, AVERAGES,VARIABILITY

Program:

# Frequency distribution for marks: histogram of the English column.

import pandas as pd
import matplotlib.pyplot as plt

d = {
    'Maths': [90, 75, 68, 66, 72, 50, 45],
    'English': [95, 74, 60, 63, 79, 80, 55],
    'Science': [60, 85, 58, 76, 52, 70, 65],
    'Names': ['Avni', 'Bharathi', 'Dadlin', 'Irfan', 'Karan', 'Mano', 'Ranjit'],
}
df = pd.DataFrame(d)
english_marks = df['English']
plt.hist(english_marks)
plt.xlabel('Marks')
plt.ylabel('Count')
plt.show()

# Frequency distribution with bins: explicit bin edges every 10 units.

import matplotlib.pyplot as plt

x = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 8, 10, 10, 15, 16, 17, 18,
     20, 25, 28, 30, 35, 35, 37, 40]
bin_edges = [0, 10, 20, 30, 40, 50]
plt.hist(x, bins=bin_edges)
plt.show()

# Frequency distribution with colors: Maths marks in three 20-mark bins.

import pandas as pd
import matplotlib.pyplot as plt

d = {
    'Maths': [90, 75, 68, 66, 72, 50, 45],
    'English': [95, 74, 60, 63, 79, 80, 55],
    'Science': [60, 85, 58, 76, 52, 70, 65],
    'Names': ['Avni', 'Bharathi', 'Dadlin', 'Irfan', 'Karan', 'Mano', 'Ranjit'],
}
df = pd.DataFrame(d)
maths_marks = df['Maths']
maths_marks.plot(kind='hist', bins=[40, 60, 80, 100], color='brown')
plt.xlabel('Marks')
plt.ylabel('Count')
plt.show()

# Frequency distribution with edge color: outline each bar in black.

import pandas as pd
import matplotlib.pyplot as plt

d = {
    'Maths': [90, 75, 68, 66, 72, 50, 45],
    'English': [95, 74, 60, 63, 79, 80, 55],
    'Science': [60, 85, 58, 76, 52, 70, 65],
    'Names': ['Avni', 'Bharathi', 'Dadlin', 'Irfan', 'Karan', 'Mano', 'Ranjit'],
}
df = pd.DataFrame(d)
bar_style = dict(kind='hist', bins=4, color='red', edgecolor='black', linewidth=2)
df['Maths'].plot(**bar_style)
plt.xlabel('Marks')
plt.ylabel('Count')
plt.show()

Output:

#Frequency distribution for marks


#Frequency distribution with bins

#Frequency distribution with colors


#Frequency distribution with edge color
Ex. No: 4.b AVERAGES – MEAN, MEDIAN AND MODE

Program:

# Mean: the sum of the sample divided by its size.

import numpy as np

s = [2, 4, 55, 6, 7, 7, 7, 6, 78]
x = np.mean(s)
print(x)

# Median: the middle value of the sorted sample.

import numpy as np

s = [2, 4, 55, 6, 7, 7, 7, 6, 78]
x = np.median(s)
print(x)

# Mode: the most frequent value, reported together with its count.

from scipy import stats as st

s = [2, 4, 55, 6, 7, 7, 7, 6, 78]
x = st.mode(s)
print(x)

Output:

19.11111111111111

7.0

ModeResult(mode=array([7]), count=array([3]))

Ex.No:5 NORMAL CURVES, CORRELATION AND SCATTER PLOTS, CORRELATION COEFFICIENT

PROGRAM:

# Plot the probability density of a normal distribution with mean `mu`
# and the given variance over the range mu +/- 3 standard deviations.

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import math

mu = 0
variance = 1
sigma = math.sqrt(variance)

# 100 evenly spaced points spanning three standard deviations either side.
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)

# These two calls were fused onto one line, which is a SyntaxError.
plt.plot(x, stats.norm.pdf(x, mu, sigma))
plt.show()

OUTPUT:
PROGRAM:

# Pearson correlation on the scikit-learn diabetes dataset.

import numpy as np  # needed for np.corrcoef below; the import was missing
import pandas as pd
from sklearn.datasets import load_diabetes
import seaborn as sns
import matplotlib.pyplot as plt

# load_diabetes(as_frame=True) returns a bunch; `.frame` is the DataFrame.
df = load_diabetes(as_frame=True)
df = df.frame

# Pairwise Pearson correlation between all columns.
corr = df.corr(method='pearson')

# In a script a bare expression shows nothing, so print the first 7 rows.
print(corr.head(7))

# 2x2 correlation matrix for the age and sex columns.
c = np.corrcoef(df['age'], df['sex'])
print('Correlations between age and sex\n', c)

OUTPUT:
PROGRAM:

# Scatter plot of two series with a least-squares line, plus their
# Pearson correlation coefficient.

import sklearn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

y = pd.Series([1, 2, 3, 4, 3, 5, 4])
x = pd.Series([1, 2, 3, 4, 5, 6, 7])

# Series.corr defaults to the Pearson coefficient. The value was computed
# but never shown, so print it.
correlation = y.corr(x)
print('Correlation coefficient:', correlation)

plt.title('Correlation')
plt.scatter(x, y)

# Fit a degree-1 polynomial (straight line) and draw it over the sorted
# unique x values.
plt.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)), color='red')

plt.xlabel('x axis')
plt.ylabel('y axis')

# Without show() nothing is displayed when this runs as a script.
plt.show()

OUTPUT:

Ex. No:6 REGRESSION

PROGRAM

import numpy as np

import matplotlib.pyplot as plt

20

def estimate_coef(x, y):
    """Estimate the coefficients of the simple linear regression y = b_0 + b_1*x.

    Uses the closed-form least-squares formulas
    ``b_1 = SS_xy / SS_xx`` and ``b_0 = mean(y) - b_1 * mean(x)``.

    Args:
        x: 1-D array or sequence of predictor values.
        y: 1-D array or sequence of responses, same length as ``x``.

    Returns:
        Tuple ``(b_0, b_1)``: the intercept and the slope.
    """
    # Accept plain lists as well as arrays. Working in float also keeps
    # the element-wise products from overflowing an integer dtype.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # number of observations/points
    n = np.size(x)

    # mean of x and y vector
    m_x = np.mean(x)
    m_y = np.mean(y)

    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y * x) - n * m_y * m_x
    SS_xx = np.sum(x * x) - n * m_x * m_x

    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    """Plot the observations and the fitted regression line.

    Args:
        x: array of predictor values (used in ``b[1] * x``, so it must
            support element-wise multiplication).
        y: array of observed responses, same length as ``x``.
        b: pair ``(b_0, b_1)`` holding the intercept and the slope.
    """
    # plotting the actual points as scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1] * x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')

    # function to show plot
    plt.show()
def main():
    """Fit a simple linear regression to sample data, print it and plot it."""
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    # The message was a single string split by a backslash continuation;
    # it is rejoined here so the literal is valid.
    print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))

    # plotting regression line
    plot_regression_line(x, y, b)


if __name__ == "__main__":
    main()

OUTPUT:

22
Ex. No: 7 Z-TEST

Program:-
# One-sample z-test: is the mean selling price equal to 484120?

import pandas as pd
import numpy as np

from statsmodels.stats.weightstats import ztest

data = pd.read_csv(r"D:\kp\CAR DETAILS FROM CAR DEKHO.csv")
selling_price = data["selling_price"]
selling_price_mean = np.mean(selling_price)

print("Mean of selling price:", selling_price_mean)

# `value` is the mean under the null hypothesis.
ztest_score, pval = ztest(data['selling_price'], value=484120)
print("Pval:", pval)

# Decide at the 5% significance level.
if pval > 0.05:
    print("Accepting the null hypothesis")
else:
    print("Rejecting the null hypothesis")
OUTPUT:

Mean of selling price: 504127.3117511521


Pval: 0.022714099677937877
Rejecting the null hypothesis

Ex.No:08 T-TEST

Program:-
PROGRAM:
# Two-sample t-test: do the two selling-price samples share a mean?

import pandas as pd
from scipy import stats

print('NULL HYPOTHESIS: Both datasets are from same population')
print('ALTERNATE HYPOTHESIS: Both datasets are not from same population')

data1 = pd.read_csv(r"D:\kp\CAR DETAILS FROM CAR DEKHO1.csv")
data2 = pd.read_csv(r"D:\kp\FINAL_SPINNY_900.csv")
selling_price1 = data1["selling_price"]
selling_price2 = data2["selling_price"]

# Independent two-sample t-test; returns the statistic and the two-tailed p-value.
ttest, p_value = stats.ttest_ind(selling_price1, selling_price2)

# Both values are shown with six decimals, as the original %f formatting did.
print(f'Test statistic is {ttest:.6f}')
print(f'p-value for two tailed test is {p_value:f}')

alpha = 0.05
if p_value <= alpha:
    print('Rejecting null hypothesis\n'
          'Both datasets are not from same population')
else:
    print('Accepting null hypothesis.\n'
          'Both datasets are from same population')

OUTPUT:

NULL HYPOTHESIS: Both datasets are from same population
ALTERNATE HYPOTHESIS: Both datasets are not from same population
Test statistic is -1.243571
p-value for two tailed test is 0.213977
Accepting null hypothesis.
Both datasets are from same population

Ex.No:9 ANOVA

PROGRAM:

# Importing libraries

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Create a dataframe: 30 plant heights, with the first 15 labelled 'daily'
# and the last 15 labelled 'weekly' for both factors.
dataframe = pd.DataFrame({
    'Fertilizer': np.repeat(['daily', 'weekly'], 15),
    'Watering': np.repeat(['daily', 'weekly'], 15),
    'height': [14, 16, 15, 15, 16, 13, 12, 11, 14, 15, 16, 16, 17, 18, 14,
               13, 14, 14, 14, 15, 16, 16, 17, 18, 14, 13, 14, 14, 14, 15],
})  # the closing parenthesis of this call was missing

# Performing two-way ANOVA: both main effects plus their interaction.
model = ols(
    'height ~ C(Fertilizer) + C(Watering) + C(Fertilizer):C(Watering)',
    data=dataframe,
).fit()

# The keyword is `typ`, not `type`. anova_lm silently ignores unknown
# keywords, so `type=2` fell back to the default Type I table.
# NOTE(review): the OUTPUT listing in this manual has the Type I column
# layout; re-run to refresh it after this change.
result = sm.stats.anova_lm(model, typ=2)

# Print the result
print(result)

OUTPUT:
C(Fertilizer) 1.0 0.033333 0.033333 0.012069 0.913305

C(Watering) 1.0 0.000369 0.000369 0.000133 0.990865

C(Fertilizer):C(Watering) 1.0 0.040866 0.040866 0.014796 0.904053

Residual 28.0 77.333333 2.761905 NaN NaN


Ex No:10 BUILDING AND VALIDATING LINEAR MODELS

PROGRAM:
# Fit a straight line to (x, y) with scipy and draw it over the data.

import matplotlib.pyplot as plt
from scipy import stats

x = [89, 43, 36, 36, 95, 10, 66, 34, 38, 20, 26, 29, 48, 64, 6, 5, 36, 66, 72, 40]
y = [21, 46, 3, 35, 67, 95, 53, 72, 58, 10, 26, 34, 90, 33, 38, 20, 56, 2, 47, 15]

# linregress returns the fitted slope and intercept, plus r, p and std_err,
# which can be used to judge how well the line fits.
slope, intercept, r, p, std_err = stats.linregress(x, y)


def myfunc(x):
    """Return the fitted line's prediction ``slope * x + intercept``."""
    return slope * x + intercept


# Predicted y for every observed x.
mymodel = list(map(myfunc, x))

plt.scatter(x, y)
plt.plot(x, mymodel)
plt.show()

OUTPUT:

Ex No:11 BUILDING AND VALIDATING LOGISTIC MODEL

PROGRAM:

import numpy
from sklearn import linear_model

# One feature per sample: reshape(-1, 1) turns the flat list into a column,
# the 2-D layout scikit-learn estimators expect.
feature_values = [3.78, 2.44, 2.09, 0.14, 1.72, 1.65,
                  4.92, 4.37, 4.96, 4.52, 3.69, 5.88]
X = numpy.array(feature_values).reshape(-1, 1)

# Binary class label for each sample, in the same order as X.
y = numpy.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])

# fit() returns the estimator itself, so creation and training can be chained.
logr = linear_model.LogisticRegression().fit(X, y)

def logit2prob(logr, X):
    """Convert a fitted model's log-odds for ``X`` into probabilities.

    Args:
        logr: fitted model exposing ``coef_`` and ``intercept_``.
        X: array of feature values; combined with ``coef_`` by element-wise
            multiplication, as in the original code.

    Returns:
        Array of probabilities with the broadcast shape of
        ``logr.coef_ * X + logr.intercept_``.
    """
    log_odds = logr.coef_ * X + logr.intercept_
    # 1 / (1 + e^-z) is algebraically the same as odds / (1 + odds) with
    # odds = e^z, but it avoids inf / inf = nan when z is large.
    # For very negative z, e^-z overflows to inf and the result is
    # correctly 0.0, so the overflow warning is suppressed.
    with numpy.errstate(over='ignore'):
        probability = 1 / (1 + numpy.exp(-log_odds))
    return probability

print(logit2prob(logr, X))

OUTPUT:
[[0.60749955]
[0.19268876]
[0.12775886]
[0.00955221]
[0.08038616]
[0.07345637]
[0.88362743]
[0.77901378]
[0.88924409]
[0.81293497]
[0.57719129]
[0.96664243]]
Ex No:12 TIME SERIES ANALYSIS

PROGRAM:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# reading the dataset using read_csv; the "Date" column becomes the index
# and is parsed as dates
df = pd.read_csv("stock_data.csv",
                 parse_dates=True,
                 index_col="Date")

# displaying the first five rows of dataset (a bare expression shows
# nothing when run as a script, so print it)
print(df.head())

# deleting column: drop() returns a new frame, so the result must be
# assigned back or the column is never removed
df = df.drop(columns='Unnamed: 0')

# line plot of the Volume column against the date index
df['Volume'].plot()
plt.show()

OUTPUT:

You might also like