#1) Import libraries/data set
# A)Import the required libraries and the data set
#2) Python regular expression tasks
# A)Create a list of all months in a year and print months which start with “J” using regex functions
#3) Python date time tasks
# A)Print today’s date and day using datetime functions
#4) Python math library tasks
# A)Execute any five math library functions and print the output
#5) Numpy tasks
# A)Create two arrays A=[[1,2,3],[4,5,6],[7,8,9]] and B= [[3,2,4],[5,5,5],[7,8,8]]
# B)Calculate their sum at row level and at column level and print the outputs
# C)Compare the two arrays to check if they are the same and print the result
#6) SciPy tasks
# A)Execute any five SciPy library functions and print the output
#7) Pandas tasks
# A)Import the advertising dataset and print mean, median, mode of all the columns
# B)Create correlation matrix of all the columns using pandas’ correlation function
# C)Sort the dataset in descending order by radio/sales ratio
# D)Filter out the data set where sales is greater than 15
#1) Import libraries/data set
# A)Import the required libraries and the data set
import pandas as pd
import os
# Load the dataset
# A) Import the advertising dataset and print the data frame
# Importing file from Downloads folder of MACBOOK PRO
# Locate the workbook relative to wherever the notebook kernel was started;
# the path is only valid when that directory contains a Downloads folder.
working_directory = os.getcwd()
print(working_directory)
path = f"{working_directory}/Downloads/Week_6_Data Set_Graded_Assignment_6_1.xlsm"
print(path)
# .xlsm is a macro-enabled workbook; the openpyxl engine is requested explicitly.
df_xladv = pd.read_excel(path, engine="openpyxl")
# Trailing bare expression: rendered as the cell result when run in a notebook.
df_xladv
/Users/juhikulshreshtha
/Users/juhikulshreshtha/Downloads/Week_6_Data Set_Graded_Assignment_6_1.xlsm
TV Radio Newspaper Sales
0 230.1 37.8 69.2 22.1
1 44.5 39.3 45.1 10.4
2 17.2 45.9 69.3 12.0
3 151.5 41.3 58.5 16.5
4 180.8 10.8 58.4 17.9
... ... ... ... ...
195 38.2 3.7 13.8 7.6
196 94.2 4.9 8.1 14.0
197 177.0 9.3 6.4 14.8
198 283.6 42.0 66.2 25.5
199 232.1 8.6 8.7 18.4
200 rows × 4 columns
#2) Python regular expression tasks
# A)Create a list of all months in a year and print months which start with “J” using regex functions
import re
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
          'August', 'September', 'October', 'November', 'December']

# \A anchors the pattern at the very start of the string, so any match is
# already at position 0. re.search returns None when there is no match, which
# is falsy, so a plain truthiness test selects the months without relying on
# an AttributeError from calling .start() on None.
j_months = [m for m in months if re.search(r"\AJ", m)]

# Print each matching month on its own line.
for m in j_months:
    print(m)
January
June
July
#3) Python date time tasks
# A)Print today’s date and day using datetime functions
from datetime import date
from datetime import datetime,timedelta
# Take the current local date once so both printed values refer to the same day.
today = date.today()
# First line: the date in ISO form (YYYY-MM-DD); second line: the day-of-month number.
print(today, today.day, sep="\n")
2023-09-10
10
#4) Python math library tasks
# A)Execute any five math library functions and print the output
import math
# Five math-library calls, each paired with the label printed in front of it.
# Note that math.pow always returns a float, while factorial/ceil/floor return ints.
math_demos = (
    ("Sq root of 367 is:", math.sqrt(367)),
    ("Factorial of 21 is:", math.factorial(21)),
    ("7th power of 367 is:", math.pow(367, 7)),
    ("Ceiling of 20.8 is:", math.ceil(20.8)),
    ("Floor of 318.3 is:", math.floor(318.3)),
)
for label, value in math_demos:
    print(label, value)
Sq root of 367 is: 19.157244060668017
Factorial of 21 is: 51090942171709440000
7th power of 367 is: 8.967315496113902e+17
Ceiling of 20.8 is: 21
Floor of 318.3 is: 318
#5) Numpy tasks
# A)Create two arrays A=[[1,2,3],[4,5,6],[7,8,9]] and B= [[3,2,4],[5,5,5],[7,8,8]]
# B)Calculate their sum at row level and at column level and print the outputs
# C)Compare the two arrays to check if they are the same and print the result
import numpy as np
# A) Create the two 3x3 arrays.
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
B = np.array([[3, 2, 4], [5, 5, 5], [7, 8, 8]])

# Element-wise sum of the two arrays (same shape as A and B).
C = np.add(A, B)

# B) Row-level / column-level sums.
# axis=1 adds across the columns, giving one total per row;
# axis=0 adds down the rows, giving one total per column.
# The earlier version printed C and its transpose, which never reduced an axis.
# NOTE: any output recorded from the earlier version of this cell is stale;
# re-run the cell to refresh it.
row_sum_A = np.sum(A, axis=1)
row_sum_B = np.sum(B, axis=1)
row_sums = np.sum(C, axis=1)
print("Row Level Addition")
print("A:", row_sum_A)
print("B:", row_sum_B)
print("A + B:", row_sums)

col_sum_A = np.sum(A, axis=0)
col_sum_B = np.sum(B, axis=0)
col_sums = np.sum(C, axis=0)
print("Column Level Addition")
print("A:", col_sum_A)
print("B:", col_sum_B)
print("A + B:", col_sums)

# C) Compare the two arrays: True only if the shapes and every element match.
arrays_equal = np.array_equal(A, B)
print("Arrays are the same:", arrays_equal)
Row Level Addition
[[ 4 4 7]
[ 9 10 11]
[14 16 17]]
Column Level Addition
[[ 4 9 14]
[ 4 10 16]
[ 7 11 17]]
#6) SciPy tasks
# A)Execute any five SciPy library functions and print the output
from scipy import stats
import numpy as np
# Small sample shared by all five SciPy statistics demos below.
data1 = [3, 5, 6, 8, 4, 5]

# Each entry is (heading to print, scipy.stats function applied to data1).
# Every function is called with its default arguments only.
scipy_demos = (
    ("Describe Data:", stats.describe),
    ("Cumulative Freq:", stats.cumfreq),
    ("Harmonic Mean:", stats.hmean),
    ("Geometric Mean:", stats.gmean),
    ("IQR:", stats.iqr),
)
for heading, stat_fn in scipy_demos:
    print(heading)
    print(stat_fn(data1))
Describe Data:
DescribeResult(nobs=6, minmax=(3, 8), mean=5.166666666666667, variance=2.966666666666667, skewness=0.4954596832
389274, kurtosis=-0.5779573286201241)
Cumulative Freq:
CumfreqResult(cumcount=array([1., 1., 2., 2., 4., 5., 5., 5., 5., 6.]), lowerlimit=2.7222222222222223, binsize=
0.5555555555555554, extrapoints=0)
Harmonic Mean:
4.705882352941177
Geometric Mean:
4.93242414866094
IQR:
1.5
#7) Pandas tasks
# A)Import the advertising dataset and print mean, median, mode of all the columns
# B)Create correlation matrix of all the columns using pandas’ correlation function
# C)Sort the dataset in descending order by radio/sales ratio
# D)Filter out the data set where sales is greater than 15
import pandas as pd
import os
# Load the dataset
# A) Import the advertising dataset and print the data frame
# Importing file from Downloads folder of MACBOOK PRO
# Locate the workbook relative to the directory the kernel was started in;
# the path is only valid when that directory contains a Downloads folder.
working_directory = os.getcwd()
print(working_directory)
path=working_directory + '/Downloads/Week_6_Data Set_Graded_Assignment_6_1.xlsm'
print(path)
df=pd.read_excel(path,engine="openpyxl")

# A) Column-wise summary statistics. These run before the helper 'rs' column
# is added below, so they cover only the columns loaded from the workbook.
print("\n----------- Calculate Mean -----------\n")
print(df.mean())
print("\n----------- Calculate Median -----------\n")
print(df.median())
print("\n----------- Calculate Mode -----------\n")
# mode() returns one row per tied modal value; columns with fewer ties are
# padded with NaN.
print(df.mode())

# B) Pairwise correlation of all columns.
print("\n----------- Correlation Matrix -----------\n")
print(df.corr())

# C) Sort by the radio/sales ratio, largest first.
print("\n----------- Adding new column radio/sales and sorting -----------\n")
# A single in-place column assignment is enough; the former extra
# df.assign(...) call returned a new frame that was discarded, so it had no
# effect and has been removed.
df['rs'] = df['Radio']/df['Sales']
final_df = df.sort_values(by=['rs'], ascending=False)
# Printed explicitly: a bare expression in the middle of a cell is not
# displayed, so the sorted data set was previously never shown.
print(final_df)

# D) Keep only the rows whose Sales value exceeds 15.
print("\n----------- Filter out the data set where sales is greater than 15 -----------\n")
res_df=df[df['Sales']>15]
# Printed explicitly so the result appears when run as a script as well as in
# a notebook.
print(res_df)
/Users/juhikulshreshtha
/Users/juhikulshreshtha/Downloads/Week_6_Data Set_Graded_Assignment_6_1.xlsm
----------- Calculate Mean -----------
TV 147.0425
Radio 23.2640
Newspaper 30.5540
Sales 15.1305
dtype: float64
----------- Calculate Median -----------
TV 149.75
Radio 22.90
Newspaper 25.75
Sales 16.00
dtype: float64
----------- Calculate Mode -----------
TV Radio Newspaper Sales
0 17.2 4.1 8.7 11.9
1 76.4 5.7 9.3 16.7
2 109.8 NaN 25.6 NaN
3 177.0 NaN NaN NaN
4 184.9 NaN NaN NaN
5 197.6 NaN NaN NaN
6 199.8 NaN NaN NaN
7 222.4 NaN NaN NaN
8 237.4 NaN NaN NaN
9 240.1 NaN NaN NaN
----------- Correlation Matrix -----------
TV Radio Newspaper Sales
TV 1.000000 0.054809 0.056648 0.901208
Radio 0.054809 1.000000 0.354104 0.349631
Newspaper 0.056648 0.354104 1.000000 0.157960
Sales 0.901208 0.349631 0.157960 1.000000
----------- Adding new column radio/sales and sorting -----------
----------- Filter out the data set where sales is greater than 15 -----------
TV Radio Newspaper Sales rs
0 230.1 37.8 69.2 22.1 1.710407
3 151.5 41.3 58.5 16.5 2.503030
4 180.8 10.8 58.4 17.9 0.603352
9 199.8 2.6 21.2 15.6 0.166667
11 214.7 24.0 4.0 17.4 1.379310
... ... ... ... ... ...
188 286.0 13.9 3.7 20.9 0.665072
193 166.8 42.0 3.6 19.6 2.142857
194 149.7 35.6 6.0 17.3 2.057803
198 283.6 42.0 66.2 25.5 1.647059
199 232.1 8.6 8.7 18.4 0.467391
107 rows × 5 columns
Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js