bcj0uik9x
February 5, 2023
[77]: import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
[78]: df = pd.read_csv("dm_office_sales.csv")
[79]: df.head()
[79]: division level of education training level work experience \
0 printers some college 2 6
1 printers associate's degree 2 10
2 peripherals high school 0 9
3 office supplies associate's degree 2 5
4 office supplies high school 1 5
salary sales
0 91684 372302
1 119679 495660
2 82045 320453
3 92949 377148
4 71280 312802
[80]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 division 1000 non-null object
1 level of education 1000 non-null object
2 training level 1000 non-null int64
3 work experience 1000 non-null int64
4 salary 1000 non-null int64
5 sales 1000 non-null int64
dtypes: int64(4), object(2)
memory usage: 47.0+ KB
1
Scatterplot
[81]: sns.scatterplot(x='salary',y='sales',data=df)
[81]: <AxesSubplot:xlabel='salary', ylabel='sales'>
[82]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df)
[82]: <AxesSubplot:xlabel='salary', ylabel='sales'>
2
[83]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,hue='division')
[83]: <AxesSubplot:xlabel='salary', ylabel='sales'>
3
[84]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,hue='work experience')
[84]: <AxesSubplot:xlabel='salary', ylabel='sales'>
4
[85]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,hue='work␣
↪experience',palette='viridis')
[85]: <AxesSubplot:xlabel='salary', ylabel='sales'>
5
[86]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,size='work experience')
[86]: <AxesSubplot:xlabel='salary', ylabel='sales'>
6
[87]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,s=200)
[87]: <AxesSubplot:xlabel='salary', ylabel='sales'>
7
[88]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,s=200,linewidth=0,alpha=0.2)
[88]: <AxesSubplot:xlabel='salary', ylabel='sales'>
8
[89]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,style='level of education')
[89]: <AxesSubplot:xlabel='salary', ylabel='sales'>
9
[90]: plt.figure(figsize=(12,8))
# Sometimes its nice to do BOTH hue and style off the same column
sns.scatterplot(x='salary',y='sales',data=df,style='level of␣
↪education',hue='level of education',s=100)
[90]: <AxesSubplot:xlabel='salary', ylabel='sales'>
10
[91]: plt.figure(figsize=(12,8))
sns.scatterplot(x='salary',y='sales',data=df,style='level of␣
↪education',hue='level of education',s=100)
# Call savefig in the same cell
plt.savefig('example_scatter.jpg')
11
[92]: # The y axis doesn't really represent anything
# X axis is just a stick per data point
sns.rugplot(x='salary',data=df)
[92]: <AxesSubplot:xlabel='salary'>
12
[93]: sns.rugplot(x='salary',data=df,height=0.5)
[93]: <AxesSubplot:xlabel='salary'>
13
[96]: sns.displot(data=df,x='salary',kde=True)
[96]: <seaborn.axisgrid.FacetGrid at 0x2594c988490>
14
[97]: sns.displot(data=df,x='salary')
[97]: <seaborn.axisgrid.FacetGrid at 0x2594f44ec10>
15
[98]: sns.histplot(data=df,x='salary')
[98]: <AxesSubplot:xlabel='salary', ylabel='Count'>
16
[99]: sns.histplot(data=df,x='salary',bins=10)
[99]: <AxesSubplot:xlabel='salary', ylabel='Count'>
17
[100]: sns.histplot(data=df,x='salary',bins=100)
[100]: <AxesSubplot:xlabel='salary', ylabel='Count'>
18
[101]: sns.set(style='darkgrid')
sns.histplot(data=df,x='salary',bins=100)
[101]: <AxesSubplot:xlabel='salary', ylabel='Count'>
19
[102]: sns.set(style='white')
sns.histplot(data=df,x='salary',bins=100)
[102]: <AxesSubplot:xlabel='salary', ylabel='Count'>
20
[103]: sns.displot(data=df,x='salary',bins=20,kde=False,
color='red',edgecolor='black',lw=4,ls='--')
[103]: <seaborn.axisgrid.FacetGrid at 0x2594ec27430>
21
[104]: plt.figure(figsize=(10,4),dpi=200)
sns.countplot(x='division',data=df)
[104]: <AxesSubplot:xlabel='division', ylabel='count'>
22
[105]: plt.figure(figsize=(10,4),dpi=200)
sns.countplot(x='level of education',data=df)
[105]: <AxesSubplot:xlabel='level of education', ylabel='count'>
[106]: plt.figure(figsize=(10,4),dpi=200)
sns.countplot(x='level of education',data=df,hue='training level')
[106]: <AxesSubplot:xlabel='level of education', ylabel='count'>
23
[107]: plt.figure(figsize=(10,4),dpi=200)
sns.countplot(x='level of education',data=df,hue='training␣
↪level',palette='Set1')
[107]: <AxesSubplot:xlabel='level of education', ylabel='count'>
[108]: import numpy as np
plt.figure(figsize=(10,6),dpi=200)
# By default barplot() will show the mean
# Information on the black bar: https://stackoverflow.com/questions/58362473/
↪what-does-black-lines-on-a-seaborn-barplot-mean
sns.barplot(x='level of education',y='salary',data=df,estimator=np.mean,ci='sd')
[108]: <AxesSubplot:xlabel='level of education', ylabel='salary'>
24
[109]: plt.figure(figsize=(12,6))
sns.barplot(x='level of education',y='salary',data=df,estimator=np.
↪mean,ci='sd',hue='division')
[109]: <AxesSubplot:xlabel='level of education', ylabel='salary'>
25
[110]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
[111]: df = pd.read_csv("StudentsPerformance.csv")
[112]: df.head()
[112]: gender race/ethnicity parental level of education lunch \
0 female group B bachelor's degree standard
1 female group C some college standard
2 female group B master's degree standard
3 male group A associate's degree free/reduced
4 male group C some college standard
test preparation course math score reading score writing score
0 none 72 72 74
1 completed 69 90 88
2 none 90 95 93
3 none 47 57 44
4 none 76 78 75
[113]: plt.figure(figsize=(12,6))
sns.boxplot(x='parental level of education',y='math score',data=df)
[113]: <AxesSubplot:xlabel='parental level of education', ylabel='math score'>
26
[ ]:
27