10-regplot
August 13, 2024
1 Seaborn: regplot
[1]: import seaborn as sns
from matplotlib import pyplot as plt
[2]: diamonds = sns.load_dataset('diamonds')
diamonds.shape
[2]: (53940, 10)
[3]: diamonds.head()
[3]: carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
[4]: diamonds = diamonds.sample(n=200, random_state=44)
diamonds.shape
[4]: (200, 10)
1.1 Intro Visuals
[ ]: import numpy as np
[ ]: sns.set_style('white')
plt.rc('xtick', labelsize=14)
plt.rc('ytick', labelsize=14)
[ ]: blue, orange, green, red = sns.color_palette()[:4]
1
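[ ]: x_vals = np.random.rand(100)*5
# NOTE: generator name lost in PDF extraction; np.random.rand assumed (np.random.randn is also possible)
y_vals_posTwo = x_vals*2 + np.random.rand(100)*4
y_vals_posHalf = x_vals*0.5 + np.random.rand(100)*3 + 2
y_vals_negOne = x_vals*(-1) + np.random.rand(100)*3 + 6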
[ ]: plt.figure(figsize=(3, 5))
sns.regplot(x_vals, y_vals_posTwo, scatter_kws={'alpha': 0.4}, line_kws={'lw': 4})
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
[ ]: plt.figure(figsize=(3, 5))
sns.regplot(x_vals, y_vals_posHalf, scatter_kws={'alpha': 0.4}, line_kws={'lw': 4})
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
[ ]: plt.figure(figsize=(3, 5))
sns.regplot(x_vals, y_vals_negOne, scatter_kws={'alpha': 0.4}, line_kws={'lw': 4})
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
[ ]: plt.figure(figsize=(3, 5))
plt.gca().set(xlim=(0, 5))
sns.regplot(x_vals, y_vals_posTwo, scatter=False, ci=None, line_kws={'lw': 4})
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
[ ]: plt.figure(figsize=(3, 5))
plt.gca().set(xlim=(0, 5))
sns.regplot(x_vals, y_vals_posHalf, scatter=False, ci=None, line_kws={'lw': 4})
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
2
[ ]: plt.figure(figsize=(3, 5))
plt.gca().set(xlim=(0, 5))
sns.regplot(x_vals, y_vals_negOne, scatter=False, ci=None, line_kws={'lw': 4})
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
[ ]: plt.figure(figsize=(3, 5))
plt.gca().set(xlim=(0, 5))
sns.regplot(x_vals, y_vals_posHalf, fit_reg=False)
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
[ ]: plt.figure(figsize=(3, 5))
plt.gca().set(xlim=(0, 5))
sns.regplot(x_vals, y_vals_posTwo, fit_reg=False)
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
[ ]: plt.figure(figsize=(3, 5))
plt.gca().set(xlim=(0, 5))
sns.regplot(x_vals, y_vals_negOne, fit_reg=False)
sns.despine()
plt.xlim(0, 5)
plt.ylim(0, 12)
plt.tight_layout();
[ ]: plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)
1.2 Basics
[5]: sns.set_style('dark')
[6]: sns.regplot(diamonds.carat, diamonds.price);
3
[7]: sns.regplot(x='carat', y='price', data=diamonds);
4
[8]: sns.regplot(x='carat', y='price', data=diamonds, fit_reg=False);
[9]: plt.gca().set(xlim=(0,2.6))
sns.regplot(x='carat', y='price', data=diamonds, scatter=False);
5
1.3 regplot Options
1.3.1 Confidence Intervals
[10]: sns.regplot(x='carat', y='price', data=diamonds,
ci=None
);
1.3.2 Discrete Variables
[11]: cut_map = {
'Fair': 1,
'Good': 2,
'Very Good': 3,
'Premium': 4,
'Ideal': 5
}
diamonds['cut_value'] = diamonds.cut.map(cut_map)
[12]: diamonds.cut_value.value_counts()
6
[12]: 5 80
4 63
3 39
2 11
1 7
Name: cut_value, dtype: int64
Jitter
[13]: sns.regplot(x='cut_value', y='price', data=diamonds,
x_jitter=0.1
);
Estimator Aggregate
[14]: import numpy as np
[15]: sns.regplot(x='cut_value', y='price', data=diamonds,
x_estimator=np.mean
);
7
1.3.3 Models
Polynomial Regression (order)
[16]: sns.regplot(x='carat', y='price', data=diamonds,
fit_reg=False
);
8
[17]: sns.regplot(x='carat', y='price', data=diamonds,
order=2
);
9
Robust Regression
[18]: x_example=[0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]
y_example=[0.1, 0.8, 2.2, 2.7, 3.8, 4.5, 6.2, 6.8, 7.9, 9.4, 30.4]
[19]: sns.regplot(x=x_example,
y=y_example,
ci=None
);
[20]: sns.regplot(x=x_example,
y=y_example,
ci=None,
robust=True
);
10
1.4 Styling
[21]: sns.set_style('white')
1.4.1 marker
[22]: sns.regplot(x='carat', y='price', data=diamonds,
marker='d'
);
11
1.4.2 scatter_kws
[23]: sns.regplot(x='carat', y='price', data=diamonds,
scatter_kws={'s': 100, 'alpha': 0.5, 'color': 'lightgray'}
);
12
1.4.3 line_kws
[24]: sns.regplot(x='carat', y='price', data=diamonds,
ci=None,
line_kws={'lw': 4, 'color': 'black', 'linestyle': '-.'}
);
13
1.5 Related Seaborn Plots
[26]: blue, orange, green, red = sns.color_palette()[:4]
[27]: p = sns.lmplot(x='carat', y='price',
data=diamonds[diamonds.color.isin(['E', 'J'])],
hue='color',
order=2,
palette=[green, orange])
plt.xlabel('Carat', fontsize=18)
plt.ylabel('Price', fontsize=18)
p._legend.remove()
plt.legend(fontsize=16)
plt.xticks([])
plt.yticks([])
plt.tight_layout();
14
[28]: sns.jointplot(x='carat', y='price', data=diamonds,
kind='reg',
color='purple')
plt.xlabel('')
plt.ylabel('')
plt.xlim(-0.1, None)
plt.ylim(-2000, None)
plt.tight_layout();
15
[29]: sns.pairplot(diamonds[['carat', 'depth', 'price']],
kind='reg',
palette='colorblind')
plt.tight_layout();
16
[ ]:
17