Visualization
Python
pandas and matplotlib
line plot
multiple line plots
barplot
boxplot
heatmap
Life Expectancy and Health Expenditure
# CSV file loaded with pd.read_csv.
# NOTE(review): the file name was lost in the export ("[Link]") - restore it.
FDATA = "./files/[Link]"

# Menu option -> [chart title, DataFrame column name, y-axis label].
OPTCHART = {
    1: ['Life Expectancy', 'LifeExpectancy', 'Number of years'],
    2: ['Health Expenditure', 'HealthExpenditure', 'Dollars/Capita Year'],
}

# Line colour for each plottable column.
COLORS = {"LifeExpectancy": "#16acd8", "HealthExpenditure": "#4e16d8"}

# Column renames applied to the DataFrame right after loading.
COLRENAME = {'Entity': 'Country'}
def menu():
    """Build the prompt listing the available chart options.

    Returns:
        str: one "<key>: <title>" line per OPTCHART entry, followed by
        "Your choice: ".
    """
    # NOTE(review): the iterable was garbled to "[Link]()" in the export;
    # OPTCHART.items() is the reading consistent with the (key, val) unpacking
    # and the use of val[0].
    lines = [str(key) + ": " + val[0] + "\n" for key, val in OPTCHART.items()]
    return "".join(lines) + "Your choice: "
# NOTE(review): dotted names in this cell were garbled to "[Link]" in the
# export and have been restored from context (df.rename, df.Country.unique,
# OPTCHART keys, df.Country) - confirm against the original notebook.
print(menu())

df = pd.read_csv(FDATA)
df = df.rename(columns=COLRENAME)

# Countries present in the data, used to validate the user's input.
clist = df.Country.unique()

# Select one country for plotting; ask again until it exists in the data.
country = input("Country: ")
while country not in clist:
    country = input("Country: ")

# Select which column to plot; ask again until the answer is an OPTCHART key.
opt = None
while opt not in OPTCHART:
    try:
        opt = int(input(menu()))
    except ValueError:
        # Non-numeric input: prompt again instead of crashing.
        opt = None

colname = OPTCHART[opt][1]
selcol = {colname: COLORS[colname]}

# Rows of the chosen country that have a value in the chosen column.
dfsel = df[(df.Country == country) & (~df[colname].isna())][['Year', colname]]
Q1: plot one indicator for a selected country
pandas plot
# one line, no legend
# NOTE(review): every "[Link]" token below is an export artifact that replaced
# a dotted name. Presumably: dfsel.plot(...), ax.set_xlabel, ax.set_ylabel,
# ax.set_title and a str method called on `country`; the title prefix string
# was lost as well - confirm against the original notebook.
ax = [Link](x='Year', legend=False, color=selcol)
# x-axis label, then y-axis label taken from the selected OPTCHART entry
[Link]('Year')
[Link](OPTCHART[opt][2])
# title: "<prefix>: <chart title>" on the first line, a second line below it
[Link]("[Link]: " +OPTCHART[opt][0] + "\n" + [Link]())
#[Link]()
pandas plot - uses line
# Same chart as the previous cell; the heading says this variant "uses line".
# NOTE(review): "[Link]" tokens are export artifacts. Presumably
# dfsel.plot.line(...), ax.set_xlabel, ax.set_ylabel, ax.set_title and a str
# method on `country`; the title prefix string was lost - confirm.
ax = [Link](x='Year', legend=False, color=selcol)
[Link]('Year')
[Link](OPTCHART[opt][2])
[Link]("[Link]: " + OPTCHART[opt][0] + "\n" + [Link]())
#[Link]()
matplotlib
# Same chart drawn with matplotlib: dfsel[colname] against dfsel['Year'],
# with a legend entry taken from the OPTCHART title.
# NOTE(review): "[Link]" tokens are export artifacts. Presumably
# plt.subplots(), ax.plot, ax.set_xlabel, ax.set_ylabel, ax.set_title,
# ax.legend and a str method on `country` - confirm against the notebook.
fig, ax = [Link]()
# NOTE(review): the bare "line" on the next-but-one line is the tail of the
# wrapped trailing comment ("Set label for first line"), not a statement.
[Link](dfsel['Year'], dfsel[colname], label=OPTCHART[opt][0]) # Set label for first
line
[Link]('Year')
[Link](OPTCHART[opt][2])
[Link]("matplotlib: " + OPTCHART[opt][0] + "\n" + [Link]())
[Link]() # Show legend with the specified labels
#[Link]()
Q2 plot both data
## Two different Y axes
### separate charts
# Q2: plot both columns for the selected country, first as two stacked charts
# and then on one chart with two y axes.
# NOTE(review): "[Link]" tokens are export artifacts. Presumably
# df.Country == country, plt.subplots, dfcountry.<column> arguments,
# plt.show, ax1.twinx(), ax1.plot and ax2.plot. Which column goes on which
# axis cannot be read from this text - confirm against the notebook.
# NOTE(review): rows are dropped only where `colname` (the column chosen for
# Q1) is NaN, although both columns appear to be plotted here - confirm intent.
dfcountry = df[([Link] == country) & (~df[colname].isna())]
# two rows, one column of axes
fig, axes = [Link](2, 1, figsize=(8, 10))
axes[0].plot([Link], [Link])
axes[1].plot([Link], [Link])
[Link]()
# single chart; ax2 is presumably a twin of ax1 sharing the x axis
fig, ax1 = [Link](figsize=(8, 8))
ax2 = [Link]()
[Link]([Link], [Link], color=COLORS['LifeExpectancy'])
[Link]([Link], [Link],
color=COLORS['HealthExpenditure'])
ax1.set_xlabel("Year")
# left axis labelled with the OPTCHART[1] unit, right axis with OPTCHART[2]
ax1.set_ylabel(OPTCHART[1][2])
#ax1.tick_params(axis="y", labelcolor=COLOR_TEMPERATURE)
ax2.set_ylabel(OPTCHART[2][2])
#ax2.tick_params(axis="y", labelcolor=COLOR_PRICE)
[Link]()
Q3 plot several selected countries
create table with column per country and plot with single chart
# TODO: ask the user for the set of countries
countries = ['Italy', 'France']

# Table with one column per country, indexed by year, built from the rows that
# have a value in the selected column.
# NOTE(review): dotted names were garbled to "[Link]" in the export and are
# restored from context (df.Country.isin, dftab.plot, ax.set_ylabel,
# ax.set_title, plt.show) - confirm against the original notebook.
dftab = (
    df[(df.Country.isin(countries)) & (~df[colname].isna())]
    [['Country', 'Year', colname]]
    .pivot(index='Year', columns='Country', values=colname)
)

ax = dftab.plot(kind='line')
ax.set_ylabel(OPTCHART[opt][2])
# Alternative title considered by the author:
#   OPTCHART[opt][0] + "\n(countries: " + ", ".join(countries).strip() + ")"
# NOTE(review): the title prefix was lost in the export; "[Link]" is kept
# verbatim until the original text is restored.
ax.set_title("[Link] - single chart: " + OPTCHART[opt][0])
plt.show()
one chart per country
# One subplot per country, drawn by pandas from the pivoted table.
# NOTE(review): "[Link]" tokens are export artifacts. Presumably
# dftab.plot(..., subplots=True, ...), then plt.ylabel and plt.show; the title
# prefix string was lost - confirm against the notebook.
# NOTE(review): the line starting with "[['Country'" is the wrapped tail of
# the commented-out statement above it, not live code.
#dfsel = df[([Link](countries)) & (~df[colname].isna())]
[['Country','Year',colname]].pivot(index='Year', columns='Country', values=colname)
#print(dfsel)
ax = [Link](kind='line', subplots=True, title="[Link] - multiple chart: " +
OPTCHART[opt][0])
[Link](OPTCHART[opt][2])
#it appears on the last subplot
#[Link]("[Link] - multiple chart: " + OPTCHART[opt][0])
[Link]()
matplotlib
# single chart
# NOTE(review): dotted names were garbled to "[Link]" in the export and are
# restored from context (plt.subplots, axes.plot, dftab.index, axes.set_*,
# axes.legend, plt.show) - confirm against the original notebook.
fig, axes = plt.subplots()
# A separate loop name keeps the user-selected `country` intact.
for name in countries:
    axes.plot(dftab.index, dftab[name], label=name)  # one line per country
axes.set_xlabel('Year')
axes.set_ylabel(OPTCHART[opt][2])
axes.set_title("matplotlib - single chart: " + OPTCHART[opt][0])
axes.legend(title='Country')
plt.show()
one chart per country
# Palette for the per-country charts. It has its own name so the COLORS dict
# (column name -> colour) used by the earlier cells is not overwritten.
LINE_COLORS = ['#00202e', '#003f5c', '#2c4875', '#8a508f', '#bc5090', '#ff6361',
               '#ff8531', '#ffa600']

# NOTE(review): dotted names were garbled to "[Link]" in the export and are
# restored from context (plt.subplots, dftab.columns, dftab.index, plt.show)
# - confirm against the original notebook.
ncountries = len(countries)
# squeeze=False always returns a 2-D array, so indexing also works when there
# is a single country; the only column is then taken to get a 1-D array.
fig, axes = plt.subplots(ncountries, 1, figsize=(8, 6 * ncountries), squeeze=False)
axes = axes[:, 0]
# A separate loop name keeps the user-selected `country` intact.
for i, name in enumerate(dftab.columns):
    # Axes.plot has no `legend` keyword (it raised an error); a single line
    # without a label needs no legend. Colours cycle if the palette runs out.
    axes[i].plot(dftab.index, dftab[name], color=LINE_COLORS[i % len(LINE_COLORS)])
    # Title follows the selected column instead of always "Life Expectancy".
    axes[i].set_title(f'{name} {OPTCHART[opt][0]} Over Years')
    axes[i].set_xlabel('Year')
    axes[i].set_ylabel(OPTCHART[opt][2])
plt.tight_layout()
plt.show()
Q4 boxplot
selcountries = ['France', 'Germany', 'Italy']

# NOTE(review): dotted names were garbled to "[Link]" in the export and are
# restored from context (df.groupby, dfcs.groupby) - confirm against the
# original notebook.
# Exploratory expression: mean life expectancy per year. Its result is not
# stored, so it is only visible as the last expression of a notebook cell.
df.groupby('Year').agg({'LifeExpectancy': 'mean'})

# Rows after 2000 for the selected countries, restricted to the columns used.
dfcs = df.loc[
    (df['Year'] > 2000) & (df['Country'].isin(selcountries)),
    ['Country', 'Year', 'LifeExpectancy', 'HealthExpenditure'],
]

# One box per country on a single axes.
ax = dfcs.groupby('Country')[['LifeExpectancy']].boxplot(subplots=False)
# groupby orders the groups by sorted key, so the labels are derived the same
# way instead of relying on the order in which selcountries was written.
ax.set_xticklabels(sorted(dfcs['Country'].unique()))
Passwords
# Input files for the passwords exercise (passwords and categories).
FDATA = "./files/[Link]"
FCAT = "./files/[Link]"

# Multiplier that converts a value expressed in the given unit into hours.
# A month counts as 30 days and a year as 365 days.
TIMECONV = dict(
    seconds=1/3600,
    minutes=1/60,
    hours=1,
    days=24,
    weeks=7 * 24,
    months=30 * 24,
    years=365 * 24,
)
# Load the passwords and the categories, convert each cracking time to hours,
# then compute per category the number of passwords and the mean time.
# NOTE(review): "[Link]" tokens are export artifacts. The two bare calls after
# the reads are presumably quick inspections of dfp and dfc; the later ones are
# presumably dfg.rename and dfg.merge - confirm against the notebook.
dfp = pd.read_csv(FDATA)
dfc = pd.read_csv(FCAT)
[Link]()
[Link]()
# value * (hours per unit); a time_unit missing from TIMECONV maps to NaN
dfp['online_hours'] = dfp['value']*dfp['time_unit'].map(TIMECONV)
# NOTE(review): astype(int) drops the fractional part (sub-hour times become 0)
# and fails if any value is NaN - confirm this is intended.
dfp['online_hours'] = dfp['online_hours'].astype(int)
#dfp[dfp['online_hours'].isna()]
# number of passwords per category and average online breaking time
dfg = dfp[['catid','online_hours']].groupby(['catid']).agg({'catid': 'size',
'online_hours': 'mean'})
# the size aggregate lands in the 'catid' column; rename it to 'count'
[Link](columns={'catid': 'count'}, inplace=True)
# turn the group key back into a regular 'catid' column for the join
dfg = dfg.reset_index()
# how='right' keeps every row of dfc, matched on dfg.catid == dfc.id
dfres = [Link](dfc, how='right', left_on='catid', right_on='id')
#print(dfres)
Q1 pandas number of passwords per category, showing the name
# Bar chart of the 'count' column per 'category'.
# NOTE(review): "[Link]" tokens are export artifacts. Presumably
# dfres.plot(...), plt.ylabel, plt.title and plt.show - confirm.
[Link](kind='bar', x='category', y='count')
[Link]('number of passwords')
[Link]('Plot on grouped - flat data')
[Link]()
pandas: let value_counts compute the counts
# Join categories onto the rows and let value_counts() do the counting,
# instead of plotting the pre-aggregated table.
# NOTE(review): "[Link]" tokens are export artifacts. Presumably dfp.merge(...)
# (later cells read per-password columns from dfpc), then plt.title and
# plt.show - confirm against the notebook.
# how='right' keeps every row of dfc, matched on catid == id
dfpc = [Link](dfc, how='right', left_on='catid', right_on='id')
# one bar per category value, x tick labels rotated by 90 degrees
dfpc[['category']].value_counts().plot(kind='bar', xlabel='Category', ylabel='Count',
rot=90)
[Link]('Plot value count')
[Link]()
#print(dfpc)
matplotlib
# Bar chart drawn with matplotlib from the grouped table dfres.
# NOTE(review): "[Link]" tokens are export artifacts. Presumably
# plt.subplots(), ax.bar, plt.xticks, plt.title and plt.show - confirm.
# NOTE(review): the bar height is 'online_hours' (the per-category mean), not
# 'count' as in the pandas version under the same heading - confirm intent.
fig, ax = [Link]()
[Link](dfres['category'], dfres['online_hours'], width=1, edgecolor="white",
linewidth=0.7)
###
# x limits from -1 to the number of distinct categories
ncat = dfres['category'].nunique()
ax.set_xlim(-1,ncat)
###
[Link](rotation=90)
[Link]('Matplot on grouped')
[Link]()
Q2 show average times
# Box plot of online_hours for each category, using only the rows whose
# online_hours is below 10000.
below_cap = dfpc['online_hours'] < 10000
dfpc[below_cap].boxplot(
    column=['online_hours'],
    by='category',
    grid=False,
    color='black',
    rot=90,
)
histogram
# Histogram of the 'strength' column.
dfpc[['strength']].plot.hist()