0% found this document useful (0 votes)

76 views9 pages

Numpy

Uploaded by

Anh Thư Trần Võ

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

76 views9 pages

Numpy

Uploaded by

Anh Thư Trần Võ

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 9

DATA MANIPULATION WITH PANDAS homelessness_ind = homelessness.

sort_values("individuals")
1) Inspecting a DataFrame # Print the top few rows
print(homelessness_ind.head())
# edited/added
# Sort homelessness by descending family members
import pandas as pd
homelessness_fam = homelessness.sort_values("family_members",
homelessness = pd.read_csv('homelessness.csv', index_col=0)
ascending=False)
# Print the head of the homelessness data
# Print the top few rows
print(homelessness.head())
print(homelessness_fam.head())
# Print information about homelessness
# Sort homelessness by region, then descending family members
print(homelessness.info())
homelessness_reg_fam = homelessness.sort_values(["region", "family_members"],
# Print the shape of homelessness ascending=[True, False])
print(homelessness.shape) # Print the top few rows
# Print a description of homelessness print(homelessness_reg_fam.head())
print(homelessness.describe())
4) Subsetting columns
2) Parts of a DataFrame
# Select the individuals column
# Import pandas using the alias pd individuals = homelessness["individuals"]
import pandas as pd # Print the head of the result
# Print the values of homelessness print(individuals.head())
print(homelessness.values) # Select the state and family_members columns
# Print the column index of homelessness state_fam = homelessness[["state", "family_members"]]
print(homelessness.columns) # Print the head of the result
# Print the row index of homelessness print(state_fam.head())
print(homelessness.index) # Select only the individuals and state columns, in that order
ind_state = homelessness[["individuals", "state"]]
3) Sorting rows
# Print the head of the result
# Sort homelessness by individuals print(ind_state.head())
Subsetting rows # Add total col as sum of individuals and family_members
# Filter for rows where individuals is greater than 10000 homelessness["total"] = homelessness["individuals"] +
homelessness["family_members"]
ind_gt_10k = homelessness[homelessness["individuals"] > 10000]
# Add p_individuals col as proportion of total that are individuals
# See the result
homelessness["p_individuals"] = homelessness["individuals"] /
print(ind_gt_10k)
homelessness["total"]
# Filter for rows where region is Mountain
# See the result
mountain_reg = homelessness[homelessness["region"] == "Mountain"]
print(homelessness)
# See the result
print(mountain_reg) Combo-attack!
# Filter for rows where family_members is less than 1000 # and region is Pacific # Create indiv_per_10k col as homeless individuals per 10k state pop
fam_lt_1k_pac = homelessness[(homelessness["family_members"] < 1000) & homelessness["indiv_per_10k"] = 10000 * homelessness["individuals"] /
(homelessness["region"] == "Pacific")] homelessness["state_pop"]
# See the result # Subset rows for indiv_per_10k greater than 20
print(fam_lt_1k_pac) high_homelessness = homelessness[homelessness["indiv_per_10k"] > 20]
# Sort high_homelessness by descending indiv_per_10k
Subsetting rows by categorical variables
high_homelessness_srt = high_homelessness.sort_values("indiv_per_10k",
# Subset for rows in South Atlantic or Mid-Atlantic regions ascending=False)
south_mid_atlantic = homelessness[(homelessness["region"] == "South Atlantic") | # From high_homelessness_srt, select the state and indiv_per_10k cols
(homelessness["region"] == "Mid-Atlantic")]
result = high_homelessness_srt[["state", "indiv_per_10k"]]
# See the result
# See the result
print(south_mid_atlantic)
print(result)
# The Mojave Desert states
canu = ["California", "Arizona", "Nevada", "Utah"] Mean and median
# Filter for rows in the Mojave Desert states # edited/added
mojave_homelessness = homelessness[homelessness["state"].isin(canu)] sales = pd.read_csv('sales_subset.csv', index_col=0)
# See the result # Print the head of the sales DataFrame
print(mojave_homelessness) print(sales.head())

Adding new columns

# Print the info about the sales DataFrame print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg([iqr,
np.median]))
print(sales.info())
# Print the mean of weekly_sales Cumulative statistics
print(sales["weekly_sales"].mean())
# edited/added
# Print the median of weekly_sales
sales_1_1 = sales[(sales["department"] == 1) & (sales["store"] == 1)]
print(sales["weekly_sales"].median())
# Sort sales_1_1 by date
Summarizing dates sales_1_1 = sales_1_1.sort_values("date")
# Get the cumulative sum of weekly_sales, add as cum_weekly_sales col
# Print the maximum of the date column
sales_1_1["cum_weekly_sales"] = sales_1_1["weekly_sales"].cumsum()
print(sales["date"].max())
# Get the cumulative max of weekly_sales, add as cum_max_sales col
# Print the minimum of the date column
sales_1_1["cum_max_sales"] = sales_1_1["weekly_sales"].cummax()
print(sales["date"].min())
# See the columns you calculated
Efficient summaries print(sales_1_1[["date", "weekly_sales", "cum_weekly_sales", "cum_max_sales"]])

# A custom IQR functiondef iqr(column):

Dropping duplicates
return column.quantile(0.75) - column.quantile(0.25)
# Print IQR of the temperature_c column  Remove rows of sales with duplicate pairs of store and type and save
as store_types and print the head.
print(sales["temperature_c"].agg(iqr))  Remove rows of sales with duplicate pairs of store and department and save
# A custom IQR functiondef iqr(column): as store_depts and print the head.
return column.quantile(0.75) - column.quantile(0.25)
# Drop duplicate store/type combinations
# Update to print IQR of temperature_c, fuel_price_usd_per_l, & unemployment
store_types = sales.drop_duplicates(subset=["store", "type"])
print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg(iqr))
print(store_types.head())
# Import NumPy and create custom IQR functionimport numpy as npdef
iqr(column): # Drop duplicate store/department combinations
return column.quantile(0.75) - column.quantile(0.25) store_depts = sales.drop_duplicates(subset=["store", "department"])
# Update to print IQR and median of temperature_c, fuel_price_usd_per_l, & print(store_depts.head())
unemployment # Subset the rows where is_holiday is True and drop duplicate dates
holiday_dates = sales[sales["is_holiday"]].drop_duplicates(subset="date") sales_C = sales[sales["type"] == "C"]["weekly_sales"].sum()
# Print date col of holiday_dates # Get proportion for each type
print(holiday_dates["date"]) sales_propn_by_type = [sales_A, sales_B, sales_C] / sales_all
print(sales_propn_by_type)
Counting categorical variables
Calculations with .groupby()
# Count the number of stores of each type
store_counts = store_types["type"].value_counts() # Group by type; calc total weekly sales
print(store_counts) sales_by_type = sales.groupby("type")["weekly_sales"].sum()
# Get the proportion of stores of each type # Get proportion for each type
store_props = store_types["type"].value_counts(normalize=True) sales_propn_by_type = sales_by_type / sum(sales_by_type)
print(store_props) print(sales_propn_by_type)
# Count the number of each department number and sort # Group by type and is_holiday; calc total weekly sales
dept_counts_sorted = store_depts["department"].value_counts(sort=True) sales_by_type_is_holiday = sales.groupby(["type", "is_holiday"])
["weekly_sales"].sum()
print(dept_counts_sorted)
print(sales_by_type_is_holiday)
# Get the proportion of departments of each number and sort
dept_props_sorted = store_depts["department"].value_counts(sort=True, Multiple grouped summaries
normalize=True)
print(dept_props_sorted) # Import numpy with the alias npimport numpy as np
# For each store type, aggregate weekly_sales: get min, max, mean, and median
What percent of sales occurred at each store type? sales_stats = sales.groupby("type")["weekly_sales"].agg([np.min, np.max, np.mean,
np.median])
# Calc total weekly sales
# Print sales_stats
sales_all = sales["weekly_sales"].sum()
print(sales_stats)
# Subset for type A stores, calc total weekly sales
# For each store type, aggregate unemployment and fuel_price_usd_per_l: get min,
sales_A = sales[sales["type"] == "A"]["weekly_sales"].sum()
max, mean, and median
# Subset for type B stores, calc total weekly sales
unemp_fuel_stats = sales.groupby("type")[["unemployment",
sales_B = sales[sales["type"] == "B"]["weekly_sales"].sum() "fuel_price_usd_per_l"]].agg([np.min, np.max, np.mean, np.median])
# Subset for type C stores, calc total weekly sales # Print unemp_fuel_stats
print(unemp_fuel_stats) # edited/added
temperatures = pd.read_csv('temperatures.csv', index_col=0)
Pivoting on one variable
temperatures['date'] = pd.to_datetime(temperatures['date'],
# Pivot for mean weekly_sales for each store type infer_datetime_format=True)

mean_sales_by_type = sales.pivot_table(values="weekly_sales", index="type") # Look at temperatures

# Print mean_sales_by_type print(temperatures)

print(mean_sales_by_type) # Set the index of temperatures to city

# Import NumPy as npimport numpy as np temperatures_ind = temperatures.set_index("city")

# Pivot for mean and median weekly_sales for each store type # Look at temperatures_ind

mean_med_sales_by_type = sales.pivot_table("weekly_sales", "type", aggfunc = print(temperatures_ind)

[np.mean, np.median]) # Reset the temperatures_ind index, keeping its contents
# Print mean_med_sales_by_type print(temperatures_ind.reset_index())
print(mean_med_sales_by_type) # Reset the temperatures_ind index, dropping its contents
# Pivot for mean weekly_sales by store type and holiday print(temperatures_ind.reset_index(drop=True))
mean_sales_by_type_holiday = sales.pivot_table("weekly_sales", "type",
"is_holiday") Subsetting with .loc[]
# Print mean_sales_by_type_holiday
# Make a list of cities to subset on
print(mean_sales_by_type_holiday)
cities = ["Moscow", "Saint Petersburg"]
# Subset temperatures using square brackets
Fill in missing values and sum values with pivot tables
print(temperatures[temperatures["city"].isin(cities)])
# Print mean weekly_sales by department and type; fill missing values with 0
# Subset temperatures_ind using .loc[]
print(sales.pivot_table(values="weekly_sales", index="department",
columns="type", fill_value=0)) print(temperatures_ind.loc[cities])

# Print the mean weekly_sales by department and type; fill missing values with 0s;
Setting multi-level indexes
sum all rows and cols
print(sales.pivot_table(values="weekly_sales", index="department", # Index temperatures by country & city
columns="type", fill_value=0, margins =True)) temperatures_ind = temperatures.set_index(["country", "city"])
# List of tuples: Brazil, Rio De Janeiro & Pakistan, Lahore
Setting and removing indexes
rows_to_keep = [("Brazil", "Rio De Janeiro"), ("Pakistan", "Lahore")] print(temperatures_srt.loc[:, "date":"avg_temp_c"])
# Subset for rows to keep # Subset in both directions at once
print(temperatures_ind.loc[rows_to_keep]) print(temperatures_srt.loc[("India", "Hyderabad"):("Iraq", "Baghdad"),
"date":"avg_temp_c"])
Sorting by index values
Slicing time series
# Sort temperatures_ind by index values
# Use Boolean conditions to subset temperatures for rows in 2010 and 2011
print(temperatures_ind.sort_index())
temperatures_bool = temperatures[(temperatures["date"] >= "2010-01-01") &
# Sort temperatures_ind by index values at the city level
(temperatures["date"] <= "2011-12-31")]
print(temperatures_ind.sort_index(level="city"))
print(temperatures_bool)
# Sort temperatures_ind by country then descending city
# Set date as the index and sort the index
print(temperatures_ind.sort_index(level=["country", "city"], ascending = [True,
temperatures_ind = temperatures.set_index("date").sort_index()
False]))
# Use .loc[] to subset temperatures_ind for rows in 2010 and 2011
Slicing index values print(temperatures_ind.loc["2010":"2011"])

# Sort the index of temperatures_ind # Use .loc[] to subset temperatures_ind for rows from Aug 2010 to Feb 2011

temperatures_srt = temperatures_ind.sort_index() print(temperatures_ind.loc["2010-08":"2011-02"])

# Subset rows from Pakistan to Russia

Subsetting by row/column number
print(temperatures_srt.loc["Pakistan":"Russia"])
# Get 23rd row, 2nd column (index 22, 1)
# Try to subset rows from Lahore to Moscow
print(temperatures.iloc[22, 1])
print(temperatures_srt.loc["Lahore":"Moscow"])
# Use slicing to get the first 5 rows
# Subset rows from Pakistan, Lahore to Russia, Moscow
print(temperatures.iloc[:5])
print(temperatures_srt.loc[("Pakistan", "Lahore"):("Russia", "Moscow")])
# Use slicing to get columns 3 to 4
Slicing in both directions print(temperatures.iloc[:, 2:4])

# Subset rows from India, Hyderabad to Iraq, Baghdad # Use slicing in both directions at once

print(temperatures_srt.loc[("India", "Hyderabad"):("Iraq", "Baghdad")]) print(temperatures.iloc[:5, 2:4])

# Subset columns from date to avg_temp_c

Pivot temperature by city and year
# Add a year column to temperatures # edited/added
temperatures["year"] = temperatures["date"].dt.year import urllib.requestimport pickle
# Pivot avg_temp_c by country and city vs year avocados = pd.read_pickle("avoplotto.pkl")
temp_by_country_city_vs_year = temperatures.pivot_table("avg_temp_c", index = # Import matplotlib.pyplot with alias pltimport matplotlib.pyplot as plt
["country", "city"], columns = "year")
# Look at the first few rows of data
# See the result
print(avocados.head())
print(temp_by_country_city_vs_year)
# Get the total number of avocados sold of each size
nb_sold_by_size = avocados.groupby("size")["nb_sold"].sum()
Subsetting pivot tables
# Create a bar plot of the number of avocados sold by size
# Subset for Egypt to India
nb_sold_by_size.plot(kind="bar")
temp_by_country_city_vs_year.loc["Egypt":"India"]
# Show the plot
# Subset for Egypt, Cairo to India, Delhi
plt.show()
temp_by_country_city_vs_year.loc[("Egypt", "Cairo"):("India", "Delhi")]
# Subset for Egypt, Cairo to India, Delhi, and 2005 to 2010 Changes in sales over time
temp_by_country_city_vs_year.loc[("Egypt", "Cairo"):("India", "Delhi"),
# Import matplotlib.pyplot with alias pltimport matplotlib.pyplot as plt
"2005":"2010"]
# Get the total number of avocados sold on each date
Calculating on a pivot table nb_sold_by_date = avocados.groupby("date")["nb_sold"].sum()

# Get the worldwide mean temp by year # Create a line plot of the number of avocados sold by date

mean_temp_by_year = temp_by_country_city_vs_year.mean() nb_sold_by_date.plot(kind="line")

# Filter for the year that had the highest mean temp # Show the plot

print(mean_temp_by_year[mean_temp_by_year == mean_temp_by_year.max()]) plt.show()

# Get the mean temp by city

Avocado supply and demand
mean_temp_by_city = temp_by_country_city_vs_year.mean(axis="columns")
# Scatter plot of avg_price vs. nb_sold with title
# Filter for the city that had the lowest mean temp
avocados.plot(x="nb_sold", y="avg_price", kind="scatter", title="Number of
print(mean_temp_by_city[mean_temp_by_city == mean_temp_by_city.min()])
avocados sold vs. average price")
# Show the plot
Which avocado size is most popular?
plt.show() plt.show()

Price of conventional vs. organic avocados Finding missing values

# Histogram of conventional avg_price # edited/added

avocados[avocados["type"] == "conventional"]["avg_price"].hist() avocados_2016 = pd.read_csv('avocados_2016.csv')
# Histogram of organic avg_price cols_with_missing = ['small_sold', 'large_sold', 'xl_sold']
avocados[avocados["type"] == "organic"]["avg_price"].hist() # Import matplotlib.pyplot with alias pltimport matplotlib.pyplot as plt
# Add a legend # Check individual values for missing values
plt.legend(["conventional", "organic"]) print(avocados_2016.isna())
# Show the plot # Check each column for missing values
plt.show() print(avocados_2016.isna().any())
# Modify histogram transparency to 0.5 # Bar plot of missing values by variable
avocados[avocados["type"] == "conventional"]["avg_price"].hist(alpha=0.5) avocados_2016.isna().sum().plot(kind="bar")
# Modify histogram transparency to 0.5 # Show plot
avocados[avocados["type"] == "organic"]["avg_price"].hist(alpha=0.5) plt.show()
# Add a legend
Removing missing values
plt.legend(["conventional", "organic"])
# Show the plot # Remove rows with missing values
plt.show() avocados_complete = avocados_2016.dropna()
# Modify bins to 20 # Check if any columns contain missing values
avocados[avocados["type"] == "conventional"]["avg_price"].hist(bins=20, print(avocados_complete.isna().any())
alpha=0.5)
# Modify bins to 20 Replacing missing values
avocados[avocados["type"] == "organic"]["avg_price"].hist(bins= 20, alpha=0.5) # List the columns with missing values
# Add a legend cols_with_missing = ["small_sold", "large_sold", "xl_sold"]
plt.legend(["conventional", "organic"]) # Create histograms showing the distributions cols_with_missing
# Show the plot avocados_2016[cols_with_missing].hist()
# Show the plot # Read CSV as DataFrame called airline_bumping
plt.show() airline_bumping = pd.read_csv('airline_bumping.csv') # edited/added
# Take a look at the DataFrame
List of dictionaries
print(airline_bumping.head())
# Create a list of dictionaries with new data # For each airline, select nb_bumped and total_passengers and sum
avocados_list = [ airline_totals = airline_bumping.groupby("airline")[["nb_bumped",
{"date": "2019-11-03", "small_sold": 10376832, "large_sold": 7835071}, "total_passengers"]].sum()

{"date": "2019-11-10", "small_sold": 10717154, "large_sold": 8561348}, # Create new col, bumps_per_10k: no. of bumps per 10k passengers for each airline

] airline_totals["bumps_per_10k"] = airline_totals["nb_bumped"] /
airline_totals["total_passengers"] * 10000
# Convert list into DataFrame
# Print airline_totals
avocados_2019 = pd.DataFrame(avocados_list)
print(airline_totals)
# Print the new DataFrame
# Print airline_totals
print(avocados_2019)
print(airline_totals)
Dictionary of lists
DataFrame to CSV
# Create a dictionary of lists with new data
# Create airline_totals_sorted
avocados_dict = {
airline_totals_sorted = airline_totals.sort_values("bumps_per_10k",
"date": ["2019-11-17", "2019-12-01"], ascending=False)
"small_sold": [10859987, 9291631], # Print airline_totals_sorted
"large_sold": [7674135, 6238096] print(airline_totals_sorted)
} # Save as airline_totals_sorted.csv
# Convert dictionary into DataFrame airline_totals_sorted.to_csv("airline_totals_sorted.csv")
avocados_2019 = pd.DataFrame(avocados_dict)
# Print the new DataFrame
print(avocados_2019)

CSV to DataFrame

Pandas for Data Analysis Enthusiasts
No ratings yet
Pandas for Data Analysis Enthusiasts
9 pages
Data Manipulation With Pandas - Yulei's Sandbox
No ratings yet
Data Manipulation With Pandas - Yulei's Sandbox
18 pages
Python Cheatsheet
No ratings yet
Python Cheatsheet
2 pages
Major Terror Attacks in India Analysis
No ratings yet
Major Terror Attacks in India Analysis
6 pages
Lab Record IP
No ratings yet
Lab Record IP
13 pages
Pandas PDF
No ratings yet
Pandas PDF
6 pages
Pandas Notes
No ratings yet
Pandas Notes
4 pages
Pandas
No ratings yet
Pandas
5 pages
Python Data Structures and Libraries Guide
No ratings yet
Python Data Structures and Libraries Guide
7 pages
Project Intern - Jupyter Notebook
No ratings yet
Project Intern - Jupyter Notebook
16 pages
Content Pandas Cheat Sheet
No ratings yet
Content Pandas Cheat Sheet
9 pages
Pandas Syntax Revision For ML
No ratings yet
Pandas Syntax Revision For ML
10 pages
Real Estate
No ratings yet
Real Estate
10 pages
12 IP Practial Programs 2025-26
No ratings yet
12 IP Practial Programs 2025-26
10 pages
Experiment No 11
No ratings yet
Experiment No 11
19 pages
Python Interviews
No ratings yet
Python Interviews
154 pages
Practical - With Solution - XII - IP
No ratings yet
Practical - With Solution - XII - IP
13 pages
Texas Transportation Code Overview
No ratings yet
Texas Transportation Code Overview
2 pages
Untitled 5
No ratings yet
Untitled 5
10 pages
Pandas For Python Pro Level Cheat Sheet
No ratings yet
Pandas For Python Pro Level Cheat Sheet
14 pages
Data Analysis Tools
No ratings yet
Data Analysis Tools
26 pages
Python Pandas: 12 Data Manipulation Techniques
100% (2)
Python Pandas: 12 Data Manipulation Techniques
19 pages
Creation of Series Using List, Dictionary & Ndarray
No ratings yet
Creation of Series Using List, Dictionary & Ndarray
65 pages
Class XII-IP-Practical File 1
No ratings yet
Class XII-IP-Practical File 1
28 pages
Python Data Science: Pandas & ML Basics
100% (1)
Python Data Science: Pandas & ML Basics
41 pages
Fundamental - Python
No ratings yet
Fundamental - Python
3 pages
Justenoughpython Pandas 220915 175329
No ratings yet
Justenoughpython Pandas 220915 175329
64 pages
File Ip
No ratings yet
File Ip
22 pages
Python Cheat Sheet For Excel Users
100% (2)
Python Cheat Sheet For Excel Users
5 pages
Data Manipulation With Pandas
No ratings yet
Data Manipulation With Pandas
39 pages
Cheat Sheet Pandas
No ratings yet
Cheat Sheet Pandas
4 pages
Exp3 Python
No ratings yet
Exp3 Python
15 pages
Pandas
No ratings yet
Pandas
20 pages
The Data Science Process
100% (1)
The Data Science Process
53 pages
Python Workshops: Data Analysis & Visualization
No ratings yet
Python Workshops: Data Analysis & Visualization
43 pages
Python For Data Science 1662157639
No ratings yet
Python For Data Science 1662157639
6 pages
Cheat Sheet
No ratings yet
Cheat Sheet
12 pages
Practice Questions2
No ratings yet
Practice Questions2
2 pages
WEBINTEL GUIDED LAB ACTIVITY Introduction To Pandas
No ratings yet
WEBINTEL GUIDED LAB ACTIVITY Introduction To Pandas
1 page
Pandas - Cheatsheet
No ratings yet
Pandas - Cheatsheet
4 pages
Pandas
No ratings yet
Pandas
25 pages
Edp 3
No ratings yet
Edp 3
16 pages
Cheat Python
No ratings yet
Cheat Python
8 pages
Numpy Boolean Indexing: Filter
No ratings yet
Numpy Boolean Indexing: Filter
39 pages
Pandas
No ratings yet
Pandas
13 pages
Python Finance & Trading Guide
No ratings yet
Python Finance & Trading Guide
11 pages
Pandas Cheat Sheet Free Resources At: Dataquest - Io/guide
No ratings yet
Pandas Cheat Sheet Free Resources At: Dataquest - Io/guide
7 pages
Python Pandas Data Manipulation Guide
No ratings yet
Python Pandas Data Manipulation Guide
11 pages
100 Pandas Puzzles
No ratings yet
100 Pandas Puzzles
20 pages
External
No ratings yet
External
11 pages
Python Pandas Practical Examples
No ratings yet
Python Pandas Practical Examples
15 pages
Pandas Commands
No ratings yet
Pandas Commands
3 pages
Pandas & PyNumS Essentials
No ratings yet
Pandas & PyNumS Essentials
10 pages
Data Science Cheat Sheet: KEY Imports
100% (1)
Data Science Cheat Sheet: KEY Imports
1 page
Pandas Cheat Sheet for Data Manipulation
No ratings yet
Pandas Cheat Sheet for Data Manipulation
1 page
Xii Ip Practical List 2022-23-1
No ratings yet
Xii Ip Practical List 2022-23-1
23 pages
Pandas
No ratings yet
Pandas
9 pages
Python Data Science Toolbox
No ratings yet
Python Data Science Toolbox
14 pages
Python Basics: Variables and Lists
No ratings yet
Python Basics: Variables and Lists
13 pages
Python Data Science Toolbox Guide
No ratings yet
Python Data Science Toolbox Guide
16 pages
Pandas
No ratings yet
Pandas
9 pages
Python Iterators & Iterables Guide
No ratings yet
Python Iterators & Iterables Guide
14 pages
Python Data Science Toolbox Overview
No ratings yet
Python Data Science Toolbox Overview
17 pages
NYL - Midterm Review
No ratings yet
NYL - Midterm Review
62 pages
Introduction To Python
No ratings yet
Introduction To Python
14 pages
Chap 04
100% (1)
Chap 04
72 pages
Chap 02
No ratings yet
Chap 02
51 pages
Chap 06
No ratings yet
Chap 06
52 pages
10 2015 Anal1 ch2 Par2
No ratings yet
10 2015 Anal1 ch2 Par2
94 pages
Chap 01
No ratings yet
Chap 01
37 pages
Analysis 1 Midterm (Update)
No ratings yet
Analysis 1 Midterm (Update)
11 pages
Pdf24 Merged
No ratings yet
Pdf24 Merged
132 pages
Lecture 12 - Chapter 17 - Oligopoly
No ratings yet
Lecture 12 - Chapter 17 - Oligopoly
35 pages
Assignement 3 (Ateebtahir)
No ratings yet
Assignement 3 (Ateebtahir)
5 pages
Solo Parenting Challenges in Simuay
No ratings yet
Solo Parenting Challenges in Simuay
46 pages
Botany Book Names
No ratings yet
Botany Book Names
4 pages
Echolalia Autism Asd
100% (1)
Echolalia Autism Asd
39 pages
Refrigerant Study for Engineers
No ratings yet
Refrigerant Study for Engineers
3 pages
Experiment-1 +Solutions+Preparation
No ratings yet
Experiment-1 +Solutions+Preparation
9 pages
Tower Crane Pile Foundation Design Calculations
100% (1)
Tower Crane Pile Foundation Design Calculations
10 pages
Organic Food Benefits for Mumbai Residents
No ratings yet
Organic Food Benefits for Mumbai Residents
32 pages
Brochure - Valmet Hydrocyclone TRP 90
No ratings yet
Brochure - Valmet Hydrocyclone TRP 90
4 pages
Nargis Latif Report
No ratings yet
Nargis Latif Report
3 pages
Thesis Framework Guidance and Examples
100% (3)
Thesis Framework Guidance and Examples
10 pages
FRP Flange Design
No ratings yet
FRP Flange Design
29 pages
Grammar Ebook - IELTS With Meghna - 250815 - 182910
No ratings yet
Grammar Ebook - IELTS With Meghna - 250815 - 182910
21 pages
PC Class Xii All Elps
No ratings yet
PC Class Xii All Elps
61 pages
Sutuiltuta: $ (Rrffi
No ratings yet
Sutuiltuta: $ (Rrffi
228 pages
Thompson Et Al 2022 Taking Perspective On Attachment Theory and Research Nine Fundamental Questions
No ratings yet
Thompson Et Al 2022 Taking Perspective On Attachment Theory and Research Nine Fundamental Questions
19 pages
Authorized Hacker Techniques Tools and Incident Handling 3rd Edition Ebook and TestBank Bundle
No ratings yet
Authorized Hacker Techniques Tools and Incident Handling 3rd Edition Ebook and TestBank Bundle
326 pages
Q4 - Sci-DLP - Phases of The Moon
100% (3)
Q4 - Sci-DLP - Phases of The Moon
8 pages
StudentStudymet703756 2
No ratings yet
StudentStudymet703756 2
11 pages
Joseph Pientka Briefing Document
75% (4)
Joseph Pientka Briefing Document
7 pages
JTY GD A30E Addressable Smoke Detector
No ratings yet
JTY GD A30E Addressable Smoke Detector
5 pages
First Choice歡樂大集合醫學 3
No ratings yet
First Choice歡樂大集合醫學 3
164 pages
Self-Compassion Scale Assessment
No ratings yet
Self-Compassion Scale Assessment
3 pages
10.1201 9781003390848 Previewpdf
No ratings yet
10.1201 9781003390848 Previewpdf
70 pages
Year 5 Operations Assessment Sheet
No ratings yet
Year 5 Operations Assessment Sheet
6 pages
Presentasi Car Kelompok 1
No ratings yet
Presentasi Car Kelompok 1
14 pages
Rock Classification for Engineers
No ratings yet
Rock Classification for Engineers
78 pages
Platelet Diluting Fluid He892 PDF
No ratings yet
Platelet Diluting Fluid He892 PDF
1 page
15.modular Electric Vehicle Platforms
No ratings yet
15.modular Electric Vehicle Platforms
13 pages
Adult Lab Values Cheat Sheet
No ratings yet
Adult Lab Values Cheat Sheet
3 pages

Numpy

Uploaded by

Numpy

Uploaded by

DATA MANIPULATION WITH PANDAS homelessness_ind = homelessness.

Adding new columns

# A custom IQR functiondef iqr(column):

mean_sales_by_type = sales.pivot_table(values="weekly_sales", index="type") # Look at temperatures

# Print mean_sales_by_type print(temperatures)

print(mean_sales_by_type) # Set the index of temperatures to city

# Import NumPy as npimport numpy as np temperatures_ind = temperatures.set_index("city")

mean_med_sales_by_type = sales.pivot_table("weekly_sales", "type", aggfunc = print(temperatures_ind)

temperatures_srt = temperatures_ind.sort_index() print(temperatures_ind.loc["2010-08":"2011-02"])

# Subset rows from Pakistan to Russia

print(temperatures_srt.loc[("India", "Hyderabad"):("Iraq", "Baghdad")]) print(temperatures.iloc[:5, 2:4])

# Subset columns from date to avg_temp_c

mean_temp_by_year = temp_by_country_city_vs_year.mean() nb_sold_by_date.plot(kind="line")

print(mean_temp_by_year[mean_temp_by_year == mean_temp_by_year.max()]) plt.show()

# Get the mean temp by city

Price of conventional vs. organic avocados Finding missing values

# Histogram of conventional avg_price # edited/added

You might also like