DATA MANIPULATION WITH PANDAS

1) Inspecting a DataFrame

# edited/added
import pandas as pd
homelessness = pd.read_csv('homelessness.csv', index_col=0)
# Print the head of the homelessness data
print(homelessness.head())
# Print information about homelessness
print(homelessness.info())
# Print the shape of homelessness
print(homelessness.shape)
# Print a description of homelessness
print(homelessness.describe())

2) Parts of a DataFrame

# Import pandas using the alias pd
import pandas as pd
# Print the values of homelessness
print(homelessness.values)
# Print the column index of homelessness
print(homelessness.columns)
# Print the row index of homelessness
print(homelessness.index)

3) Sorting rows

# Sort homelessness by individuals
homelessness_ind = homelessness.sort_values("individuals")
# Print the top few rows
print(homelessness_ind.head())
# Sort homelessness by descending family members
homelessness_fam = homelessness.sort_values("family_members", ascending=False)
# Print the top few rows
print(homelessness_fam.head())
# Sort homelessness by region, then descending family members
homelessness_reg_fam = homelessness.sort_values(["region", "family_members"], ascending=[True, False])
# Print the top few rows
print(homelessness_reg_fam.head())

4) Subsetting columns

# Select the individuals column
individuals = homelessness["individuals"]
# Print the head of the result
print(individuals.head())
# Select the state and family_members columns
state_fam = homelessness[["state", "family_members"]]
# Print the head of the result
print(state_fam.head())
# Select only the individuals and state columns, in that order
ind_state = homelessness[["individuals", "state"]]
# Print the head of the result
print(ind_state.head())
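Worth remembering here: single square brackets return a Series, while double square brackets return a DataFrame. A minimal sketch on the same homelessness data:

# Single brackets give a pandas Series
print(type(homelessness["individuals"]))
# Double brackets give a one-column DataFrame
print(type(homelessness[["individuals"]]))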
Subsetting rows

# Filter for rows where individuals is greater than 10000
ind_gt_10k = homelessness[homelessness["individuals"] > 10000]
# See the result
print(ind_gt_10k)
# Filter for rows where region is Mountain
mountain_reg = homelessness[homelessness["region"] == "Mountain"]
# See the result
print(mountain_reg)
# Filter for rows where family_members is less than 1000
# and region is Pacific
fam_lt_1k_pac = homelessness[(homelessness["family_members"] < 1000) &
                             (homelessness["region"] == "Pacific")]
# See the result
print(fam_lt_1k_pac)
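As an aside, the same combined filter can be written with .query(), which some find more readable; a sketch assuming the same homelessness columns (the variable name is just illustrative):

# Equivalent filter using .query()
fam_lt_1k_pac_alt = homelessness.query('family_members < 1000 and region == "Pacific"')
print(fam_lt_1k_pac_alt)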
Subsetting rows by categorical variables
# Subset for rows in South Atlantic or Mid-Atlantic regions
south_mid_atlantic = homelessness[(homelessness["region"] == "South Atlantic") |
                                  (homelessness["region"] == "Mid-Atlantic")]
# See the result
print(south_mid_atlantic)
# The Mojave Desert states
canu = ["California", "Arizona", "Nevada", "Utah"]
# Filter for rows in the Mojave Desert states
mojave_homelessness = homelessness[homelessness["state"].isin(canu)]
# See the result
print(mojave_homelessness)
Adding new columns
# Add total col as sum of individuals and family_members
homelessness["total"] = homelessness["individuals"] + homelessness["family_members"]
# Add p_individuals col as proportion of total that are individuals
homelessness["p_individuals"] = homelessness["individuals"] / homelessness["total"]
# See the result
print(homelessness)
Combo-attack!

# Create indiv_per_10k col as homeless individuals per 10k state pop
homelessness["indiv_per_10k"] = 10000 * homelessness["individuals"] / homelessness["state_pop"]
# Subset rows for indiv_per_10k greater than 20
high_homelessness = homelessness[homelessness["indiv_per_10k"] > 20]
# Sort high_homelessness by descending indiv_per_10k
high_homelessness_srt = high_homelessness.sort_values("indiv_per_10k", ascending=False)
# From high_homelessness_srt, select the state and indiv_per_10k cols
result = high_homelessness_srt[["state", "indiv_per_10k"]]
# See the result
print(result)
Mean and median

# edited/added
sales = pd.read_csv('sales_subset.csv', index_col=0)
# Print the head of the sales DataFrame
print(sales.head())
# Print the info about the sales DataFrame
print(sales.info())
# Print the mean of weekly_sales
print(sales["weekly_sales"].mean())
# Print the median of weekly_sales
print(sales["weekly_sales"].median())

Summarizing dates

# Print the maximum of the date column
print(sales["date"].max())
# Print the minimum of the date column
print(sales["date"].min())
Efficient summaries

# A custom IQR function
def iqr(column):
    return column.quantile(0.75) - column.quantile(0.25)

# Print IQR of the temperature_c column
print(sales["temperature_c"].agg(iqr))

# Update to print IQR of temperature_c, fuel_price_usd_per_l, & unemployment
print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg(iqr))

# Import NumPy and create custom IQR function
import numpy as np
def iqr(column):
    return column.quantile(0.75) - column.quantile(0.25)

# Update to print IQR and median of temperature_c, fuel_price_usd_per_l, & unemployment
print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg([iqr, np.median]))
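Note: recent pandas releases emit a FutureWarning when NumPy functions such as np.median are passed to .agg(); string names are the forward-compatible spelling. A sketch of the last summary rewritten that way:

# String aggregation names avoid the deprecation warning in newer pandas
print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg([iqr, "median"]))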
Cumulative statistics

# edited/added
sales_1_1 = sales[(sales["department"] == 1) & (sales["store"] == 1)]
# Sort sales_1_1 by date
sales_1_1 = sales_1_1.sort_values("date")
# Get the cumulative sum of weekly_sales, add as cum_weekly_sales col
sales_1_1["cum_weekly_sales"] = sales_1_1["weekly_sales"].cumsum()
# Get the cumulative max of weekly_sales, add as cum_max_sales col
sales_1_1["cum_max_sales"] = sales_1_1["weekly_sales"].cummax()
# See the columns you calculated
print(sales_1_1[["date", "weekly_sales", "cum_weekly_sales", "cum_max_sales"]])

Dropping duplicates

Remove rows of sales with duplicate pairs of store and type and save as store_types and print the head.
Remove rows of sales with duplicate pairs of store and department and save as store_depts and print the head.

# Drop duplicate store/type combinations
store_types = sales.drop_duplicates(subset=["store", "type"])
print(store_types.head())
# Drop duplicate store/department combinations
store_depts = sales.drop_duplicates(subset=["store", "department"])
print(store_depts.head())
# Subset the rows where is_holiday is True and drop duplicate dates
holiday_dates = sales[sales["is_holiday"]].drop_duplicates(subset="date")
# Print date col of holiday_dates
print(holiday_dates["date"])
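drop_duplicates() keeps the first occurrence of each combination by default; the keep parameter controls this. A quick sketch (variable name is illustrative):

# Keep the last occurrence of each store/type pair instead of the first
store_types_last = sales.drop_duplicates(subset=["store", "type"], keep="last")
print(store_types_last.head())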
Counting categorical variables
# Count the number of stores of each type
store_counts = store_types["type"].value_counts()
print(store_counts)
# Get the proportion of stores of each type
store_props = store_types["type"].value_counts(normalize=True)
print(store_props)
# Count the number of each department number and sort
dept_counts_sorted = store_depts["department"].value_counts(sort=True)
print(dept_counts_sorted)
# Get the proportion of departments of each number and sort
dept_props_sorted = store_depts["department"].value_counts(sort=True, normalize=True)
print(dept_props_sorted)

What percent of sales occurred at each store type?

# Calc total weekly sales
sales_all = sales["weekly_sales"].sum()
# Subset for type A stores, calc total weekly sales
sales_A = sales[sales["type"] == "A"]["weekly_sales"].sum()
# Subset for type B stores, calc total weekly sales
sales_B = sales[sales["type"] == "B"]["weekly_sales"].sum()
# Subset for type C stores, calc total weekly sales
sales_C = sales[sales["type"] == "C"]["weekly_sales"].sum()
# Get proportion for each type
sales_propn_by_type = [sales_A, sales_B, sales_C] / sales_all
print(sales_propn_by_type)

Calculations with .groupby()

# Group by type; calc total weekly sales
sales_by_type = sales.groupby("type")["weekly_sales"].sum()
# Get proportion for each type
sales_propn_by_type = sales_by_type / sum(sales_by_type)
print(sales_propn_by_type)
# Group by type and is_holiday; calc total weekly sales
sales_by_type_is_holiday = sales.groupby(["type", "is_holiday"])["weekly_sales"].sum()
print(sales_by_type_is_holiday)
Multiple grouped summaries

# Import numpy with the alias np
import numpy as np
# For each store type, aggregate weekly_sales: get min, max, mean, and median
sales_stats = sales.groupby("type")["weekly_sales"].agg([np.min, np.max, np.mean, np.median])
# Print sales_stats
print(sales_stats)
# For each store type, aggregate unemployment and fuel_price_usd_per_l: get min, max, mean, and median
unemp_fuel_stats = sales.groupby("type")[["unemployment", "fuel_price_usd_per_l"]].agg([np.min, np.max, np.mean, np.median])
# Print unemp_fuel_stats
print(unemp_fuel_stats)
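Grouped .agg() also accepts named aggregations, which label the output columns directly; a minimal sketch on the same sales data (the output names are just illustrative):

# Named aggregation: the keyword names become the output column names
sales_stats_named = sales.groupby("type").agg(
    min_sales=("weekly_sales", "min"),
    max_sales=("weekly_sales", "max"),
)
print(sales_stats_named)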
Pivoting on one variable
# Pivot for mean weekly_sales for each store type
mean_sales_by_type = sales.pivot_table(values="weekly_sales", index="type")
# Print mean_sales_by_type
print(mean_sales_by_type)
# Import NumPy as np
import numpy as np
# Pivot for mean and median weekly_sales for each store type
mean_med_sales_by_type = sales.pivot_table(values="weekly_sales", index="type", aggfunc=[np.mean, np.median])
# Print mean_med_sales_by_type
print(mean_med_sales_by_type)
# Pivot for mean weekly_sales by store type and holiday
mean_sales_by_type_holiday = sales.pivot_table(values="weekly_sales", index="type", columns="is_holiday")
# Print mean_sales_by_type_holiday
print(mean_sales_by_type_holiday)
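Note that pivot_table() aggregates with the mean by default, so the first pivot above is just a grouped mean in disguise; a sketch of the equivalence:

# The default aggfunc is "mean", so these two results agree
print(sales.pivot_table(values="weekly_sales", index="type"))
print(sales.groupby("type")[["weekly_sales"]].mean())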
cities = ["Moscow", "Saint Petersburg"]
# Subset temperatures using square brackets
Fill in missing values and sum values with pivot tables
print(temperatures[temperatures["city"].isin(cities)])
# Print mean weekly_sales by department and type; fill missing values with 0
# Subset temperatures_ind using .loc[]
print(sales.pivot_table(values="weekly_sales", index="department",
columns="type", fill_value=0)) print(temperatures_ind.loc[cities])
# Print the mean weekly_sales by department and type; fill missing values with 0s;
Setting multi-level indexes
sum all rows and cols
print(sales.pivot_table(values="weekly_sales", index="department", # Index temperatures by country & city
columns="type", fill_value=0, margins =True)) temperatures_ind = temperatures.set_index(["country", "city"])
# List of tuples: Brazil, Rio De Janeiro & Pakistan, Lahore
Setting and removing indexes
# edited/added
temperatures = pd.read_csv('temperatures.csv', index_col=0)
temperatures['date'] = pd.to_datetime(temperatures['date'], infer_datetime_format=True)
# Look at temperatures
print(temperatures)
# Set the index of temperatures to city
temperatures_ind = temperatures.set_index("city")
# Look at temperatures_ind
print(temperatures_ind)
# Reset the temperatures_ind index, keeping its contents
print(temperatures_ind.reset_index())
# Reset the temperatures_ind index, dropping its contents
print(temperatures_ind.reset_index(drop=True))

Subsetting with .loc[]

# Make a list of cities to subset on
cities = ["Moscow", "Saint Petersburg"]
# Subset temperatures using square brackets
print(temperatures[temperatures["city"].isin(cities)])
# Subset temperatures_ind using .loc[]
print(temperatures_ind.loc[cities])

Setting multi-level indexes

# Index temperatures by country & city
temperatures_ind = temperatures.set_index(["country", "city"])
# List of tuples: Brazil, Rio De Janeiro & Pakistan, Lahore
rows_to_keep = [("Brazil", "Rio De Janeiro"), ("Pakistan", "Lahore")]
# Subset for rows to keep
print(temperatures_ind.loc[rows_to_keep])

Sorting by index values

# Sort temperatures_ind by index values
print(temperatures_ind.sort_index())
# Sort temperatures_ind by index values at the city level
print(temperatures_ind.sort_index(level="city"))
# Sort temperatures_ind by country then descending city
print(temperatures_ind.sort_index(level=["country", "city"], ascending=[True, False]))
Slicing index values

# Sort the index of temperatures_ind
temperatures_srt = temperatures_ind.sort_index()
# Subset rows from Pakistan to Russia
print(temperatures_srt.loc["Pakistan":"Russia"])
# Try to subset rows from Lahore to Moscow
print(temperatures_srt.loc["Lahore":"Moscow"])
# Subset rows from Pakistan, Lahore to Russia, Moscow
print(temperatures_srt.loc[("Pakistan", "Lahore"):("Russia", "Moscow")])
Slicing in both directions

# Subset rows from India, Hyderabad to Iraq, Baghdad
print(temperatures_srt.loc[("India", "Hyderabad"):("Iraq", "Baghdad")])
# Subset columns from date to avg_temp_c
print(temperatures_srt.loc[:, "date":"avg_temp_c"])
# Subset in both directions at once
print(temperatures_srt.loc[("India", "Hyderabad"):("Iraq", "Baghdad"), "date":"avg_temp_c"])
Slicing time series

# Use Boolean conditions to subset temperatures for rows in 2010 and 2011
temperatures_bool = temperatures[(temperatures["date"] >= "2010-01-01") &
                                 (temperatures["date"] <= "2011-12-31")]
print(temperatures_bool)
# Set date as the index and sort the index
temperatures_ind = temperatures.set_index("date").sort_index()
# Use .loc[] to subset temperatures_ind for rows in 2010 and 2011
print(temperatures_ind.loc["2010":"2011"])
# Use .loc[] to subset temperatures_ind for rows from Aug 2010 to Feb 2011
print(temperatures_ind.loc["2010-08":"2011-02"])
Subsetting by row/column number

# Get 23rd row, 2nd column (index 22, 1)
print(temperatures.iloc[22, 1])
# Use slicing to get the first 5 rows
print(temperatures.iloc[:5])
# Use slicing to get columns 3 to 4
print(temperatures.iloc[:, 2:4])
# Use slicing in both directions at once
print(temperatures.iloc[:5, 2:4])
Pivot temperature by city and year

# Add a year column to temperatures
temperatures["year"] = temperatures["date"].dt.year
# Pivot avg_temp_c by country and city vs year
temp_by_country_city_vs_year = temperatures.pivot_table("avg_temp_c", index=["country", "city"], columns="year")
# See the result
print(temp_by_country_city_vs_year)

Subsetting pivot tables

# Subset for Egypt to India
temp_by_country_city_vs_year.loc["Egypt":"India"]
# Subset for Egypt, Cairo to India, Delhi
temp_by_country_city_vs_year.loc[("Egypt", "Cairo"):("India", "Delhi")]
# Subset for Egypt, Cairo to India, Delhi, and 2005 to 2010
# (the year columns are integers from .dt.year, so slice with ints, not strings)
temp_by_country_city_vs_year.loc[("Egypt", "Cairo"):("India", "Delhi"), 2005:2010]
Calculating on a pivot table

# Get the worldwide mean temp by year
mean_temp_by_year = temp_by_country_city_vs_year.mean()
# Filter for the year that had the highest mean temp
print(mean_temp_by_year[mean_temp_by_year == mean_temp_by_year.max()])
# Get the mean temp by city
mean_temp_by_city = temp_by_country_city_vs_year.mean(axis="columns")
# Filter for the city that had the lowest mean temp
print(mean_temp_by_city[mean_temp_by_city == mean_temp_by_city.min()])
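The Boolean-mask pattern above works, but .idxmax() and .idxmin() return the label of the extreme value directly; a quick sketch:

# Same answers via idxmax/idxmin
print(mean_temp_by_year.idxmax())   # year with the highest mean temp
print(mean_temp_by_city.idxmin())   # city with the lowest mean temp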
avocados sold vs. average price")
# Show the plot
Which avocado size is most popular?
plt.show() plt.show()
Changes in sales over time

# Import matplotlib.pyplot with alias plt
import matplotlib.pyplot as plt
# Get the total number of avocados sold on each date
nb_sold_by_date = avocados.groupby("date")["nb_sold"].sum()
# Create a line plot of the number of avocados sold by date
nb_sold_by_date.plot(kind="line")
# Show the plot
plt.show()

Avocado supply and demand

# Scatter plot of avg_price vs. nb_sold with title
avocados.plot(x="nb_sold", y="avg_price", kind="scatter", title="Number of avocados sold vs. average price")
# Show the plot
plt.show()

Price of conventional vs. organic avocados

# Histogram of conventional avg_price
avocados[avocados["type"] == "conventional"]["avg_price"].hist()
# Histogram of organic avg_price
avocados[avocados["type"] == "organic"]["avg_price"].hist()
# Add a legend
plt.legend(["conventional", "organic"])
# Show the plot
plt.show()
# Modify histogram transparency to 0.5
avocados[avocados["type"] == "conventional"]["avg_price"].hist(alpha=0.5)
avocados[avocados["type"] == "organic"]["avg_price"].hist(alpha=0.5)
# Add a legend
plt.legend(["conventional", "organic"])
# Show the plot
plt.show()
# Modify bins to 20
avocados[avocados["type"] == "conventional"]["avg_price"].hist(bins=20, alpha=0.5)
avocados[avocados["type"] == "organic"]["avg_price"].hist(bins=20, alpha=0.5)
# Add a legend
plt.legend(["conventional", "organic"])
# Show the plot
plt.show()
Finding missing values

# edited/added
avocados_2016 = pd.read_csv('avocados_2016.csv')
cols_with_missing = ['small_sold', 'large_sold', 'xl_sold']
# Import matplotlib.pyplot with alias plt
import matplotlib.pyplot as plt
# Check individual values for missing values
print(avocados_2016.isna())
# Check each column for missing values
print(avocados_2016.isna().any())
# Bar plot of missing values by variable
avocados_2016.isna().sum().plot(kind="bar")
# Show plot
plt.show()

Removing missing values

# Remove rows with missing values
avocados_complete = avocados_2016.dropna()
# Check if any columns contain missing values
print(avocados_complete.isna().any())

Replacing missing values

# List the columns with missing values
cols_with_missing = ["small_sold", "large_sold", "xl_sold"]
# Create histograms showing the distributions of cols_with_missing
avocados_2016[cols_with_missing].hist()
# Show the plot
plt.show()
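The section stops before actually replacing anything; a minimal continuation using fillna(), assuming zeros are an acceptable fill for these sales counts (the variable name is illustrative):

# Fill missing sales counts with 0 and re-plot the distributions
avocados_filled = avocados_2016.fillna(0)
avocados_filled[cols_with_missing].hist()
plt.show()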
List of dictionaries

# Create a list of dictionaries with new data
avocados_list = [
    {"date": "2019-11-03", "small_sold": 10376832, "large_sold": 7835071},
    {"date": "2019-11-10", "small_sold": 10717154, "large_sold": 8561348},
]
# Convert list into DataFrame
avocados_2019 = pd.DataFrame(avocados_list)
# Print the new DataFrame
print(avocados_2019)
Dictionary of lists
# Create a dictionary of lists with new data
avocados_dict = {
    "date": ["2019-11-17", "2019-12-01"],
    "small_sold": [10859987, 9291631],
    "large_sold": [7674135, 6238096]
}
# Convert dictionary into DataFrame
avocados_2019 = pd.DataFrame(avocados_dict)
# Print the new DataFrame
print(avocados_2019)
CSV to DataFrame

# Read CSV as DataFrame called airline_bumping
airline_bumping = pd.read_csv('airline_bumping.csv') # edited/added
# Take a look at the DataFrame
print(airline_bumping.head())
# For each airline, select nb_bumped and total_passengers and sum
airline_totals = airline_bumping.groupby("airline")[["nb_bumped", "total_passengers"]].sum()
# Create new col, bumps_per_10k: no. of bumps per 10k passengers for each airline
airline_totals["bumps_per_10k"] = airline_totals["nb_bumped"] / airline_totals["total_passengers"] * 10000
# Print airline_totals
print(airline_totals)

DataFrame to CSV

# Create airline_totals_sorted
airline_totals_sorted = airline_totals.sort_values("bumps_per_10k", ascending=False)
# Print airline_totals_sorted
print(airline_totals_sorted)
# Save as airline_totals_sorted.csv
airline_totals_sorted.to_csv("airline_totals_sorted.csv")
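to_csv() writes the index (here the airline names) as the first column by default, so reading the file back with index_col=0 restores the same DataFrame; a quick round-trip check:

# Round-trip check: the saved CSV reloads with the airline index intact
print(pd.read_csv("airline_totals_sorted.csv", index_col=0).head())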