In this notebook, you will learn how to create choropleth maps using different classification methods available in the mapclassify package of the Python Spatial Analysis Library (PySAL).
The examples in this notebook are adapted from the electronic book "Geographic Data Science with Python" by Sergio Rey, Dani Arribas-Bel, and Levi Wolf, which can be found at https://geographicdata.science/book/intro.html. Specifically, we will focus on the section titled "Choropleth Mapping" under "PART II - Spatial Data Analysis."
Our goal is to explore regional income data for the 32 Mexican states used in the paper by Rey and Sastré-Gutiérrez (2010). The variable we will be analyzing is per capita gross domestic product for 1940 (PCGDP1940).
By the end of this notebook, you should have a better understanding of how to create choropleth maps using different classification methods and be able to apply this knowledge to your own spatial data analyses.
import seaborn
import pandas as pd
import geopandas
import pysal
import numpy
import matplotlib.pyplot as plt
# Read the Mexican states shapefile into a GeoDataFrame and peek at the
# first rows to see which attribute columns are available.
mx = geopandas.read_file("data/mexico/mexicojoin.shp")
mx.head()
# Column that every map and classifier below works on
# (1940 per capita GDP, per the notebook introduction).
selected = "PCGDP1940"
# Show the state name next to the selected value.
preview_columns = ["NAME", selected]
mx[preview_columns]
# Look at the distribution of the selected variable before classifying it:
# a 20-bin histogram with a red rug of the individual observations drawn
# along the same horizontal axis.
selected_values = mx[selected]
ax = seaborn.histplot(selected_values, bins=20)
seaborn.rugplot(selected_values, ax=ax, color="red", height=0.05);
# Re-state the variable of interest and print its summary statistics.
selected = "PCGDP1940"
mx[selected].describe()
import mapclassify
# Fit mapclassify's EqualInterval scheme with five classes and display the
# resulting classifier summary.
ei5 = mapclassify.EqualInterval(mx[selected], k=5)
ei5
# Choropleth of the same variable. geopandas fits the scheme itself from its
# name; no k is passed here, so its own default applies
# (presumably 5, matching ei5 -- confirm against the geopandas docs).
ax = mx.plot(
    column=selected,
    scheme="EqualInterval",
    cmap="YlGn",
    legend=True,
    figsize=(10, 10),
)
# Hide the axes frame and ticks around the map.
ax.set_axis_off()
# Fit mapclassify's Quantiles scheme with five classes and display the
# resulting classifier summary.
q5 = mapclassify.Quantiles(mx[selected], k=5)
q5
# Choropleth with the Quantiles scheme; geopandas refits the classifier from
# the scheme name using its own default k (presumably 5 -- confirm).
ax = mx.plot(
    column=selected,
    scheme="Quantiles",
    cmap="YlGn",
    legend=True,
    figsize=(10, 10),
)
# Hide the axes frame and ticks around the map.
ax.set_axis_off()
# Fit mapclassify's StdMean scheme (no k is given for this classifier) and
# display the resulting classifier summary.
msd = mapclassify.StdMean(mx[selected])
msd
# Choropleth with the StdMean scheme, refitted by geopandas from its name.
ax = mx.plot(
    column=selected,
    scheme="StdMean",
    cmap="YlGn",
    legend=True,
    figsize=(10, 10),
)
# Hide the axes frame and ticks around the map.
ax.set_axis_off()
# Fit mapclassify's MaximumBreaks scheme with five classes and display the
# resulting classifier summary.
mb5 = mapclassify.MaximumBreaks(mx[selected], k=5)
mb5
# Choropleth with the MaximumBreaks scheme; geopandas refits the classifier
# from the scheme name using its own default k (presumably 5 -- confirm).
ax = mx.plot(
    column=selected,
    scheme="MaximumBreaks",
    cmap="YlGn",
    legend=True,
    figsize=(10, 10),
)
# Hide the axes frame and ticks around the map.
ax.set_axis_off()
# Fit mapclassify's HeadTailBreaks scheme (no k is given for this classifier)
# and display the resulting classifier summary.
ht = mapclassify.HeadTailBreaks(mx[selected])
ht
# Choropleth with the HeadTailBreaks scheme, refitted by geopandas from its
# name.
ax = mx.plot(
    column=selected,
    scheme="HeadTailBreaks",
    cmap="YlGn",
    legend=True,
    figsize=(10, 10),
)
# Hide the axes frame and ticks around the map.
ax.set_axis_off()
# Pin NumPy's global random state so the cell gives the same result on
# every run, then fit JenksCaspall with five classes and show the summary.
numpy.random.seed(12345)
jc5 = mapclassify.JenksCaspall(mx[selected], k=5)
jc5
# Choropleth with the JenksCaspall scheme; geopandas refits the classifier
# from the scheme name using its own default k (presumably 5 -- confirm).
ax = mx.plot(
    column=selected,
    scheme="JenksCaspall",
    cmap="YlGn",
    legend=True,
    figsize=(10, 10),
)
# Hide the axes frame and ticks around the map.
ax.set_axis_off()
# Pin NumPy's global random state so the cell gives the same result on
# every run, then fit FisherJenks with five classes and show the summary.
numpy.random.seed(12345)
fj5 = mapclassify.FisherJenks(mx[selected], k=5)
fj5
# Choropleth with the FisherJenks scheme; geopandas refits the classifier
# from the scheme name using its own default k (presumably 5 -- confirm).
ax = mx.plot(
    column=selected,
    scheme="FisherJenks",
    cmap="YlGn",
    legend=True,
    figsize=(10, 10),
)
# Hide the axes frame and ticks around the map.
ax.set_axis_off()
# Fit mapclassify's MaxP scheme with five classes and show the summary.
#
# MaxP starts from randomised initial solutions, so pin NumPy's global random
# state first, exactly as the JenksCaspall and FisherJenks cells do. Without
# this the cell is only reproducible when the whole notebook is run top to
# bottom. NOTE(review): newer mapclassify releases appear to seed MaxP
# internally; the call is then redundant but harmless -- confirm.
numpy.random.seed(12345)
mp5 = mapclassify.MaxP(mx[selected], k=5)
mp5
# geopandas fits its own MaxP instance from the scheme name. Re-seed so that
# fit starts from the same random state as mp5 and the map shows the classes
# summarised above. No k is passed, so geopandas' default applies
# (presumably 5 -- confirm).
numpy.random.seed(12345)
ax = mx.plot(
    figsize=(10, 10),
    column=selected,  # Data to plot
    scheme="MaxP",  # Classification scheme
    cmap="YlGn",  # Color palette
    legend=True,  # Add legend
)
# Hide the axes frame and ticks around the map.
ax.set_axis_off();
# Compare how well each classifier fits the data via its ADCM score.
# All classifier objects fitted above, in the order they were created.
class5 = (ei5, q5, msd, mb5, ht, jc5, fj5, mp5)
# One ADCM value per classifier, kept as an array for later reuse.
fits = numpy.array([clf.adcm for clf in class5])
# Two-column table: the score and the name of the classifier it belongs to.
adcms = pd.DataFrame(
    {
        "ADCM": fits,
        "Classifier": [clf.name for clf in class5],
    }
)
# Horizontal bar chart with one bar per classifier.
ax = seaborn.barplot(data=adcms, x="ADCM", y="Classifier", palette="Pastel1")