Siddhesh R. Kudale
# Importing Shapefile with subway station data
import geopandas as gpd
fp = "Map1/geo_export_9a6789b4-3db4-46c2-980b-3342a7a7b6cf.shp"
data = gpd.read_file(fp)
# Importing Shapefile with NYC borough boundary data
bdry = "Boundary/geo_export_927131ad-d55f-48e0-8a33-8a784b0af41a.shp"
bdata = gpd.read_file(bdry)
bdata = bdata.drop([3])
data['lon'] = data.geometry.x
data['lat'] = data.geometry.y
%matplotlib inline
# Importing station-wise ridership data (open source; retrieved and cleaned from http://web.mta.info/nyct/facts/ridership/ridership_sub_annual.htm)
import pandas as pd
df = pd.read_excel('Map1/MTA Ridership.xlsx')
# Joining both datasets to continue
joined = pd.merge(df, data, left_on="Station", right_on="name")
joined.head()
# Importing essential libraries
import numpy as np
import pandas as pd
import glob
import adjustText as aT
from pykrige.ok import OrdinaryKriging
from pykrige.kriging_tools import write_asc_grid
from shapely.geometry import Point, LineString, Polygon
import pykrige.kriging_tools as kt
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Path, PathPatch
# Interpolation
lons=np.array(joined['lon'])
lats=np.array(joined['lat'])
zdata=np.array(joined[2017])
print(zdata)
for r in range(len(zdata)):
if zdata[r]>100000:
zdata[r] = 100000
print (zdata)
# Setting bounds
xmin, ymin, xmax, ymax = bdata.total_bounds
xmin = xmin-0.01
xmax = xmax+0.01
ymin = ymin-0.01
ymax = ymax+0.01
grid_lon = np.linspace(xmin, xmax, 200)
grid_lat = np.linspace(ymin, ymax, 200)
# Spatial Kriging by linear method
OK = OrdinaryKriging(lons, lats, zdata, variogram_model='linear', verbose=True, enable_plotting=False,nlags=20)
z1, ss1 = OK.execute('grid', grid_lon, grid_lat)
print (z1)
# Importing Shapefile with census tract boundaries
fp1 = "Map2/Classmaps01.shp"
data1 = gpd.read_file(fp1)
data1 = data1.to_crs("EPSG:4326")
# Clipping tract boundaries by NYC borough boundary shapefile
clip = gpd.clip(data1, bdata)
# Making the final interpolated plot
xintrp, yintrp = np.meshgrid(grid_lon, grid_lat)
fig, ax = plt.subplots(figsize=(50,30))
boundarygeom = bdata.geometry
contour = plt.contourf(xintrp, yintrp, z1,len(z1),cmap=plt.cm.jet,alpha = 1)
plt.colorbar(contour)
bdata.plot(ax=ax, color='white', alpha = 0.4, linewidth=3.5, edgecolor='black', zorder = 40)
npts = len(lons)
plt.scatter(lons, lats, marker='.', c='black', s=npts/4)
plt.xticks(fontsize = 20, rotation=90)
plt.yticks(fontsize = 20)
plt.title('Spatial interpolation of ridership with linear kriging (%d points)' %npts, fontsize = 40)
clip.boundary.plot(ax=ax, alpha = 0.1, linewidth=0.5, edgecolor='black', zorder =40)
from shapely.geometry import Point
bdata["rep"] = bdata["geometry"].representative_point()
bdata_points = bdata.copy()
bdata_points.set_geometry("rep", inplace = True)
texts = []
for x, y, label in zip(bdata_points.geometry.x, bdata_points.geometry.y, bdata_points["boro_name"]):
texts.append(plt.text(x, y, label, fontsize = 25))
aT.adjust_text(texts, force_points=0.3, force_text=0.8, expand_points=(1,1), expand_text=(1,1),
arrowprops=dict(arrowstyle="-", color='black', lw=0.5))
plt.show()
# Importing Shapefile with census tract boundaries
fp1 = "Map2/Classmaps01.shp"
data1 = gpd.read_file(fp1)
# Importing excel with census tract IDs and ridership data for various modes
df2 = pd.read_excel('Map2/Output1.xlsx')
# Retaining non-zero values only
df2 = df2[(df2['estimate'] > 0)]
# Grouping by tract to sum all modes for each tract
total = df2.groupby('GEOID').sum('estimate')
total = total.merge(df2, on = 'GEOID')
total = total[["GEOID", "NAME","estimate_x"]]
total = total[(total['estimate_x'] < 30000)]
total.head(27000)
data1 = data1[~data1['GEOID'].isnull()]
data1['GEOID'] = data1['GEOID'].astype(str).str.replace('.', '')
data1 = data1[data1['GEOID'].str.isnumeric()]
data1[['GEOID']] = data1[['GEOID']].astype(int)
data1.to_string('GEOID')
total.to_string('GEOID')
total = total.merge(data1, on='GEOID')
from geopandas import GeoDataFrame
total = GeoDataFrame(total)
total = total.to_crs("EPSG:4326")
clipped = gpd.clip(total, bdata)
clipped.head()
# Making the final plot for tractwise trip generation
fig, ax = plt.subplots(figsize=(50,30))
clipped.plot(column='estimate_x', ax=ax, cmap='Spectral_r', legend=True)
bdata.plot(ax=ax, color='white', alpha = 0.4, linewidth=3.5, edgecolor='black', zorder = 40)
clipped.boundary.plot(ax=ax, color = 'grey', alpha = 0.2, linewidth=0.2, edgecolor='black',zorder =40)
plt.scatter(lons, lats, marker='.', c='darkred', s=npts/4)
bdata["rep"] = bdata["geometry"].representative_point()
bdata_points = bdata.copy()
bdata_points.set_geometry("rep", inplace = True)
texts = []
for x, y, label in zip(bdata_points.geometry.x, bdata_points.geometry.y, bdata_points["boro_name"]):
texts.append(plt.text(x, y, label, fontsize = 25))
aT.adjust_text(texts, force_points=0.3, force_text=0.8, expand_points=(1,1), expand_text=(1,1),
arrowprops=dict(arrowstyle="-", color='black', lw=0.5))
plt.title('Tractwise trip generation in New York CBSA', fontsize = 40)
plt.xticks(fontsize = 20, rotation=90)
plt.yticks(fontsize = 20)
clipped2 = clipped
clipped2.head()
clipped3 = clipped2.drop(columns=['STATEFP', 'COUNTYFP','TRACTCE','NAME_y','NAMELSAD','MTFCC','FUNCSTAT','ALAND','AWATER'])
clipped3.head()
# Making a plot showing centroid locations for each census tract
fig, ax = plt.subplots(figsize=(50,30))
bdata.plot(ax=ax, color='white', alpha = 0.4, linewidth=3.5, edgecolor='black', zorder = 40)
clipped.boundary.plot(ax=ax, color = 'grey', alpha = 0.2, linewidth=0.2, edgecolor='black',zorder =40)
bdata["rep"] = bdata["geometry"].representative_point()
bdata_points = bdata.copy()
bdata_points.set_geometry("rep", inplace = True)
texts = []
for x, y, label in zip(bdata_points.geometry.x, bdata_points.geometry.y, bdata_points["boro_name"]):
texts.append(plt.text(x, y, label, fontsize = 25))
clipped["rp"] = clipped.representative_point()
clipped_point = clipped.copy()
clipped_point.set_geometry("rp", inplace = True)
clipped.rp.plot(ax=ax, color = 'maroon')
plt.title('Centroids', fontsize = 40)
plt.xticks(fontsize = 20, rotation=90)
plt.yticks(fontsize = 20)
# Making a plot showing interpolated data and extraction points
xintrp, yintrp = np.meshgrid(grid_lon, grid_lat)
fig, ax = plt.subplots(figsize=(50,30))
contour = plt.contour(xintrp, yintrp, z1,len(z1),cmap=plt.cm.jet,alpha = 1)
plt.colorbar(contour)
boundarygeom = bdata.geometry
bdata.plot(ax=ax, color='white', alpha = 0.4, linewidth=3.5, edgecolor='black', zorder = 40)
clipped.rp.plot(ax=ax, color = 'black')
npts = len(lons)
plt.title('Interpolated map and centroids', fontsize = 40)
plt.xticks(fontsize = 20, rotation=90)
plt.yticks(fontsize = 20)
plt.show()
clipped3.to_excel(r'Data.xlsx', index = False, header=True)
dataf = pd.read_excel('Dataf.xlsx')
dataf.head()
# Making a scatter plot to show the difference in estimation
plt.figure(figsize=(15,8))
plt.scatter(dataf['estimate_x'],dataf['estimate_y'],alpha=0.15, color = 'pink')
plt.title("Actual versus Estimated Ridership plot")
z = np.polyfit(dataf['estimate_x'],dataf['estimate_y'], 1)
p = np.poly1d(z)
plt.plot(dataf['estimate_x'],p(dataf['estimate_x']),"r--")
plt.xlabel("Actual Trip generation from each census tract (all modes)")
plt.ylabel("Estimates from the Ridership heatmap")
plt.show