PostGIS - Using SQLAlchemy, GeoAlchemy, and GeoPandas¶
PostGIS is an open source spatial database. I am using the Python SQLAlchemy library to connect to and execute spatial and non-spatial queries from this database
import sqlalchemy as db
import pandas as pd
# import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib inline
engine = db.create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/nyc')
con = engine.connect()
View some tables in the database
metadata = db.MetaData()
nyc_census_blocks_tb = db.Table('nyc_census_blocks', metadata,
autoload=True,autoload_with=engine)
/Users/atma6951/anaconda3/envs/geopandasenv/lib/python3.6/site-packages/sqlalchemy/dialects/postgresql/base.py:2963: SAWarning: Did not recognize type 'geometry' of column 'geom' "Did not recognize type '%s' of column '%s'" % (attype, name)
print(nyc_census_blocks_tb.columns.keys())
['id', 'geom', 'blkid', 'popn_total', 'popn_white', 'popn_black', 'popn_nativ', 'popn_asian', 'popn_other', 'boroname']
Querying¶
query = nyc_census_blocks_tb.select()
query
<sqlalchemy.sql.selectable.Select at 0x118204908; Select object>
query_result = con.execute(query)
query_result
<sqlalchemy.engine.result.ResultProxy at 0x11826e0f0>
query_result_set = query_result.fetchall()
len(query_result_set)
38794
query_result_set[:3]
[(1, '010600002026690000010000000103000000010000000A00000051AC161881A22141A31409CF1F2A51415F4321458DA2214100102A3F1D2A51418C34807C0BA221414E3E89F5122A51417 ... (74 characters truncated) ... 02141A00099C72F2A51412365B4789AA021419F60A7BB342A514160E3E8FA66A0214118B4C0CE402A5141EA4BF3EEC7A12141A3023D61452A514151AC161881A22141A31409CF1F2A5141', '360850009001000', 97, 51, 32, 1, 5, 8, 'Staten Island'), (2, '0106000020266900000100000001030000000100000007000000083B4A6F79A8214127EC57B49926514151B51BB7CEA72141B2EAD6F38A2651416F429640B9A72141449FCB1C89265141163AA64D56A72141B89E2B7C9B26514150509213EDA72141DCC9A351A826514184FA4C6017A82141B9AE24F0AB265141083B4A6F79A8214127EC57B499265141', '360850020011000', 66, 52, 2, 0, 7, 5, 'Staten Island'), (3, '010600002026690000010000000103000000010000000600000082DCED72969D2141563247C49E2651417C120440079D214123319BFC8626514179D4895B6A9C2141F3667FC995265141C0428AC2C29C214159EB5C75AC265141CB126202D69C214180215728B126514182DCED72969D2141563247C49E265141', '360850040001000', 62, 14, 18, 2, 25, 3, 'Staten Island')]
Querying with pandas¶
nyc_census_blocks_df = pd.read_sql(query, con)
nyc_census_blocks_df.head()
id | geom | blkid | popn_total | popn_white | popn_black | popn_nativ | popn_asian | popn_other | boroname | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 010600002026690000010000000103000000010000000A... | 360850009001000 | 97 | 51 | 32 | 1 | 5 | 8 | Staten Island |
1 | 2 | 0106000020266900000100000001030000000100000007... | 360850020011000 | 66 | 52 | 2 | 0 | 7 | 5 | Staten Island |
2 | 3 | 0106000020266900000100000001030000000100000006... | 360850040001000 | 62 | 14 | 18 | 2 | 25 | 3 | Staten Island |
3 | 4 | 010600002026690000010000000103000000010000000A... | 360850074001000 | 137 | 92 | 12 | 0 | 13 | 20 | Staten Island |
4 | 5 | 0106000020266900000100000001030000000100000014... | 360850096011000 | 289 | 230 | 0 | 0 | 32 | 27 | Staten Island |
Querying with geopandas¶
nyc_census_blocks_gpd = gpd.read_postgis(sql = query, con = con)
nyc_census_blocks_gpd.head()
id | geom | blkid | popn_total | popn_white | popn_black | popn_nativ | popn_asian | popn_other | boroname | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (POLYGON ((577856.5470479821 4499583.234929237... | 360850009001000 | 97 | 51 | 32 | 1 | 5 | 8 | Staten Island |
1 | 2 | (POLYGON ((578620.7173632095 4495974.817866362... | 360850020011000 | 66 | 52 | 2 | 0 | 7 | 5 | Staten Island |
2 | 3 | (POLYGON ((577227.2244709881 4495995.066845497... | 360850040001000 | 62 | 14 | 18 | 2 | 25 | 3 | Staten Island |
3 | 4 | (POLYGON ((579037.0332016965 4494421.769816227... | 360850074001000 | 137 | 92 | 12 | 0 | 13 | 20 | Staten Island |
4 | 5 | (POLYGON ((577652.4825280879 4494975.052285533... | 360850096011000 | 289 | 230 | 0 | 0 | 32 | 27 | Staten Island |
nyc_census_blocks_gpd.head(50).plot()
<matplotlib.axes._subplots.AxesSubplot at 0x126067240>
nyc_census_blocks_gpd.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1260aaac8>
Where clauses¶
Note, both pandas and geopandas read the full data by design. But we can design a sql query to meet our requirements and pass them to the pandas and geopandas read_sql
, read_postgis
methods.
# Get only Staten Island boroughs
# raw_sql = SELECT * FROM nyc_census_blocks_tb WHERE boroname = 'Staten Island'
query_boro = db.select([nyc_census_blocks_tb]).where(nyc_census_blocks_tb.columns.boroname=='Staten Island')
gpd.read_postgis(sql = query_boro, con = con).shape
(5037, 10)
# AND clause
# raw_sql = SELECT * FROM nyc_.. WHERE boroname='Staten Island' AND popn_white < popn_black
query_boro_and = db.select([nyc_census_blocks_tb]).where(
db.and_(nyc_census_blocks_tb.columns.boroname=='Staten Island',
nyc_census_blocks_tb.columns.popn_white < nyc_census_blocks_tb.columns.popn_black))
gpd.read_postgis(sql=query_boro_and, con=con).plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1280acac8>
Where clauses geometry functions¶
Note, here, I am not rewriting SQL into SQLAlchemy. I apparently need to use geoalchemy
to make use of geometry functions as SQLAlchemy knows only of standard db functions.
I also learnt I can simply pass raw SQL query into sqlalchemy and it works. Thus, below, I am able to pass raw sql which contains the geometry functions ST_Area()
and compute the result.
Note: Using Geopandas to connect requires geometry column. Since aggregations don't yeild geometries, I am reading the result using pandas.
raw_query = "SELECT Sum(ST_Area(geom))/4047 FROM nyc_neighborhoods WHERE boroname ='Manhattan';"
# con.execute("SELECT Sum(ST_Area(geom))/4047 FROM nyc_neighborhoods WHERE boroname ='Manhattan';").fetchall()
pd.read_sql(raw_query, con)
?column? | |
---|---|
0 | 13965.320122 |
Connect to Zesty db¶
engine = db.create_engine('postgresql+psycopg2://postgres:engineTest888@localhost:5555/zesty')
con = engine.connect()
metadata = db.MetaData()
properties_tb = db.Table('properties', metadata,
autoload=True,autoload_with=engine)
/Users/atma6951/anaconda3/envs/geopandasenv/lib/python3.6/site-packages/sqlalchemy/dialects/postgresql/base.py:2963: SAWarning: Did not recognize type 'geography' of column 'geocode_geo' "Did not recognize type '%s' of column '%s'" % (attype, name) /Users/atma6951/anaconda3/envs/geopandasenv/lib/python3.6/site-packages/sqlalchemy/dialects/postgresql/base.py:2963: SAWarning: Did not recognize type 'geography' of column 'parcel_geo' "Did not recognize type '%s' of column '%s'" % (attype, name) /Users/atma6951/anaconda3/envs/geopandasenv/lib/python3.6/site-packages/sqlalchemy/dialects/postgresql/base.py:2963: SAWarning: Did not recognize type 'geography' of column 'building_geo' "Did not recognize type '%s' of column '%s'" % (attype, name)
Display image¶
query = "SELECT properties.image_url FROM properties WHERE properties.id='f1650f2a99824f349643ad234abff6a2'"
query_result = con.execute(query).fetchall()
query_result
[('https://storage.googleapis.com/engineering-test/images/f1650f2a99824f349643ad234abff6a2.tif',)]
query_result[0][0]
'https://storage.googleapis.com/engineering-test/images/f1650f2a99824f349643ad234abff6a2.tif'
import requests, shutil
response = requests.get(query_result[0][0], stream=True)
if response.status_code == 200:
with open('img.tif', 'wb') as out_file:
shutil.copyfileobj(response.raw, out_file)
del response