00 Logon.ipynb
in JupyterLab.import pandas as pd
from hana_ml import dataframe as dfh
hana_cloud_endpoint="<uuid>.hana.trial-eu10.hanacloud.ondemand.com:443"
hana_cloud_host, hana_cloud_port=hana_cloud_endpoint.split(":")
cchc=dfh.ConnectionContext(port=hana_cloud_port,
address=hana_cloud_host,
user='HANAML',
password='Super$ecr3t!',
encrypt=True
)
05 Multimodel.ipynb
where we include the previous one as the first step.%run "./00 Logon.ipynb"
cchc.connection.isconnected()
import geopandas
import contextily as ctx
import hana_ml.graph.hana_graph
hana_ml.graph.hana_graph.discover_graph_workspaces(cchc)
hgws_airroutes=(
hana_ml.graph.hana_graph
.create_hana_graph_from_existing_workspace(
cchc,
workspace_name='AIRROUTES_DFH')
)
dfh_ports=cchc.table("PORTS", geo_cols={"POINT_LON_LAT_GEO":"4326"})
dfh_routes=cchc.table("ROUTES")
dfh_routes_strings=cchc.sql(
dfh_routes.alias('E')
.join(dfh_ports.alias('F'),'E."FROM"=F."ID"', select=['E.*',('POINT_LON_LAT_GEO','POINT_FROM')])
.alias('E2')
.join(dfh_ports.alias('T'),'E2."TO"=T."ID"',
select=[('E2.ID','ID'), 'FROM', 'TO', 'DIST',
('ST_MakeLine("POINT_FROM", "POINT_LON_LAT_GEO")', 'LINE')]
).select_statement, geo_cols={'LINE': 4326}
)
dfh_routes_strings.head(3).collect()
ST_MakeLine()
method to create a string connecting two ports. To make it possible we join the edges table with two instances or port tables to read a location of FROM and TO nodes.DIST
-- the distance loaded from the source CSV file. We just do not know in what unit this distance was measured. Let's use SAP HANA capabilities to calculate lengths of generated spatial lines in kilometers and in miles.dfh_routes_strings.select(
"*",
('ROUND("LINE".ST_Length(\'foot\')/5280)', "CONV_M"),
('ROUND("LINE".ST_Length(\'kilometer\'))', "CONV_KM")
).head(3).collect()
DIST
must be in miles as it is not exactly, but very closely matches the distance converted to miles by SAP HANA.ST_UNITS_OF_MEASURE
contains predefined foot
unit, but not mile
.cchc.table("ST_UNITS_OF_MEASURE", schema="PUBLIC").collect()
shortest_path()
method, which I am going to use to calculate the shortest path from WRO
(Wrocław, Poland) to GKA
(Goroka, Papua New Guinea) on the other side of the globe.path_wro_to_gka_hops=hgws_airroutes.shortest_path(
source=str(hgws_airroutes.vertices_hdf.filter("CODE='WRO'").select('ID').collect().values[0][0]),
target=str(hgws_airroutes.vertices_hdf.filter("CODE='GKA'").select('ID').collect().values[0][0]),
direction='OUTGOING'
)
print("Number of connectons: {}".format(int(path_wro_to_gka_hops.weight())))
display(path_wro_to_gka_hops.edges().set_index('EDGE_ORDER'))
display(path_wro_to_gka_hops.vertices().set_index('VERTEX_ORDER'))
dfg_
for the selected nodes and for the spatial aggregation of connecting lines using ST_CollectAggr()
.dfg_shortest_path_hops = geopandas.GeoDataFrame(
dfh_ports
.filter('ID IN ('+path_wro_to_gka_hops.vertices().ID.astype(str).str.cat(sep=',')+')')
.select("ID", "CODE", "CITY", "POINT_LON_LAT_GEO").collect(),
geometry='POINT_LON_LAT_GEO', crs="EPSG:4326"
)
dfg_shortest_path_string = geopandas.GeoDataFrame(
cchc.sql(
dfh_routes_strings
.filter('ID IN ('+path_wro_to_gka_hops.edges().ID.astype(str).str.cat(sep=',')+')')
.agg([('ST_CollectAggr', 'LINE', 'LINE_PATH')])
.select_statement,
geo_cols={'LINE_PATH':4326}).collect(),
geometry='LINE_PATH', crs="EPSG:4326"
)
fig_shortest_path=dfg_shortest_path_hops.to_crs(epsg=3857).plot(
figsize=(20, 15), zorder=3,
alpha=1, color='w', edgecolor='r', markersize=300
)
fig_shortest_path.set_axis_off()
dfg_shortest_path_hops.to_crs(epsg=3857).apply(
lambda port: fig_shortest_path.annotate(port.CITY,
xy=port.POINT_LON_LAT_GEO.coords[0],
xytext=(-10, 15),
textcoords="offset points",
fontsize="x-large",
color="b"
), axis=1
)
dfg_shortest_path_string.to_crs(epsg=3857).plot(ax=fig_shortest_path, alpha=1, edgecolor='r', zorder=1)
ctx.add_basemap(ax=fig_shortest_path, source=ctx.providers.OpenStreetMap.Mapnik)
weight
to supply the value of a column DIST
.path_wro_to_gka_dist=hgws_airroutes.shortest_path(
source=str(hgws_airroutes.vertices_hdf.filter("CODE='WRO'").select('ID').collect().values[0][0]),
target=str(hgws_airroutes.vertices_hdf.filter("CODE='GKA'").select('ID').collect().values[0][0]),
direction='OUTGOING',
weight='DIST'
)
print("Total distance: {}".format(int(path_wro_to_gka_dist.weight())))
# display(path_wro_to_gka_dist.edges().set_index('EDGE_ORDER'))
# display(path_wro_to_gka_dist.vertices().set_index('VERTEX_ORDER'))
dfg_shortest_path_dist_hops = geopandas.GeoDataFrame(
dfh_ports
.filter('ID IN ('+path_wro_to_gka_dist.vertices().ID.astype(str).str.cat(sep=',')+')')
.select("ID", "CODE", "CITY", "POINT_LON_LAT_GEO").collect(),
geometry='POINT_LON_LAT_GEO', crs="EPSG:4326"
)
dfg_shortest_path_dist_string = geopandas.GeoDataFrame(
cchc.sql(dfh_routes_strings
.filter('ID IN ('+path_wro_to_gka_dist.edges().ID.astype(str).str.cat(sep=',')+')')
.agg([('ST_UnionAggr', 'LINE', 'LINE_PATH')])
.select_statement,
geo_cols={'LINE_PATH':4326}).collect(), geometry='LINE_PATH', crs="EPSG:4326")
fig_shortest_path_dist=dfg_shortest_path_dist_hops.to_crs(epsg=3857).plot(
figsize=(20, 15), zorder=3,
alpha=1, color='w', edgecolor='r', markersize=300
)
fig_shortest_path_dist.set_axis_off()
dfg_shortest_path_dist_hops.to_crs(epsg=3857).apply(
lambda port: fig_shortest_path_dist.annotate(port.CITY,
xy=port.POINT_LON_LAT_GEO.coords[0],
xytext=(-10, 15), textcoords="offset points",
fontsize="x-large", color="b"
), axis=1
)
dfg_shortest_path_dist_string.to_crs(epsg=3857).plot(ax=fig_shortest_path_dist, alpha=1, edgecolor='r', zorder=1)
ctx.add_basemap(ax=fig_shortest_path_dist, source=ctx.providers.OpenStreetMap.Mapnik)
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
12 | |
10 | |
9 | |
7 | |
7 | |
7 | |
6 | |
6 | |
5 | |
4 |