APL Time Series Forecast using a Segmented Measure

marc_daniau · ‎05-13-2022

The latest release of the Automated Predictive Library (APL) introduces the capability to build several time series models at once from a segmented measure like Sales by Store for example or Profit by Product. No need any more to define a loop in your SQL code or Python code. Just tell APL what column represents the segment in your dataset. You can also specify how many HANA tasks to run in parallel for a faster execution.

This new capability requires HANA ML 2.13 and APL 2209.

Let’s see how it works in Python and then in SQL.

From a Jupyter notebook, we first define the HANA dataframe for the input series.

from hana_ml import dataframe as hd

conn = hd.ConnectionContext(userkey='MLMDA_KEY')

series_in = conn.table('TS_PRODUCT_SALES', schema='USER_APL')

series_in.head(5).collect()

How many segments do we have?

series_in.distinct(cols='Product').collect()

We run a fit_predict by product with 4 parallel tasks...

col_segment= 'Product'

from hana_ml.algorithms.apl.time_series import AutoTimeSeries

apl_model = AutoTimeSeries(time_column_name= 'Month', target= 'Quantity', horizon=6,

                           segment_column_name= col_segment, max_tasks= 4)

apl_model.set_params(extra_applyout_settings={'APL/ApplyExtraMode': 'First Forecast with Stable Components and Residues and Error Bars'})

series_out = apl_model.fit_predict(data = series_in)

and check the status of each task.

my_filter = "\"KEY\" in ('AplTaskStatus')"

df = apl_model.get_summary().filter(my_filter).select('OID','VALUE').collect()

df.columns = [col_segment, 'Task Status']

df.style.hide(axis='index')

What went wrong with Earrings?

my_filter = "LEVEL = 0 and OID = 'Earrings' and ORIGIN =''"

df = apl_model.get_fit_operation_log().filter(my_filter).select('OID','MESSAGE').collect()

df.columns = [col_segment, 'Log Text']

df.style.hide(axis='index')

What are the model components for the other three segments?

df = apl_model.get_model_components()

df.style.hide(axis='index')

We look at their forecasting accuracy.

my_filter = "\"Partition\" = 'Validation'"

df = apl_model.get_debrief_report('TimeSeries_Performance').filter(my_filter).select('Oid','MAPE','RMSE','MASE').collect()

df.columns = [col_segment,'MAPE','RMSE','MASE']

df.style.hide(axis='index')

Since HANA ML 2.16, APL builds reports and charts automatically. We must give the segment value we want to analyze, for example: Tie.

seg_value = "Tie"

apl_model.build_report(segment_name=seg_value)

apl_model.generate_notebook_iframe_report()

Each tab provides, for a given segment, detailed information on the time series model .

The report can be saved as HTML.

apl_model.generate_html_report('My_APL_Report')

We conclude this article with a code sample for our readers working with the SQL interface.

--- Input Series

drop view "TS_SORTED";

create view "TS_SORTED" as select * from APL_SAMPLES.PRODUCTS_SALES order by 1,2;



--- Output Series

drop table FORECAST_OUT;

create table FORECAST_OUT (

  "Product"  nvarchar(25),

  "Month" DAYDATE,

  "Quantity" integer,

  "kts_1" DOUBLE

);



--- Persisted Tables for Debrief

drop table "INDICATORS";

create table "INDICATORS" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.INDICATORS";



drop table "DEBRIEF_METRIC";

create table "DEBRIEF_METRIC" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_METRIC_OID";



drop table "DEBRIEF_PROPERTY";

create table "DEBRIEF_PROPERTY" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_PROPERTY_OID";



--- Procedure

DO BEGIN

    declare header "SAP_PA_APL"."sap.pa.apl.base::BASE.T.FUNCTION_HEADER";

    declare config "SAP_PA_APL"."sap.pa.apl.base::BASE.T.OPERATION_CONFIG_EXTENDED";   

    declare var_desc "SAP_PA_APL"."sap.pa.apl.base::BASE.T.VARIABLE_DESC_OID";      

    declare var_role "SAP_PA_APL"."sap.pa.apl.base::BASE.T.VARIABLE_ROLES_WITH_COMPOSITES_OID";      

    declare out_log   "SAP_PA_APL"."sap.pa.apl.base::BASE.T.OPERATION_LOG";      

    declare out_sum   "SAP_PA_APL"."sap.pa.apl.base::BASE.T.SUMMARY";      

    declare out_indic "SAP_PA_APL"."sap.pa.apl.base::BASE.T.INDICATORS";      

    declare out_debrief_metric "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_METRIC_OID";      

    declare out_debrief_property "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_PROPERTY_OID";

   

    :header.insert(('Oid', 'All Products'));

    :header.insert(('LogLevel', '2'));

    :header.insert(('MaxTasks', '4'));  --  PARALLEL TASKS



    :config.insert(('APL/SegmentColumnName', 'Product',null));   --  THE SEGMENT

    :config.insert(('APL/Horizon', '6',null));

    :config.insert(('APL/TimePointColumnName', 'Month',null));

    :config.insert(('APL/LastTrainingTimePoint', '2021-12-01 00:00:00',null));

	:config.insert(('APL/DecomposeInfluencers','true',null));



    :var_role.insert(('Month', 'input',null,null,null));

    :var_role.insert(('Quantity', 'target',null,null,null));



    "SAP_PA_APL"."sap.pa.apl.base::FORECAST_AND_DEBRIEF"(

	:header, :config, :var_desc, :var_role, 'USER_APL','TS_SORTED', 'USER_APL','FORECAST_OUT',out_log,out_sum,out_indic,out_debrief_metric,out_debrief_property);



	select * from :out_log;

	select * from :out_sum where key in ('AplTaskStatus','AplTotalElapsedTime') order by 1,2;

	

    insert into  "USER_APL"."INDICATORS"        select * from :out_indic;

    insert into  "USER_APL"."DEBRIEF_METRIC"    select * from :out_debrief_metric;

    insert into  "USER_APL"."DEBRIEF_PROPERTY"  select * from :out_debrief_property; 

END;

To know more about APL

APL Time Series Forecast using a Segmented Measure

Get Your SAP HANA Idea Incubator Badge Today!

SCN Mission - SAP HANA Quiz Challenge is now retired

Share your #HANAStory and Win