C4.5 – Prediction with Decision Trees – PAL

Dear All,

I would like to share below with new commers who are started learning or going to work with PAL.

Its very easy and user friendly code. It will help them to start with PAL and motivate them to go further. If you face so many problems in start, it demotivates .

–DECISION TREES WITH C4.5 CLAIMS_DATA — CREATION OF A DT CODE–

SET SCHEMA ZIA_PAL;     –WRITE YOUR SCHEMA NAME HERE

CREATE TABLE CLAIM_Z (ID INTEGER, POLICY VARCHAR(10), AGE INTEGER, AMOUNT INTEGER, OCCUPATION VARCHAR(10), FRAUD VARCHAR(10));

INSERT INTO CLAIM_Z VALUES (1, ‘Auto’, 44, 4400, ‘IT’, ‘No’);

INSERT INTO CLAIM_Z VALUES (2, ‘Auto’, 40, 4400, ‘IT’, ‘No’);

INSERT INTO CLAIM_Z VALUES (3, ‘Auto’, 40, 1500, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (4, ‘Auto’, 42, 5500, ‘IT’, ‘No’);

INSERT INTO CLAIM_Z VALUES (5, ‘Auto’, 22, 800, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (6, ‘Auto’, 24, 1300, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (7, ‘Auto’, 40, 1000, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (8, ‘Auto’, 39, 1100, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (9, ‘Auto’, 22, 800, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (10, ‘Auto’, 24, 1300, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (11, ‘Auto’, 42, 400, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (12, ‘Auto’, 40, 400, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (13, ‘Auto’, 45, 1500, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (14, ‘Auto’, 35, 5500, ‘IT’, ‘No’);

INSERT INTO CLAIM_Z VALUES (15, ‘Auto’, 18, 800, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (16, ‘Auto’, 44, 1300, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (17, ‘Auto’, 30, 6000, ‘IT’, ‘No’);

INSERT INTO CLAIM_Z VALUES (18, ‘Auto’, 33, 4500, ‘IT’, ‘No’);

INSERT INTO CLAIM_Z VALUES (19, ‘Auto’, 21, 700, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (20, ‘Auto’, 38, 600, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (21, ‘Auto’, 39, 600, ‘IT’, ‘Yes’);

INSERT INTO CLAIM_Z VALUES (21, ‘Auto’, 38, 500, ‘IT’, ‘Yes’);

— NEED TO DONE THIS ONLY ONCE: AFTER THAT YOU CAN PUT THIS IN COMMENTS

— C4.5 ALGORITHM SETUP —

— NEED TO DROP ALL TABLES WHICH WE WILL CREATE LATER ON. NEED TO RUN THIS EVERY TIME

DROP TYPE PAL_T_DT_DATA_Z;

DROP TYPE PAL_T_DT_PARAMS_Z;

DROP TYPE PAL_T_DT_MODEL_JSON_Z;

DROP TYPE PAL_T_DT_MODEL_PMML_Z;

CREATE TYPE PAL_T_DT_DATA_Z AS TABLE (POLICY VARCHAR(10), AGE INTEGER, AMOUNT INTEGER, OCCUPATION VARCHAR(10), FRAUD VARCHAR(10));

CREATE TYPE PAL_T_DT_PARAMS_Z AS TABLE (NAME VARCHAR(60), INTARGS INTEGER, DOUBLEARGS DOUBLE, STRINGARGS VARCHAR (100));

CREATE TYPE PAL_T_DT_MODEL_JSON_Z AS TABLE (ID INTEGER, JSONMODEL VARCHAR(5000));

CREATE TYPE PAL_T_DT_MODEL_PMML_Z AS TABLE (ID INTEGER, PMMLMODEL VARCHAR(5000));

CREATE COLUMN TABLE PAL_DT_SIGNATURE_Z (ID INTEGER, TYPENAME VARCHAR(100), DIRECTION VARCHAR(100));

INSERT INTO PAL_DT_SIGNATURE_Z VALUES (1, ‘PAL_T_DT_DATA_Z’, ‘in’);

INSERT INTO PAL_DT_SIGNATURE_Z VALUES (2, ‘PAL_T_DT_PARAMS_Z’, ‘in’);

INSERT INTO PAL_DT_SIGNATURE_Z VALUES (3, ‘PAL_T_DT_MODEL_JSON_Z’, ‘out’);

INSERT INTO PAL_DT_SIGNATURE_Z VALUES (4, ‘PAL_T_DT_MODEL_PMML_Z’, ‘out’);

— DROP PROCEDURE AND 4 TYPES ASSOCIATED WITH IT:

DROP PROCEDURE _SYS_AFL.PAL_DT_Z;

DROP TYPE _SYS_AFL.PAL_DT_Z__TT_P1;

DROP TYPE _SYS_AFL.PAL_DT_Z__TT_P2;

DROP TYPE _SYS_AFL.PAL_DT_Z__TT_P3;

DROP TYPE _SYS_AFL.PAL_DT_Z__TT_P4;

CALL SYSTEM.AFL_WRAPPER_GENERATOR (‘PAL_DT_Z’, ‘AFLPAL’, ‘CREATEDT’, PAL_DT_SIGNATURE_Z);–‘PAL_DT_Z’IS USER DEFINED PROCEDURE NAME: IT CAN BE ANY NAME

— CREATION OF VIEW TO GET DATA FROM TABLE

DROP VIEW V_DT_DATA_Z;

CREATE VIEW V_DT_DATA_Z AS

SELECT POLICY, AGE, AMOUNT, OCCUPATION, FRAUD

FROM CLAIM_Z;

DROP TABLE  DT_PARAMS_Z;

DROP TABLE  DT_MODEL_JSON_Z;

DROP TABLE  DT_MODEL_PMML_Z;

CREATE COLUMN TABLE DT_PARAMS_Z LIKE PAL_T_DT_PARAMS_Z;

CREATE COLUMN TABLE DT_MODEL_JSON_Z LIKE PAL_T_DT_MODEL_JSON_Z;

CREATE COLUMN TABLE DT_MODEL_PMML_Z LIKE PAL_T_DT_MODEL_PMML_Z;

INSERT INTO DT_PARAMS_Z VALUES (‘PERCENTAGE’, null, 1.0, null);

INSERT INTO DT_PARAMS_Z VALUES (‘THREAD_NUMBER’, 2, null, null);

INSERT INTO DT_PARAMS_Z VALUES (‘PERCENTAGE’, null, 1.0, null);

INSERT INTO DT_PARAMS_Z VALUES (‘MIN_NUMS_RECORDS’, 1, null, null);

INSERT INTO DT_PARAMS_Z VALUES (‘IS_SPLIT_MODEL’, 0, null, null);

INSERT INTO DT_PARAMS_Z VALUES (‘PMML_EXPORT’, 1, null, null);

INSERT INTO DT_PARAMS_Z VALUES (‘CONTINUOUS_COL’, 1, 30, null);

— FINALLAY RUNNING

TRUNCATE TABLE DT_MODEL_JSON_Z;

TRUNCATE TABLE DT_MODEL_PMML_Z;

CALL _SYS_AFL.PAL_DT_Z (V_DT_DATA_Z, DT_PARAMS_Z, DT_MODEL_JSON_Z, DT_MODEL_PMML_Z) WITH OVERVIEW;

— –DECISION TREES WITH C4.5 CLAIMS_DATA –PREDICTION CODE —

SET SCHEMA ZIA_PAL;–WRITE YOUR SCHEMA NAME HERE

— C4.5 PREDICTION ALGORITHM SETUP —

DROP TYPE PAL_T_DTP_DATA_Z;

DROP TYPE PAL_T_DTP_PREDICT_Z;

DROP TABLE PAL_DTP_SIGNATURE_Z;

CREATE TYPE PAL_T_DTP_DATA_Z AS TABLE (ID INTEGER, POLICY VARCHAR(10), AGE INTEGER, AMOUNT INTEGER, OCCUPATION VARCHAR(10));

CREATE TYPE PAL_T_DTP_PREDICT_Z AS TABLE (ID INTEGER, FRAUD VARCHAR(10));

CREATE COLUMN TABLE PAL_DTP_SIGNATURE_Z (ID INTEGER, TYPENAME VARCHAR(100), DIRECTION VARCHAR(100));

INSERT INTO PAL_DTP_SIGNATURE_Z VALUES (1, ‘PAL_T_DTP_DATA_Z’, ‘in’);

INSERT INTO PAL_DTP_SIGNATURE_Z VALUES (2, ‘PAL_T_DT_PARAMS_Z’, ‘in’);

INSERT INTO PAL_DTP_SIGNATURE_Z VALUES (3, ‘PAL_T_DT_MODEL_JSON_Z’, ‘in’);

INSERT INTO PAL_DTP_SIGNATURE_Z VALUES (4, ‘PAL_T_DTP_PREDICT_Z’, ‘out’);

/DROP PROCEDURE _SYS_AFL.PAL_DTP_Z;

DROP TYPE _SYS_AFL.PAL_DTP_Z__TT_P1;

DROP TYPE _SYS_AFL.PAL_DTP_Z__TT_P2;

DROP TYPE _SYS_AFL.PAL_DTP_Z__TT_P3;

DROP TYPE _SYS_AFL.PAL_DTP_Z__TT_P4;*/

CALL SYSTEM.AFL_WRAPPER_ERASER (‘PAL_DTP_Z’);

CALL SYSTEM.AFL_WRAPPER_GENERATOR (‘PAL_DTP_Z’, ‘AFLPAL’, ‘PREDICTWITHDT’, PAL_DTP_SIGNATURE_Z);–‘PAL_DTP_Z’IS USER DEFINED PROCEDURE NAME: IT CAN BE ANY NAME

DROP TABLE DTP_DATA_Z;

DROP TABLE DTP_PARAMS_Z;

DROP TABLE DTP_PREDICT_Z;

CREATE COLUMN TABLE DTP_DATA_Z LIKE PAL_T_DTP_DATA_Z;

CREATE COLUMN TABLE DTP_PARAMS_Z LIKE PAL_T_DT_PARAMS_Z;

CREATE COLUMN TABLE DTP_PREDICT_Z LIKE PAL_T_DTP_PREDICT_Z;

INSERT INTO DTP_DATA_Z VALUES (1, ‘Auto’, 35, 4000, ‘IT’);

INSERT INTO DTP_DATA_Z VALUES (2, ‘Auto’, 40, 800, ‘IT’);

INSERT INTO DTP_DATA_Z VALUES (3, ‘Auto’, 40, 4000, ‘IT’);

INSERT INTO DTP_DATA_Z VALUES (4, ‘Auto’, 40, 3400, ‘IT’);

INSERT INTO DTP_DATA_Z VALUES (5, ‘Auto’, 38, 500, ‘IT’);

INSERT INTO DTP_DATA_Z VALUES (6, ‘Auto’, 40, 3500, ‘IT’);

INSERT INTO DTP_DATA_Z VALUES (7, ‘Auto’, 38, 2500, ‘IT’);

INSERT INTO DTP_DATA_Z VALUES (8, ‘Auto’, 38, 3500, ‘IT’);

INSERT INTO DTP_PARAMS VALUES (‘THREAD_NUMBER’, 2, null, null);

TRUNCATE TABLE DTP_PREDICT_Z;

CALL _SYS_AFL.PAL_DTP_Z (DTP_DATA_Z, DTP_PARAMS_Z, DT_MODEL_JSON_Z, DTP_PREDICT_Z) WITH OVERVIEW;

SELECT * FROM DTP_PREDICT_Z;

Almost all all of this information is available on PAL handbook by SAP.

Best Regards,

Zaib Attique Zia