Skip to Content

翻译自:Generate Sample Data in SAP HANA through Stored Procedures and SQL

多亏了SAP HANA开发者社区的积极帮助,像是SAP HANA One Developer EditionHosted Sandbox,越来越多的人开始了解SAP HANA开发环境。最先被提出的问题是,我怎么样生成一些样本数据以供分析呢?同时,这边有许多选项包括平面文件上传以及eFlighteFashion模式,我想要提供一个额外的方法,它是通过储存过程和SQL来实现的。通过这个方法,你可以很简单的复制并粘贴下面的代码,它会生成一些样本维度表和事实表。你可以坚持使用下面代码里默认的数字,或者通过调用储存过程的方程来按你自己的需求修改数值。这段代码有很详细的解释,如果需要的话也很容易被改编成你所需要的样子。


一些需要注意的地方。最初我是为了回答论坛上的一个问题,问如何生成大数据。Lars Breddemann重点提出了RAND()方程只能在Revision 46及以后的版本使用。所以,下面的代码在之前的版本都没有办法运行。另外,这种方法提供了任意值并且不会创建供有意义的分析来使用的实际数据分配。我在寻找一些方法来迫使数据向更实际的分配偏斜。当我完成之后我会更新这篇博文。对此有任何的建议请尽管提。此外,感谢Lars的代码使得整个过程都被监控。它非常好用,我在任何时候都能用得到。


— ==============================================================================

— Cleanup

— ==============================================================================

— Execute the following statements only if you are re-running the process

— to generate sample data. If this is the first time you are running the process

— you can igore these 7 statements.

DROP TABLE SALES_F;

DROP TABLE BUSINESS_UNIT_D;

DROP TABLE SUPPLIER_D;

DROP TABLE MATERIAL_D;

DROP PROCEDURE BUILD_SUPPLIER_TABLE;

DROP PROCEDURE BUILD_MATERIAL_TABLE;

DROP PROCEDURE BUILD_FACT_TABLE;

— ==============================================================================

— Create our Dimension and Fact Tables

— ==============================================================================

CREATE COLUMN TABLE “SALES_F” (“SALES_ORDER_NBR” BIGINT CS_FIXED NOT NULL ,

       “CALENDAR_DAY” DAYDATE CS_DAYDATE,

       “BUSINESS_UNIT_ID” BIGINT CS_FIXED,

       “MATERIAL_ID” BIGINT CS_FIXED,

       “SUPPLIER_ID” BIGINT CS_FIXED,

       “UNIT_PRICE” DOUBLE CS_DOUBLE,

       “QUANTITY_SOLD” DOUBLE CS_DOUBLE,

       PRIMARY KEY (“SALES_ORDER_NBR”));

CREATE COLUMN TABLE “BUSINESS_UNIT_D” (“BUSINESS_UNIT_ID” BIGINT CS_FIXED NOT NULL ,

       “BUSINESS_UNIT_CODE” NVARCHAR(5),

       “BUSINESS_UNIT_DESC” NVARCHAR(256),

       “PARENT_BUSINESS_UNIT_ID” BIGINT CS_FIXED,

       “PARENT_BUSINESS_UNIT_CODE” NVARCHAR(5),

       PRIMARY KEY (“BUSINESS_UNIT_ID”));

CREATE COLUMN TABLE “SUPPLIER_D” (“SUPPLIER_ID” BIGINT CS_FIXED,

       “SUPPLIER_DESC” VARCHAR(60),

       PRIMARY KEY(“SUPPLIER_ID”));

CREATE COLUMN TABLE “MATERIAL_D” (“MATERIAL_ID” BIGINT CS_FIXED,

       “SKU” VARCHAR(16),

       “MATERIAL_GROUP” VARCHAR(60),

       PRIMARY KEY(“MATERIAL_ID”));

— ==============================================================================

— Create our temporary tables

— ==============================================================================

— Temporary Tables for the Supplier Dimension

— These tables are used in the Supplier Stored Procedure

CREATE COLUMN TABLE ADJECTIVE (ID INTEGER, WORD VARCHAR(60), PRIMARY KEY (“ID”));

CREATE COLUMN TABLE NOUN (ID INTEGER, WORD VARCHAR(60), PRIMARY KEY (“ID”));

CREATE COLUMN TABLE SUP_TYPE (ID INTEGER, WORD VARCHAR(60), PRIMARY KEY (“ID”));

— Temporary Table for the Material Table

— This table is used to assign a Material Group to the Material Table

CREATE COLUMN TABLE MAT_GROUP (ID INTEGER, WORD VARCHAR(60), PRIMARY KEY (“ID”));

— ==============================================================================

— Populate Our Business Unit Table

— ==============================================================================

INSERT INTO “BUSINESS_UNIT_D”

VALUES(1,‘BU1’,‘Business Unit 1’,0,);

INSERT INTO “BUSINESS_UNIT_D”

VALUES(2,‘BU2’,‘Business Unit 2’,1,‘BU1’);

INSERT INTO “BUSINESS_UNIT_D”

VALUES(3,‘BU3’,‘Business Unit 3’,1,‘BU1’);

INSERT INTO “BUSINESS_UNIT_D”

VALUES(4,‘BU4’,‘Business Unit 4’,2,‘BU2’);

INSERT INTO “BUSINESS_UNIT_D”

VALUES(5,‘BU5’,‘Business Unit 5’,3,‘BU3’);

INSERT INTO “BUSINESS_UNIT_D”

VALUES(6,‘BU6’,‘Business Unit 6’,3,‘BU4’);

INSERT INTO “BUSINESS_UNIT_D”

VALUES(7,‘BU7’,‘Business Unit 7’,4,‘BU4’);

INSERT INTO “BUSINESS_UNIT_D”

VALUES(8,‘BU8’,‘Business Unit 6’,4,‘BU4’);

— ==============================================================================

— Populate Our Lookup Tables used to generate dummy names

— ==============================================================================

— Populate Adjective Temporary Table

INSERT INTO ADJECTIVE VALUES(1, ‘Great’);

INSERT INTO ADJECTIVE VALUES(2, ‘Modern’);

INSERT INTO ADJECTIVE VALUES(3, ‘Fast’);

INSERT INTO ADJECTIVE VALUES(4, ‘Proud’);

INSERT INTO ADJECTIVE VALUES(5, ‘Solid’);

INSERT INTO ADJECTIVE VALUES(6, ‘Broad’);

INSERT INTO ADJECTIVE VALUES(7, ‘Elegant’);

INSERT INTO ADJECTIVE VALUES(8, ‘Fancy’);

INSERT INTO ADJECTIVE VALUES(9, ‘Mysterious’);

INSERT INTO ADJECTIVE VALUES(10, ‘Fantastic’);

— Populate Noun Temporary Table

INSERT INTO NOUN VALUES(1, ‘Factory’);

INSERT INTO NOUN VALUES(2, ‘Offices’);

INSERT INTO NOUN VALUES(3, ‘Industry’);

INSERT INTO NOUN VALUES(4, ‘Station’);

INSERT INTO NOUN VALUES(5, ‘Restaurant’);

INSERT INTO NOUN VALUES(6, ‘Buildings’);

INSERT INTO NOUN VALUES(7, ‘Mall’);

INSERT INTO NOUN VALUES(8, ‘Studio’);

INSERT INTO NOUN VALUES(9, ‘Stockbrokers’);

INSERT INTO NOUN VALUES(10, ‘Academy’);

— Populate Supplier Type Temporary Table

INSERT INTO SUP_TYPE VALUES(1, ‘Limited’);

INSERT INTO SUP_TYPE VALUES(2, ‘Pty Ltd’);

INSERT INTO SUP_TYPE VALUES(3, ‘Partnership’);

INSERT INTO SUP_TYPE VALUES(4, ‘Group’);

INSERT INTO SUP_TYPE VALUES(5, ‘Trust’);

INSERT INTO SUP_TYPE VALUES(6, ‘Collective’);

INSERT INTO SUP_TYPE VALUES(7, ‘Consortium’);

INSERT INTO SUP_TYPE VALUES(8, ‘Inc.’);

INSERT INTO SUP_TYPE VALUES(9, ‘Traders’);

INSERT INTO SUP_TYPE VALUES(10, ‘Franchise’);

— Populated Material Group Temporary Table

INSERT INTO MAT_GROUP VALUES(1, ‘Engine’);

INSERT INTO MAT_GROUP VALUES(2, ‘Exterior’);

INSERT INTO MAT_GROUP VALUES(3, ‘Interior’);

INSERT INTO MAT_GROUP VALUES(4, ‘Accesories’);

INSERT INTO MAT_GROUP VALUES(5, ‘Electrical’);

INSERT INTO MAT_GROUP VALUES(6, ‘Components’);

INSERT INTO MAT_GROUP VALUES(7, ‘Finishing’);

INSERT INTO MAT_GROUP VALUES(8, ‘Hydraulics’);

INSERT INTO MAT_GROUP VALUES(9, ‘Liquids’);

INSERT INTO MAT_GROUP VALUES(10, ‘Extras’);

— ==============================================================================

— Create the Stored Procedure to build the Supplier Table

— ==============================================================================

— This procedure will generate a dummy supplier name from the temporary tables and

— allows an integer argument to be used to set the number of rows to be generated.

CREATE PROCEDURE BUILD_SUPPLIER_TABLE (IN NMBR INT) LANGUAGE SQLSCRIPT AS

CNTR INTEGER;

BEGIN

CNTR := 0;

WHILE CNTR < :NMBR DO

INSERT INTO SUPPLIER_D

SELECT :CNTR,

(SELECT TOP 1 WORD FROM ADJECTIVE WHERE ID = SUBSTR(ROUND(RAND() * 9, 0 ),1,1) + 1 ORDER BY WORD)  || ‘ ‘ ||

(SELECT TOP 1 WORD FROM NOUN WHERE ID = SUBSTR(ROUND(RAND() * 9, 0 ),1,1) + 1 ORDER BY WORD) ||  ‘ ‘ ||

(SELECT TOP 1 WORD FROM SUP_TYPE WHERE ID = SUBSTR(ROUND(RAND() * 9, 0 ),1,1) + 1 ORDER BY WORD)  AS SUPDESC

FROM DUMMY;    

CNTR := CNTR + 1;

END WHILE;

END;

— ==============================================================================

— Call the Supplier Table Stored Procedure

— ==============================================================================

— To build the supplier table, use the following command. The number in brackets determines

— the number of rows that will be populated.

CALL BUILD_SUPPLIER_TABLE(1000);

— ==============================================================================

— Create the Stored Procedure to build the Material Table

— ==============================================================================

— This procedure will generate the material table. A random SKU number

— is generated. the procedure will also assign a random material group

— to each generated SKU.

CREATE PROCEDURE BUILD_MATERIAL_TABLE (IN NMBR INT) LANGUAGE SQLSCRIPT AS

CNTR INTEGER;

BEGIN

CNTR := 0;

WHILE CNTR < :NMBR DO

INSERT INTO MATERIAL_D

SELECT :CNTR,

       ‘SKU’ || LPAD(ROUND((RAND() * 1000000),0),7,‘0000000’) as SKU,

(SELECT TOP 1 WORD FROM MAT_GROUP WHERE ID = SUBSTR(ROUND(RAND() * 9, 0 ),1,1) + 1 ORDER BY WORD)  AS MATERIAL

FROM DUMMY;    

CNTR := CNTR + 1;

END WHILE;

END;

— ==============================================================================

— Call the Material Table Stored Procedure

— ==============================================================================

— To build the material table, use the following command. The number in brackets determines

— the number of rows that will be populated.

CALL BUILD_MATERIAL_TABLE(10000);

— ==============================================================================

— Create the Stored Procedure to build the Fact Table

— ==============================================================================

— This stored procedure builds the Fact table. It creates a random date based

— on a start date of 01 Jan 2011 with a random number of days add between 0

— and 730. These values can be changed.

— The remaining dimension key values are random integers based on the number

— of rows in each dimension table.

CREATE PROCEDURE BUILD_FACT_TABLE (IN NMBR INT) LANGUAGE SQLSCRIPT AS

CNTR INTEGER;

BEGIN

CNTR := 0;

WHILE CNTR < :NMBR DO

INSERT INTO SALES_F

SELECT :CNTR,

ADD_DAYS (TO_DATE (‘2011-01-01’, ‘YYYY-MM-DD’), RAND() * 730),

         ROUND((RAND() * (SELECT COUNT(*) FROM BUSINESS_UNIT_D)), 0 ),

         ROUND((RAND() * (SELECT COUNT(*) FROM MATERIAL_D)), 0 ),

         ROUND((RAND() * (SELECT COUNT(*) FROM SUPPLIER_D)), 0 ),

         ROUND(RAND() * 1000,2),

         ROUND(RAND() * 100,0)

FROM DUMMY;

CNTR := CNTR + 1;

— Monitoring via m_session_context (Credit to Lars Breddemann)

      IF MOD(:CNTR, 1000) = 0 THEN

EXEC ‘set session ”BUILD_FACT_TABLE_PROGRESS” = ”’||:CNTR||‘ of ‘|| :NMBR || ”’ ‘;

      END IF;

— Monitoring via m_session_context    

END WHILE;

END;

— ==============================================================================

— Call the Fact Table Stored Procedure

— ==============================================================================

— To build the material table, use the following command. The number in brackets determines

— the number of rows that will be populated.

CALL BUILD_FACT_TABLE(100000);

— ==============================================================================

— Monitor Progress

— ==============================================================================

— The building of the fact table can take a considerable amount of time. The progress

— can be monitored by issuing the following SQL statement. Credit goes to Lars Breddemann

— for this functionality.

SELECT *

FROM “PUBLIC”.“M_SESSION_CONTEXT”

WHERE KEY = ‘BUILD_FACT_TABLE_PROGRESS’;

— ==============================================================================

— Cleanup

— ==============================================================================

— Now that our dimension and fact tables have been created, we can clean up

— the temporary tables that were used in generating our sample data.

DROP TABLE ADJECTIVE;

DROP TABLE NOUN;

DROP TABLE SUP_TYPE;

DROP TABLE MAT_GROUP;

— ==============================================================================

— Verify the tables have been created and number of rows created.

— ==============================================================================

SELECT ‘BUSINESS_UNIT_D’ AS TABLE_NAME,

COUNT(*)

FROM BUSINESS_UNIT_D

UNION

SELECT ‘MATERIAL_D’ AS TABLE_NAME,

COUNT(*)

FROM MATERIAL_D

UNION

SELECT ‘SUPPLIER_D’ AS TABLE_NAME,

COUNT(*)

FROM SUPPLIER_D

UNION

SELECT ‘SALES_F’ AS TABLE_NAME,

COUNT(*)

FROM SALES_F

To report this post you need to login first.

Be the first to leave a comment

You must be Logged on to comment or reply to a post.

Leave a Reply