# -----------------
# Introduction
# -----------------
# Datasphere (DSP) View generation based on Remote Tables
# This Python script generates design time SQL views in DSP for each Remote Table (RT) that
# resides in a given space and then pushes the view definition to DSP via the DSP Command Line Interface.
# The view is "exposed" so that it can be accessed from the Open SQL schema
#
# Author: Sefan Linders
# Changelog:
# - 16/5/2023: Initial version
# - 19/7/2023: Changed from dwc to datasphere commands
# - 6/11/2023: Adapted to DSP CLI command changes, wait function set to 0
# Prereqs
# - Install Datasphere CLI (npm install -g @sap/datasphere-cli)
# - Create Database Analysis User (with Space Schema access) on Datasphere for connection to HDB. https://help.sap.com/docs/SAP_DATASPHERE/9f804b8efa8043539289f42f372c4862/c28145bcb76c4415a1ec6265dd...
# - Prepare DSP CLI secrets file according to https://help.sap.com/docs/SAP_DATASPHERE/d0ecd6f297ac40249072a44df0549c1a/eb7228a171a842fa84e48c899d...
# - If you want to avoid a login process, use the following instructions and adapt code under "Logon procedure": https://help.sap.com/docs/SAP_DATASPHERE/d0ecd6f297ac40249072a44df0549c1a/eb7228a171a842fa84e48c899d...
# - To use the hdbcli package, install using pip: pip3 install hdbcli
# -----------------------------------------------------------
# Static variables, these you have to adjust before running the script
# -----------------------------------------------------------
# dsp: connection details of the SAP Datasphere tenant
dsp_host = 'https://<your-host-url>' # Placeholder for the SAP Datasphere URL (tenant host, incl. https://)
dsp_space = '<your-dsp-space>' # Placeholder for the technical space name (also used as HANA schema prefix below)
# hdb: credentials of the Database Analysis User on the underlying SAP HANA database
hdb_address='<your-hdb-address>' # Placeholder for the SAP HANA database (hdb) address
hdb_port=443 # Port for the hdb, usually 443
hdb_user='<your-hdb-user>' # Placeholder for the hdb user (needs Space Schema access, see header prereqs)
hdb_password='<your-hdb-password>' # Placeholder for the hdb password
# settings
view_suffix = '_V' # Suffix appended to the remote table name to form the generated view name
secrets_file = '<path-to-your>/dsp_cli_secrets.json' # Path to the DSP CLI OAuth secrets file
deploy_wait_time_in_seconds = 0 # wait time between object deployments, to prevent overload. After DSP fix May '23, this seems not needed anymore.
export_folder_path = '<path-to-your>/scripts/dsp_cli_view_generation/' # Folder the generated .csn files are written to (must end with a path separator)
# -----------------------------------------------------------
# Package import
# -----------------------------------------------------------
import subprocess # For OS commands on DSP CLI
import json # For handling the CSN files which are in JSON format
import time # For wait function to let an object deploy finish before starting the following
from hdbcli import dbapi # To connect to SAP HANA Database underneath SAP Datasphere to fetch Remote Table metadata
# -----------------
# Print versions of relevant components (node, npm, Datasphere CLI)
# -----------------
# NOTE(fix): the original used 'datasphere -version' (single dash); the CLI
# documents the long option as '--version'.
for command in ('node --version', 'npm --version', 'datasphere --version'):
    print(command)
    subprocess.run(command, shell=True)
# -----------------------------------------------------------
# CLI logon procedure with OAuth authentication to DSP CLI
# -----------------------------------------------------------
# A logout is needed so the subsequent login picks up a (possibly replaced)
# secrets file, e.g. when the OAuth client id/secret changed.
command = 'datasphere logout'  # fix: was a pointless f-string with no placeholders
print(command)
subprocess.run(command, shell=True)
# Login using the OAuth client details stored in the secrets file
command = f'datasphere login --host {dsp_host} --secrets-file {secrets_file}'
print(command)
subprocess.run(command, shell=True)
# Optional command to debug or to get the access and refresh token to avoid
# the login command on subsequent runs (see header comments)
command = 'datasphere config secrets show'
print(command)
subprocess.run(command, shell=True)
# -----------------------------------------------------------
# Wait helper, used to prevent overload of space deployment.
# Sleeps for the given number of seconds, printing one progress dot per
# second and a line break after every 10 dots. A trailing newline is always
# printed, even for seconds == 0.
# -----------------------------------------------------------
def wait(seconds):
    elapsed = 0
    while elapsed < seconds:
        if elapsed > 0 and elapsed % 10 == 0:
            print()
        print('.', end='', flush=True)
        time.sleep(1)
        elapsed += 1
    print()
# -----------------------------------------------------------
# Transform a Remote Table (RT) CSN into the CSN of a SQL view selecting
# every element of the RT. The view is named after the RT plus the global
# view_suffix and is annotated for external (Open SQL schema) consumption.
# -----------------------------------------------------------
def remote_table_to_view(rt_csn):
    rt_name = next(iter(rt_csn['definitions']))
    view_name = rt_name + view_suffix
    elements = rt_csn['definitions'][rt_name]['elements']
    # Column list for the CSN query and the editor-friendly SQL text
    column_refs = [{"ref": [element]} for element in elements]
    select_list = ', '.join(f'"{element}"' for element in elements)
    view_definition = {
        "kind": "entity",
        "elements": elements,
        "query": {
            "SELECT": {
                "from": {"ref": [rt_name]},
                "columns": column_refs
            }
        },
        "@EndUserText.label": view_name,
        "@ObjectModel.modelingPattern": {"#": "DATA_STRUCTURE"},
        "@ObjectModel.supportedCapabilities": [{"#": "DATA_STRUCTURE"}],
        "@DataWarehouse.consumption.external": True,
        "@DataWarehouse.sqlEditor.query": f"SELECT {select_list}\nFROM \"{rt_name}\""
    }
    return {"definitions": {view_name: view_definition}}
# -----------------------------------------------------------
# Generate a space-level CSN from a view CSN by nesting it under the
# (globally configured) technical space name.
# -----------------------------------------------------------
def generate_space_csn_with_view(view_csn):
    # fix: removed a dead `space_csn = {}` assignment that was immediately
    # overwritten in the original.
    return {dsp_space: view_csn}
# -----------------------------------------------------------
# Persist a space CSN as pretty-printed JSON to a .csn file in the export
# folder and return the file name. The file name embeds the space name and
# the (single) view name found in the CSN.
# -----------------------------------------------------------
def write_space_csn(space_csn):
    space_csn_pretty = json.dumps(space_csn, indent=4)
    space_name = next(iter(space_csn))
    view_name = next(iter(space_csn[space_name]["definitions"]))
    space_csn_file = f'{export_folder_path}space_{dsp_space}_object_{view_name}.csn'
    with open(space_csn_file, 'w') as f:
        f.write(space_csn_pretty)
    return space_csn_file
# -----------------------------------------------------------
# Push a space definition (.csn file) to DSP via the CLI 'spaces create'
# command; --force-definition-deployment is passed so the contained view is
# deployed as part of the call.
# -----------------------------------------------------------
def push_space_csn_to_DSP(space_csn_file):
    command = (
        f'datasphere spaces create --host {dsp_host} --space {dsp_space} '
        f'--file-path {space_csn_file} --force-definition-deployment --verbose'
    )
    print(command)
    subprocess.run(command, shell=True)
# -----------------------------------------------------------
# Fetch RT metadata from HANA DB, and for each RT create a view and push this to DSP
# -----------------------------------------------------------
# Connect to the HANA database underneath SAP Datasphere
conn = dbapi.connect(
    address=hdb_address,
    port=hdb_port,
    user=hdb_user,
    password=hdb_password
)
# SELECT statement to fetch remote table csn's. The self-join selects only
# the highest ARTIFACT_VERSION for each object.
st = f'''
SELECT A.ARTIFACT_NAME, A.CSN, A.ARTIFACT_VERSION
FROM "{dsp_space}$TEC"."$$DeployArtifacts$$" A
INNER JOIN (
    SELECT ARTIFACT_NAME, MAX(ARTIFACT_VERSION) AS MAX_ARTIFACT_VERSION
    FROM "{dsp_space}$TEC"."$$DeployArtifacts$$"
    WHERE PLUGIN_NAME='remoteTable'
    GROUP BY ARTIFACT_NAME
) B
ON A.ARTIFACT_NAME = B.ARTIFACT_NAME
AND A.ARTIFACT_VERSION = B.MAX_ARTIFACT_VERSION;
'''
print('>>> SELECT statement to fetch remote table definitions')
print(st)
# fix: ensure the connection is released even if execute/fetch raises
# (the original leaked it on error). Rows are fetched up front so the
# connection can be closed before the (slow) deploy loop, as before.
try:
    cursor = conn.cursor()
    cursor.execute(st)
    rows = cursor.fetchall()
finally:
    conn.close()
total_rows = len(rows)
print(f'Total rows: {total_rows}')
# Loop over the remote tables, create a view definition, and push its csn to DSP
for i, row in enumerate(rows):
    rt_name = row[0]
    csn = row[1]
    # Load remote table csn and print it
    rt_csn = json.loads(csn)
    print('>>> Remote table csn of: ' + rt_name)
    print(json.dumps(rt_csn, indent=4))
    # Get view definition based on remote table and print it
    view_csn = remote_table_to_view(rt_csn)
    print('>>> Generated view csn of: ' + rt_name)
    print(json.dumps(view_csn, indent=4))
    # Generate space csn (view csn nested under the space name) and print it
    space_csn_new = generate_space_csn_with_view(view_csn)
    print('>>> Generated space csn of: ' + rt_name)
    print(json.dumps(space_csn_new, indent=4))
    # Write space csn to file, then push it to DSP
    space_file_write = write_space_csn(space_csn_new)
    push_space_csn_to_DSP(space_file_write)
    # Wait between deployments (skipped after the last row)
    if i < total_rows - 1:
        print(f'waiting {deploy_wait_time_in_seconds} seconds for space deployer to finish previous deploy')
        wait(deploy_wait_time_in_seconds)
# (End of script. The original source was scraped from a blog post; trailing
# comment-section/"User | Count" table fragments that were not part of the
# code have been removed to keep this file valid Python.)