Visualizing a PAL decision tree using d3

In this tutorial we will visualize a Hana PAL decision tree using d3.js.

For most visualization purposes, it is most convenient to use SAP UI5 and SAP Lumira. At the moment however, these solutions do not offer a possibility to visualize a decision tree which was determined by one of the decision tree algorithms in SAP Hana. In this tutorial, we will create a visualization of such a tree using d3.js.

  1. Parse a tree model created by Hana PAL
  2. Create a visualization using d3.js
  3. Add functionality to the visualization


Prerequisites

In order to visualize a decision tree that has been created by Hana PAL, the output of the algorithm has to be set to PMML. Information on how this can be done and a decision tree sample can be found in the Hana PAL guide.

What is PMML?

PMML is short for predictive model markup language, an xml standard which contains information about a predictive model. In our case, this is a decision tree. The definitions of the standard can be found on the website of the data mining group at dmg.org.

The reason why we use PMML is that it can be created by all of the decision tree algorithms available in the Hana PAL library (C4.5, CART, CHAID). In addition, since PMML is an XML format, the browser's DOM parser can be used to parse the tree.

Parse a tree model created by Hana PAL

At first we need to make the PMML document accessible through JavaScript.

We create a new XSJS Project in our repository and call it “vis_tree”.

(For information on how to set up an XSJS project, refer to: Creating an SAP HANA XS Application)

In this project we create a new XSJS file and call it “getPmml.xsjs”. Edit that file and put the following lines of code in it (Where <SCHEMA NAME> has to be changed to your schema name and <PMML MODEL TABLE> to the name of your PMML output table):

// getPmml.xsjs
// Reads the PMML model written by HANA PAL and returns it as an XML response.
// Replace <SCHEMA NAME> and <PMML MODEL TABLE> with your own schema and table.
var conn = null;
var pstmt = null;
var rs = null;

try {
    // open a connection to the database and select the PMML model column from the table
    conn = $.db.getConnection();
    pstmt = conn.prepareStatement("SELECT PMML_MODEL FROM \"<SCHEMA NAME>\".\"<PMML MODEL TABLE>\"");
    rs = pstmt.executeQuery();

    // Depending on the PAL settings the PMML may be stored in multiple rows
    // (this is not the case in our example), so concatenate all rows.
    var data = "";
    while (rs.next()) {
        data += rs.getString(1);
    }

    // set the response
    $.response.contentType = "text/xml";
    $.response.setBody(data);
} catch (e) {
    // report the failure as plain text with HTTP 500 so the client can detect it
    $.response.contentType = "text/plain";
    $.response.setBody(e.message);
    $.response.status = $.net.http.INTERNAL_SERVER_ERROR;
} finally {
    // release the database resources even when the query failed
    if (rs) {
        rs.close();
    }
    if (pstmt) {
        pstmt.close();
    }
    if (conn) {
        conn.close();
    }
}

If you now point your browser to “http://<SERVER IP>:8000/vis_tree/getPmml.xsjs”, you should see output like the following:

/wp-content/uploads/2015/08/xml_775703.png

As you can see, the decision tree is stored in the <TreeModel> node. We want to parse this tree into a JSON object which satisfies the condition that the children of each node are stored in a list called “children”. This JSON tree structure, together with a “name” in each node, is commonly used for d3.js and referred to as flare.json. We will utilize d3.layout and thus need a flare-like structure. The layout itself does not need the “name” string, so we only store the node id there and put all of the remaining information directly into each node of the tree.

In our project we create a new file and call it “pmmlTree2Flare.js”. First we need to pull the XML document from the XSJS script; then we parse the tree recursively into a JSON object.

/**
 * Loads the PMML document from getPmml.xsjs and converts its decision tree
 * into a flare-like JSON object.
 *
 * The request is synchronous on purpose: callers use the return value
 * directly, e.g. createSvg(getFlare()).
 *
 * @returns {Object} the root node of the flare tree as built by pmml2Flare
 * @throws {Error} if the request does not succeed or the response contains
 *         no <TreeModel> element with a <Node> inside
 */
function getFlare() {
    var xmlHttp = new XMLHttpRequest();
    xmlHttp.open("GET", "./getPmml.xsjs", false);
    xmlHttp.send(null);

    // Fail with a readable message instead of silently parsing an error page.
    if (xmlHttp.status < 200 || xmlHttp.status >= 300) {
        throw new Error("Could not load PMML from ./getPmml.xsjs (HTTP " + xmlHttp.status + ")");
    }

    var pmml = xmlHttp.responseText;
    var xmlDoc;
    if (window.DOMParser) {
        xmlDoc = new DOMParser().parseFromString(pmml, "text/xml");
    } else {
        // code for IE
        xmlDoc = new ActiveXObject("Microsoft.XMLDOM");
        xmlDoc.async = false;
        xmlDoc.loadXML(pmml);
    }

    // The tree starts at the first <Node> inside the first <TreeModel>.
    var treeModel = xmlDoc.getElementsByTagName("TreeModel")[0];
    var rootNode = treeModel && treeModel.getElementsByTagName("Node")[0];
    if (!rootNode) {
        throw new Error("The PMML response does not contain a <TreeModel> with a <Node>");
    }

    return pmml2Flare(rootNode, {});
}

/**
 * Recursively copies a PMML <Node> element into a flare-style object.
 *
 * Each flare node gets the properties name (the PMML node id),
 * scoreDistribution, score, recordCount and predicateList. Nodes that
 * contain further <Node> elements also get a "children" array; end nodes
 * have no "children" property at all.
 *
 * @param {Element} pmmlNode  the PMML <Node> element to convert
 * @param {Object}  flareNode the object to fill (it is modified in place)
 * @returns {Object} flareNode
 */
function pmml2Flare(pmmlNode, flareNode) {
    // Fill the node with data
    flareNode.name = pmmlNode.getAttribute("id");
    flareNode.scoreDistribution = getScoreDistribution(pmmlNode);
    flareNode.score = pmmlNode.getAttribute("score");
    flareNode.recordCount = pmmlNode.getAttribute("recordCount");
    flareNode.predicateList = getPredicates(pmmlNode);

    // getElementsByTagName returns every descendant <Node>, not only the
    // direct children, so look the collection up once and filter below.
    var descendants = pmmlNode.getElementsByTagName("Node");
    if (descendants.length === 0) {
        // end node: nothing to recurse into
        return flareNode;
    }

    flareNode.children = [];
    for (var i = 0; i < descendants.length; i++) {
        // only the top level children of the active node; deeper nodes are
        // picked up by the recursive call for their own parent
        if (descendants[i].parentNode === pmmlNode) {
            flareNode.children.push(pmml2Flare(descendants[i], {}));
        }
    }
    return flareNode;
}

/**
 * Collects the <ScoreDistribution> entries that belong directly to a PMML node.
 *
 * @param {Element} node a PMML <Node> element
 * @returns {Array} one object per entry with the attributes value,
 *          recordCount and confidence (as returned by getAttribute)
 */
function getScoreDistribution(node) {
    var scoreList = [];
    // the lookup also returns the entries of all descendant nodes
    var candidates = node.getElementsByTagName("ScoreDistribution");

    for (var i = 0; i < candidates.length; i++) {
        var entry = candidates[i];
        // skip entries that belong to a node further down the tree
        if (entry.parentNode !== node) {
            continue;
        }
        scoreList.push({
            value: entry.getAttribute("value"),
            recordCount: entry.getAttribute("recordCount"),
            confidence: entry.getAttribute("confidence")
        });
    }
    return scoreList;
}

/**
 * Collects the split condition of a PMML node.
 *
 * If the node's own predicate is a <CompoundPredicate>, all simple predicates
 * inside the compound are returned together with its booleanOperator.
 * Otherwise the node's own <SimplePredicate> is returned. Only predicates
 * that are direct children of the node are considered; predicates of nodes
 * further down the tree are ignored.
 *
 * @param {Element} node a PMML <Node> element
 * @returns {Object} { predicates: [...], operator: <booleanOperator> };
 *          "predicates" is empty when the node has neither a simple nor a
 *          compound predicate of its own, "operator" is only set for
 *          compound predicates
 */
function getPredicates(node) {
    var predicateList = {
        predicates: []
    };

    // getElementsByTagName also returns elements of descendant nodes, so
    // pick the first match that is a direct child of the node.
    function ownChild(tagName) {
        var found = node.getElementsByTagName(tagName);
        for (var i = 0; i < found.length; i++) {
            if (found[i].parentNode === node) {
                return found[i];
            }
        }
        return null;
    }

    var compound = ownChild("CompoundPredicate");
    if (compound) {
        // if the predicate is compound, we have to figure out the simple
        // predicates in the compound
        var parts = compound.getElementsByTagName("SimplePredicate");
        for (var j = 0; j < parts.length; j++) {
            predicateList.predicates.push(predicate2Json(parts[j]));
        }
        if (parts.length > 0) {
            predicateList.operator = compound.getAttribute("booleanOperator");
        }
        return predicateList;
    }

    var simple = ownChild("SimplePredicate");
    if (simple) {
        predicateList.predicates.push(predicate2Json(simple));
    }
    return predicateList;
}

/**
 * Converts a PMML <SimplePredicate> element into a plain object.
 *
 * @param {Element} simplePredicate the <SimplePredicate> element
 * @returns {Object} its field, operator and value attributes as returned by
 *          getAttribute
 */
function predicate2Json(simplePredicate) {
    return {
        field: simplePredicate.getAttribute("field"),
        operator: simplePredicate.getAttribute("operator"),
        value: simplePredicate.getAttribute("value")
    };
}

 

Now we have a simple parser to pull out the XML data into a simple flare.json object.

What is left is to use d3 to create a simple tree in html.



Create a visualization using d3.js



What is d3?

D3.js is a javascript library, which makes it easy to correlate data and XML. This can be used to manipulate SVG images dynamically according to a dataset. The basic idea is to select (or create) XML nodes and then bind data to them, which is simply done by calling selectAll(<Node_Id>).data(<dataset>).

Afterwards, each of the XML nodes can be manipulated according to the data.

What is d3.layout?

D3.layout is a part of d3, which makes it easy to calculate common graphic structures.

In our example, we want to create a tree, thus we would have to calculate the position of each node according to the depth of the tree and the maximum number of nodes on one level. D3.layout does all that work and returns x and y values for each node, as well as the depth and the parent and child nodes.

In order to create a visualization we create a new file in our repository named “createSvg.js”.

We will use a <div> pane in our html which has the id “viz”. To keep things simple, we first create the most basic tree:

/**
 * Draws a basic left-to-right tree into the element with the id "viz".
 *
 * @param {Object} treeData  root of the flare-like tree (nodes carry
 *        predicateList, score and optionally children)
 * @param {number} [svgWidth]  width of the svg canvas in pixels
 * @param {number} [svgHeight] height of the svg canvas in pixels
 *
 * When no size is passed, page-level variables "width" and "height" are used
 * if they exist; otherwise the canvas is 960 x 500.
 */
function createSvg(treeData, svgWidth, svgHeight) {
    var canvasWidth = svgWidth || (typeof width !== "undefined" ? width : 960);
    var canvasHeight = svgHeight || (typeof height !== "undefined" ? height : 500);

    // Builds the text next to a node: the splitting condition of the node
    // and, for an end node, the score.
    function buildNodeLabel(d) {
        var label = "";
        if (d.predicateList && d.predicateList.predicates.length > 0) {
            var parts = [];
            for (var j = 0; j < d.predicateList.predicates.length; j++) {
                var p = d.predicateList.predicates[j];
                var part = p.field + " " + p.operator;
                // some operators carry no value attribute
                if (p.value !== null && p.value !== undefined) {
                    part += " " + p.value;
                }
                parts.push(part);
            }
            // the boolean operator goes between the predicates, not after
            // the last one
            var glue = d.predicateList.operator ? " " + d.predicateList.operator + " " : " ";
            label = parts.join(glue);
        }
        if (!d.children) {
            label += " => " + d.score;
        }
        return label;
    }

    // Create a svg canvas and shift it away from the left border, such that
    // the first circle is still visible
    var vis = d3.select("#viz").append("svg:svg")
        .attr("width", canvasWidth)
        .attr("height", canvasHeight)
        .append("svg:g")
        .attr("transform", "translate(10, 0)");

    // Create a tree layout using d3.layout. The layout's x axis is drawn
    // vertically (see the projection below), so the first extent is derived
    // from the canvas height and the second one from the canvas width.
    var tree = d3.layout.tree().size([canvasHeight - 200, canvasWidth - 300]);

    // in order to have a left to right tree, we have to change x and y axis
    var diagonal = d3.svg.diagonal().projection(function(d) {
        return [d.y, d.x];
    });

    // call tree.nodes in order to compute the x and y position of each
    // subnode in the tree
    var nodes = tree.nodes(treeData);

    // call tree.links to create an array of the links between the nodes
    var links = tree.links(nodes);

    // draw a link for each of the links
    vis.selectAll("path.link").data(links).enter().append("svg:path")
        .attr("class", "link")
        .attr("d", diagonal);

    // place a group at the computed position of each node, relative to the
    // drawing pane
    var node = vis.selectAll("g.node").data(nodes).enter().append("svg:g")
        .attr("class", "node")
        .attr("transform", function(d) {
            return "translate(" + d.y + "," + d.x + ")";
        });

    // draw a circle for every node
    node.append("svg:circle").attr("r", 3.5);

    // attach a simple text to each node
    node.append("svg:text")
        .attr("dx", 8)
        .attr("dy", 3)
        .attr("text-anchor", "start")
        .text(buildNodeLabel);
}

Now let’s put things together: create a new file in the repository called “simpleTree.html” and put the following into it:

<!DOCTYPE HTML>
<html>
<head>
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<style type="text/css">
/* the links drawn between the tree nodes */
path.link {
    fill: none;
    stroke: #F0AB00;
    stroke-width: 1.5px;
}

text {
    font-size: 11px;
}
</style>
<script src="./.lib/d3.js" charset="utf-8"></script>
<script src="./.lib/d3.layout.js" charset="utf-8"></script>
<script src="pmmlTree2Flare.js"></script>
<script src="createSvg.js"></script>
</head>
<body>
    <!-- the tree is drawn into this pane -->
    <div id="viz"></div>
    <script type="text/javascript">
        createSvg(getFlare());
    </script>
</body>
</html>

Point your browser to “http://<YOUR SERVER>/vis_tree/simpleTree.html”

You should see the following:

simpleTree.png

 

As you can see, our basic tree looks rather ugly and is inconvenient for bigger tree structures. Nevertheless, parsing the tree structure into a flare.json object allows us to reuse any given example of a tree which is written in d3. Remember, though, that the “name” string in our flare.json only holds the PMML node id and not a display label.

In the next step, we use an existing code for a collapsible tree and add a mouseover text which contains all of the information we have.

Add functionality to the visualization

Our goal is to add two features:

  1. Make the tree collapsible, so that bigger tree structures are convenient to view
  2. Make all the information visible in the tree structure

In the end our tree structure will look as follows, blue nodes indicate the existence of children, a click on a blue node will expand the top level children. The blue box is visible when hovering over a node, it shows every bit of information associated with that node.

The tags on each node show the most probable outcome value on that branch of the tree.

complexTree.png

For a collapsible tree, a good tutorial can be found at

http://www.d3noob.org/2014/01/tree-diagrams-in-d3js_11.html

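In order to create a mouseover textbox, we create a <div> object containing all the information for each node and show it while the mouse is over that node. The following is the complete source, which can be tested by copying and pasting it into createSvg.js. Remember to call complexTree(getFlare()) instead of createSvg(getFlare()) in the HTML page.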

 

Collapsible Tree with textboxes:

/**
 * Draws a collapsible left-to-right tree with a hover text box per node.
 *
 * A click on a node shows or hides its children. Hovering over a node shows
 * a box with the score, record count, predicates and score distribution of
 * that node.
 *
 * The svg is appended to the element with the id "tree"; if the page has no
 * such element, the element with the id "viz" is used instead.
 *
 * @param {Object} flare root of the flare-like tree; the nodes are modified
 *        in place (layout positions, id, tooltip, _children)
 */
function complexTree(flare) {
    // margins in the order top, right, bottom, left
    var m = [20, 120, 20, 120],
        w = 1280 - m[1] - m[3],
        h = 800 - m[0] - m[2],
        i = 0, // counter used to give every node a unique id in update()
        root;

    var tree = d3.layout.tree().size([h, w]);

    // swap x and y so that the tree grows from left to right
    var diagonal = d3.svg.diagonal().projection(function(d) {
        return [d.y, d.x];
    });

    // The page of this tutorial only contains <div id="viz">; "#tree" keeps
    // working for pages that provide it.
    var container = d3.select("#tree");
    if (container.empty()) {
        container = d3.select("#viz");
    }

    var vis = container.append("svg:svg")
        .attr("width", w + m[1] + m[3])
        .attr("height", h + m[0] + m[2])
        .append("svg:g")
        .attr("transform", "translate(" + m[3] + "," + m[0] + ")");

    // This function produces the div text box which is shown while the mouse
    // is over node d.
    // NOTE(review): every box gets the same id "toolbox"; kept as in the
    // original in case page styles refer to it.
    function tooltip(d) {
        var textdiv = d3.select("body").append("div")
            .style("position", "absolute")
            .style("z-index", "10")
            .style("opacity", 0)
            .style("background-color", "rgb(176, 196, 222)")
            .attr("id", "toolbox")
            .text("Score: " + d.score);
        textdiv.append("br");
        textdiv.append("span").text("Record Count : " + d.recordCount).append("br");

        // a node may come without predicates or score distribution
        var predicateList = d.predicateList || { predicates: [] };
        var scoreDistribution = d.scoreDistribution || [];

        if (predicateList.operator) {
            textdiv.append("span")
                .text("Predicate Logic : " + predicateList.operator)
                .append("br");
        }

        predicateList.predicates.forEach(function(p) {
            var text = p.field + " " + p.operator + " ";
            if (p.value) {
                text += p.value;
            }
            textdiv.append("span").text(text).append("br");
        });

        scoreDistribution.forEach(function(s) {
            textdiv.append("span")
                .text("Score for " + s.value + ": records " + s.recordCount + ", confidence: " + s.confidence)
                .append("br");
        });

        return textdiv;
    }

    root = flare;
    root.x0 = h / 2;
    root.y0 = 0;

    // collapses d and every node below it
    function toggleAll(d) {
        if (d.children) {
            d.children.forEach(toggleAll);
            toggle(d);
        }
    }

    // Initialize the display to show a few nodes: everything below the
    // children of the root is collapsed. A tree that consists of the root
    // only has nothing to collapse.
    if (root.children) {
        root.children.forEach(toggleAll);
    }
    update(root);

    // width of the box drawn behind the score text of node d
    function boxWidth(d) {
        return d.score ? d.score.length * 5 + 20 : 30;
    }

    // nodes with hidden children are drawn in blue
    function boxFill(d) {
        return d._children ? "lightsteelblue" : "#fff";
    }

    // Redraws the tree; entering and exiting elements are animated from / to
    // the position of the node "source".
    function update(source) {
        // holding the alt key during the click slows the animation down
        var duration = d3.event && d3.event.altKey ? 5000 : 500;

        // Compute the new tree layout.
        var nodes = tree.nodes(root).reverse();

        // Normalize for fixed depth: 180px per level.
        nodes.forEach(function(d) {
            d.y = d.depth * 180;
        });

        // Update the nodes; each node gets an id the first time it is seen.
        var node = vis.selectAll("g.node").data(nodes, function(d) {
            return d.id || (d.id = ++i);
        });

        // Enter any new nodes at the parent's previous position.
        var nodeEnter = node.enter().append("svg:g")
            .attr("class", "node")
            .attr("transform", function() {
                return "translate(" + source.y0 + "," + source.x0 + ")";
            })
            .on("click", function(d) {
                toggle(d);
                update(d);
            })
            .on("mouseover", function(d) {
                // the text box is created on first use and reused afterwards
                if (!d.tooltip) {
                    d.tooltip = tooltip(d);
                }
                d.tooltip.style("visibility", "visible");
                return d.tooltip.transition().style("opacity", 0.9);
            })
            .on("mousemove", function(d) {
                if (!d.tooltip) {
                    return;
                }
                // d3.event is the current DOM event; the global "event" is
                // not available in every browser
                return d.tooltip
                    .style("top", (d3.event.pageY - 10) + "px")
                    .style("left", (d3.event.pageX + 10) + "px");
            })
            .on("mouseout", function(d) {
                if (!d.tooltip) {
                    return;
                }
                d.tooltip.transition().style("opacity", 0).duration(1000);
                return d.tooltip.style("visibility", "hidden");
            });

        // the box starts with size 0 and grows in the update transition
        nodeEnter.append("svg:rect")
            .attr("height", 0)
            .attr("width", 0)
            .attr("transform", function(d) {
                return "translate(-" + boxWidth(d) / 2 + ",-20)";
            })
            .style("fill", boxFill);

        nodeEnter.append("svg:text")
            .attr("text-anchor", "middle")
            .text(function(d) {
                return d.score;
            })
            .style("fill-opacity", 0);

        // Transition nodes to their new position.
        var nodeUpdate = node.transition().duration(duration)
            .attr("transform", function(d) {
                return "translate(" + d.y + "," + d.x + ")";
            });

        nodeUpdate.select("rect")
            .attr("height", 30)
            .transition().duration(duration / 4)
            .attr("width", boxWidth)
            .style("fill", boxFill);

        nodeUpdate.select("text")
            .transition().duration(duration / 2)
            .style("fill-opacity", 1);

        // Transition exiting nodes to the parent's new position.
        var nodeExit = node.exit();

        nodeExit.select("rect")
            .transition().duration(duration / 2).attr("width", 0)
            .transition().duration(duration / 2).attr("height", 0);

        nodeExit.transition().duration(duration)
            .attr("transform", function() {
                return "translate(" + source.y + "," + source.x + ")";
            })
            .remove();

        nodeExit.select("text").style("fill-opacity", 0);

        // Update the links; a link is identified by the id of its target.
        var link = vis.selectAll("path.link").data(tree.links(nodes), function(d) {
            return d.target.id;
        });

        // Enter any new links at the parent's previous position.
        link.enter().insert("svg:path", "g")
            .attr("class", "link")
            .attr("d", function() {
                var o = {
                    x: source.x0,
                    y: source.y0
                };
                return diagonal({
                    source: o,
                    target: o
                });
            })
            .transition().duration(duration)
            .attr("d", diagonal);

        // Transition links to their new position.
        link.transition().duration(duration).attr("d", diagonal);

        // Transition exiting links to the parent's new position.
        link.exit().transition().duration(duration)
            .attr("d", function() {
                var o = {
                    x: source.x,
                    y: source.y
                };
                return diagonal({
                    source: o,
                    target: o
                });
            })
            .remove();

        // Stash the old positions for transition.
        nodes.forEach(function(d) {
            d.x0 = d.x;
            d.y0 = d.y;
        });
    }

    // If the children are currently visible, we move them to _children
    // (the node collapses); otherwise we move them back (the node expands).
    function toggle(d) {
        if (d.children) {
            d._children = d.children;
            d.children = null;
        } else {
            d.children = d._children;
            d._children = null;
        }
    }
}

To report this post you need to login first.

2 Comments

You must be Logged on to comment or reply to a post.

  1. Isuru Warnakulasooriya

    Thank you very much for this excellent article! It was very helpful.

    One thing I would like to point out though is that I had to change the following line from

         d3.select(“#tree”)

    to

         d3.select(“#viz”)

    in order to get the complex tree working.

    (0) 
  2. Nupur Gawali

    Hi all,

    I tried to implement the scenario on retail data. I have PMML output which needs to be plotted in tree diagram. But my final output is displaying blank screen. Could you suggest as error received at debugging is : Not Found (404)

     

    Thanks

     

    (0) 

Leave a Reply