> sudo yum update
> sudo yum install R
> cd $VORA_SPARK_HOME/R
> sudo bash install-dev.bash
> sparkR --jars ${VORA_SPARK_HOME}/lib/spark-sap-datasources-1.X.YY-assembly.jar
> library(SparkRVora, lib.loc = c(file.path(Sys.getenv("VORA_SPARK_HOME"),"R","lib")))
> sqlCtx <- sparkRVora.init(sc)
16/08/17 19:26:09 INFO SapSQLContext: SapSQLContext [version: 1.3.88] created
> library(SparkR, lib.loc = c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib")))
> library(SparkRVora, lib.loc = c(file.path(Sys.getenv("VORA_SPARK_HOME"),"R","lib")))
> sc <- sparkR.init(master="local[*]", sparkJars=c(file.path(Sys.getenv("VORA_SPARK_HOME"), "lib", "spark-sap-datasources-1.3.88-assembly.jar")))
> sqlCtx <- sparkRVora.init(sc)
> sql(sqlCtx, 'REGISTER ALL TABLES USING com.sap.spark.vora OPTIONS (eagerLoad "false")') # registers all tables from the Vora catalogue in the Spark SQL context
> df <- sql(sqlCtx, 'SELECT * FROM NATION') # query the NATION table and keep the result in df (assumes the NATION table exists in the catalogue); nothing is displayed yet
> print(df) #this shows table schema
> head(df, 10) #this shows first 10 rows of data
> sql(sqlCtx, 'CREATE TABLE VoraTable(id integer, val double, mdate date, number decimal(6,2)) USING com.sap.spark.vora OPTIONS (files "/user/vora/test_int_double_date_dec2.csv")') # create a table from a CSV file on HDFS
> sparkDataFrame <- sql(sqlCtx, 'SELECT * FROM VoraTable')
> RDataFrame <- as.data.frame(sparkDataFrame) #convert to R dataframe
> plot(RDataFrame$id, RDataFrame$val) #plot variables
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
21 | |
16 | |
13 | |
13 | |
10 | |
10 | |
7 | |
7 | |
7 | |
7 |