%spark
val Container = "journeyC"
val Directory = "weather"

sqlContext.setConf("spark.sql.shuffle.partitions", "4")

var wdf: org.apache.spark.sql.DataFrame = null

if ("swift://bdcsce.default".contains("swift")) {
  println("Running on OCI-C")
  // We will use the bdfs (Alluxio) cached file system to access our object store data...
  wdf = sqlContext.read.format("com.databricks.spark.csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load("swift://bdcsce.default/" + Directory + "/raw/201612-weather.csv")
} else {
  println("Running on OCI")
  wdf = sqlContext.read.format("com.databricks.spark.csv")
    .option("header", "true")
    .option("inferSchema", "true")   // infer the schema here as well so printSchema() below shows typed columns
    .load("swift://bdcsce.default/" + Directory + "/raw/201612-weather.csv")
}

// If you get this error message:
//   java.lang.IllegalStateException: Cannot call methods on a stopped SparkContext.
// then go to the Settings tab and click Notebook, then restart the Notebook.
// This will restart your SparkContext.

println("Here is the schema detected from the CSV")
wdf.printSchema()
println("..")
println("# of rows: %s".format(wdf.count()))
println("..")

// Register the DataFrame as a temporary view so later paragraphs can query it with Spark SQL
wdf.createOrReplaceTempView("weather_temp")
println("done")
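
Once weather_temp is registered, a following paragraph can query it with Spark SQL. Below is a minimal sketch; the column names (STATION, DATE, TMAX) are assumptions and should be replaced with the headers actually present in 201612-weather.csv.

%spark
// Sketch only: query the registered temp view with Spark SQL.
// STATION, DATE, and TMAX are placeholder column names; adjust to the real CSV headers.
val hottest = sqlContext.sql("""
  SELECT STATION, DATE, TMAX
  FROM weather_temp
  ORDER BY TMAX DESC
  LIMIT 10
""")
hottest.show()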