%spark
val Container = "journeyC"
val Directory = "weather"

sqlContext.setConf("spark.sql.shuffle.partitions", "4")

var wdf: org.apache.spark.sql.DataFrame = null

if ("swift://bdcsce.default".contains("swift")) {
  println("Running on OCI-C")
  // We will use the bdfs (Alluxio) cached file system to access our object store data...
  wdf = sqlContext.read.format("com.databricks.spark.csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load("swift://bdcsce.default/" + Directory + "/raw/201612-weather.csv")
} else {
  println("Running on OCI")
  wdf = sqlContext.read.format("com.databricks.spark.csv")
    .option("header", "true")
    .option("inferSchema", "true")   // infer the schema here as well so printSchema() below shows typed columns
    .load("swift://bdcsce.default/" + Directory + "/raw/201612-weather.csv")
}

// If you get this error message:
//   java.lang.IllegalStateException: Cannot call methods on a stopped SparkContext.
// then go to the Settings tab and click Notebook, then restart the Notebook.
// This will restart your SparkContext.

println("Here is the schema detected from the CSV")
wdf.printSchema()
println("..")
println("# of rows: %s".format(wdf.count()))
println("..")

// Register the DataFrame as a temporary view so later paragraphs can query it with Spark SQL
wdf.createOrReplaceTempView("weather_temp")
println("done")
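
Once weather_temp is registered, a following paragraph can query it with Spark SQL. Below is a minimal sketch; the column names (STATION, DATE, TMAX) are assumptions and should be replaced with the headers actually present in 201612-weather.csv.

%spark
// Sketch only: query the registered temp view with Spark SQL.
// STATION, DATE, and TMAX are placeholder column names; adjust to the real CSV headers.
val hottest = sqlContext.sql("""
  SELECT STATION, DATE, TMAX
  FROM weather_temp
  ORDER BY TMAX DESC
  LIMIT 10
""")
hottest.show()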