%sh
# Stage the December 2016 weather file locally, then copy it into Object Storage for the rest of the workshop.
CONTAINER=bdcsce
DIRECTORY=weather
FILENAME=201612-weather

# Set http proxy if required
#export http_proxy=
export HADOOP_ROOT_LOGGER=WARN

# Work in a local staging directory; -f so a missing file on the first run is not an error.
test -e $DIRECTORY || mkdir $DIRECTORY
cd $DIRECTORY
rm -f $FILENAME.csv

echo "Downloading $FILENAME.csv. This may take a minute."
# ${Weather_Data_URL=...} downloads from the default URL unless Weather_Data_URL is already set in the environment.
wget -O $FILENAME.csv ${Weather_Data_URL=https://raw.githubusercontent.com/millerhoo/journey2-new-data-lake/master/workshops/journey2-new-data-lake/files/1090166.csv}
echo "."
echo "."
echo "."
ls -ltr $FILENAME.csv
echo "."
echo "."
echo "."

echo "Storing file to Object Storage. This may take a few minutes."
# We use the hadoop swift:// driver to interact with the Object Store.
# URIs have the form swift://<container>.<provider>/<path>; the .default provider configuration
# is the object store connection you defined when you created the BDCSCE instance.
echo "List the directory. The directory should be empty or missing."
hadoop fs -ls swift://$CONTAINER.default/$DIRECTORY

echo "Make the raw directory in the Object Store."
hadoop fs -mkdir -p swift://$CONTAINER.default/$DIRECTORY/raw

echo "Copy the first file to the Object Store. This may take a minute."
hadoop fs -put $FILENAME.csv swift://$CONTAINER.default/$DIRECTORY/raw/$FILENAME.csv

echo "Validate by listing the csv file that was copied to the Object Store."
hadoop fs -ls swift://$CONTAINER.default/$DIRECTORY/raw
echo "."
echo "."

echo "Quick glance at the first few lines of the weather file..."
head $FILENAME.csv
echo "."
echo "."
echo "done"
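The wget line relies on bash's ${name=default} parameter expansion: if Weather_Data_URL is unset, the URL after the = sign is assigned to it and used; if you exported your own Weather_Data_URL beforehand (for example, an internal mirror reachable through your proxy), your value wins. A minimal sketch of the idiom, using a hypothetical variable name chosen just for this illustration:

%sh
# Unset: the default is assigned to the variable and substituted.
unset SOURCE_URL
echo ${SOURCE_URL=http://example.com/data.csv}    # prints http://example.com/data.csv

# Already set: the existing value is kept and the default is ignored.
SOURCE_URL=http://mirror.internal/data.csv
echo ${SOURCE_URL=http://example.com/data.csv}    # prints http://mirror.internal/data.csv

Note that ${name=default} only assigns when the variable is unset; to also replace an empty value you would use ${name:=default}.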
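Once the script finishes, you can spot-check the uploaded object from another %sh paragraph without copying it back to local disk. This is an optional verification sketch, assuming the same container, directory, and file name used above:

%sh
# Stream the first few lines of the uploaded object straight from the Object Store.
hadoop fs -cat swift://bdcsce.default/weather/raw/201612-weather.csv | head -5

# Compare the size reported by the Object Store with the size of the local download.
hadoop fs -du -h swift://bdcsce.default/weather/raw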