运行Zeppelin自带的Basic Features (Spark)案例。
本案例以银行分析数据 bank.csv 为例(下载地址:https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv ),分析客户年龄分布特征。
import java.net.URL
import java.nio.charset.Charset
import java.nio.charset.StandardCharsets

import org.apache.commons.io.IOUtils
// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext),
// so you don't need to create them manually.

// Load bank data
// Fetch the bank CSV from the tutorial bucket, decode it as UTF-8, and hand its
// lines to Spark through the injected SparkContext `sc`. The whole file is read
// into a single String first and only then split on '\n', so every line
// (including the header row) becomes one element of the resulting collection.
val bankText = sc.parallelize(
  IOUtils.toString(
    new URL("https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv"),
    // Constant charset instead of a by-name lookup: no runtime name resolution.
    StandardCharsets.UTF_8
  ).split("\n")
)

// One parsed row of the bank data, restricted to the five columns used below.
// The numeric fields are declared as java.lang.Integer, as in the original.
case class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)
// Turn the raw CSV lines into Bank records and convert them to a DataFrame.
// Fields are separated by ';'. The header row is identified by its first field
// being the quoted literal "age" and is dropped. Columns 0, 1, 2, 3 and 5 feed
// age, job, marital, education and balance; double quotes are stripped from the
// columns that carry them before use.
val bank = bankText
  .map(_.split(";"))
  .filter(fields => fields(0) != "\"age\"")
  .map { fields =>
    Bank(
      age = fields(0).toInt,
      job = fields(1).replaceAll("\"", ""),
      marital = fields(2).replaceAll("\"", ""),
      education = fields(3).replaceAll("\"", ""),
      balance = fields(5).replaceAll("\"", "").toInt
    )
  }
  .toDF()

// Register the DataFrame as a temporary table named "bank" so it can be queried by name.
// NOTE(review): registerTempTable is deprecated since Spark 2.0 in favor of
// createOrReplaceTempView; kept as-is for Spark 1.x compatibility — confirm target version.
bank.registerTempTable("bank")