@hadoopMan
2019-03-25T14:33:48.000000Z
字数 935
阅读 1232
spark
想学习spark,hadoop,kafka等大数据框架,请加群459898801,满了之后请加2群224209501。后续文章会陆续公开。
cd /opt/modules/idea-IC-141.178.9/
bin/idea.sh
前提是
1,启动hdfs的namenode,datanode。
2,启动spark的master和slaves,本地测试无需启动。
package sparkIDEProgram
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
/**
 * Minimal Spark word-filter example: counts the lines of an input file
 * that contain the letter "a" and the lines that contain "b".
 *
 * Created by hadoop on 1/1/16.
 */
object SimpleApp {
  /**
   * Entry point.
   *
   * @param args optional overrides (both default to the original values,
   *             so existing invocations keep working):
   *             args(0) = input file path,
   *             args(1) = Spark master URL (use "local" for local testing)
   */
  def main(args: Array[String]): Unit = {
    // Default input path; override with the first CLI argument.
    val logFile = args.headOption.getOrElse(
      "hdfs://spark.learn.com:8020/user/hadoop/spark/input/wc.input")
    val conf = new SparkConf()
      .setAppName("Simple Application")
      // Second CLI argument overrides the master; use "local" for local testing.
      .setMaster(args.lift(1).getOrElse("spark://spark.learn.com:7077"))
    val sc = new SparkContext(conf)
    try {
      // cache() because the RDD is scanned twice (two count() actions below).
      val logData = sc.textFile(logFile, 2).cache()
      val numAs = logData.filter(line => line.contains("a")).count()
      val numBs = logData.filter(line => line.contains("b")).count()
      println(s"Lines with a: $numAs, Lines with b: $numBs")
    } finally {
      // Always release the SparkContext, even if a job fails.
      sc.stop()
    }
  }
}
bin/spark-submit --class sparkIDEProgram.SimpleApp --master spark://spark.learn.com:7077 /opt/sparkTest/sparkTest.jar