Tuesday, February 19, 2019

Spark Streaming - Display the processed Streaming Result as Dataframe

Spark Streaming - Display the processed Streaming Result as Dataframe



import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Seconds,StreamingContext}
import org.apache.spark.{SparkConf,SparkContext}

/**
 * Minimal Spark Streaming example: reads lines of text from a TCP socket and
 * prints every micro-batch as a single-column DataFrame.
 */
object Streaming1 {

  /**
   * Builds the Spark, SQL and streaming contexts, attaches a socket receiver on
   * localhost:3456 and shows each 2-second batch as a DataFrame until the
   * streaming computation terminates.
   *
   * @param args command-line arguments; not used by this program
   */
  def main(args: Array[String]): Unit = {

    // Create a configuration object.
    // local[2]: the socket receiver occupies one thread, so at least one more
    // is needed to process the received batches.
    val conf = new SparkConf()
    conf.set("spark.master", "local[2]")
    conf.set("spark.app.name", "streamingApp1")
    conf.set("spark.streaming.blockInterval", "1000ms")

    // Creation of the Spark context object.
    val sc = new SparkContext(conf)

    // Creation of the SQLContext; its implicits provide `toDF` on RDDs.
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // Creation of the streaming context with a 2-second batch interval.
    val ssc = new StreamingContext(sc, Seconds(2))

    // Receiver: text lines arriving on localhost:3456.
    val lines = ssc.socketTextStream("localhost", 3456)

    // DStream (sequence of RDDs) into DataFrames: one DataFrame per batch.
    lines.foreachRDD { rdd =>
      rdd.toDF().show()
    }

    ssc.start()
    try {
      ssc.awaitTermination()
    } finally {
      // Always release the streaming context and its SparkContext, even when
      // awaitTermination() throws; otherwise the contexts stay alive in the
      // JVM and a later call to main cannot create a new SparkContext.
      ssc.stop(stopSparkContext = true, stopGracefully = false)
    }
  }
}

// start netcat and open port 3456
hadoop@hadoop:~/Desktop/vow$ nc -lk 3456
i love india
who is that guy

//start spark shell
$ spark-shell --master local[2]

// stop the shell's existing SparkContext (the program creates its own, and only one SparkContext may be active per JVM)
scala> sc.stop

//load spark (scala) file
scala> :load streamingDF.sc

// call the main method
scala> Streaming1.main(null)

// output (one table per 2-second batch; a batch that received no lines prints an empty table)
+---------------+
|          value|
+---------------+
|   i love india|
|who is that guy|
+---------------+

+-----+
|value|
+-----+
+-----+

+-----+
|value|
+-----+
+-----+


No comments:

Post a Comment

Python Challenges Program

Challenges program: program 1: #Input :ABAABBCA #Output: A4B3C1 str1="ABAABBCA" str2="" d={} for x in str1: d[x]=d...