/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.bahir.examples.sql.streaming.sqs

import scala.util.Random

import org.apache.spark.sql.SparkSession

/**
 * Example that reads files from S3 using the SQS source and writes the results
 * to the memory sink.
 *
 * Usage: SqsSourceExample <Sample Record Path to infer schema> <SQS Queue URL> <File Format>
 */
object SqsSourceExample {

  def main(args: Array[String]): Unit = {
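    // Random suffix keeps the query name and checkpoint directory unique across runs.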
    val randomName = Random.alphanumeric.take(6).mkString
    val pathName = "path_" + randomName
    val queryName = "query_" + randomName
    val checkpointDir = s"/checkpoints/$pathName"

    val schemaPathString = args(0)
    val spark = SparkSession.builder().appName("SqsExample").getOrCreate()

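    // Infer the schema from the sample record file instead of hard-coding it.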
    val schema = spark.read.json(schemaPathString).schema

    val queueUrl = args(1)
    val fileFormat = args(2)

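    // Stream new-file notifications from the SQS queue and load the matching S3
    // files with the "s3-sqs" source. The options below tune how often the queue
    // is polled, the SQS long-polling wait time, how many files go into each
    // micro-batch, and whether messages for already-deleted files are skipped.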
    val inputDf = spark
      .readStream
      .format("s3-sqs")
      .schema(schema)
      .option("sqsUrl", queueUrl)
      .option("fileFormat", fileFormat)
      .option("sqsFetchIntervalSeconds", "2")
      .option("sqsLongPollingWaitTimeSeconds", "5")
      .option("maxFilesPerTrigger", "50")
      .option("ignoreFileDeletion", "true")
      .load()

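    // The memory sink stores results in an in-memory table named after the
    // query; it is intended for testing and debugging, not production use.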
    val query = inputDf
      .writeStream
      .queryName(queryName)
      .format("memory")
      .option("checkpointLocation", checkpointDir)
      .start()
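
    // While the stream runs, the in-memory table can be queried from the same
    // SparkSession (e.g. from another thread, since awaitTermination() blocks):
    //   spark.sql(s"SELECT * FROM $queryName").show()

    // Block until the query is stopped or fails.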
    query.awaitTermination()
  }
}