{
"paragraphs": [
{
"title": "Introduction",
"text": "%md\n\nThis is a tutorial note for Flink streaming applications. You can run the Flink Scala API via `%flink` and Flink streaming SQL via `%flink.ssql`. We provide 2 examples in this tutorial:\n\n* Classic word count example via Flink streaming\n* We simulate a web log stream source, and then query and visualize this streaming data in Zeppelin.\n\n\nFor now, support for Flink streaming jobs in Zeppelin is very limited. For example, canceling a job is not supported. These capabilities depend on Flink itself, and the Flink community is working on them for downstream projects like Zeppelin.\n",
"user": "anonymous",
"dateUpdated": "2019-10-08 15:18:50.038",
"config": {
"tableHide": false,
"editorSetting": {
"language": "markdown",
"editOnDblClick": true,
"completionKey": "TAB",
"completionSupport": false
},
"colWidth": 12.0,
"editorMode": "ace/mode/markdown",
"fontSize": 9.0,
"editorHide": false,
"title": false,
"runOnSelectionChange": true,
"checkEmpty": true,
"results": {},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "HTML",
"data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThis is a tutorial note for Flink streaming applications. You can run the Flink Scala API via \u003ccode\u003e%flink\u003c/code\u003e and Flink streaming SQL via \u003ccode\u003e%flink.ssql\u003c/code\u003e. We provide 2 examples in this tutorial:\u003c/p\u003e\n\u003cul\u003e\n \u003cli\u003eClassic word count example via Flink streaming\u003c/li\u003e\n \u003cli\u003eWe simulate a web log stream source, and then query and visualize this streaming data in Zeppelin.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cp\u003eFor now, support for Flink streaming jobs in Zeppelin is very limited. For example, canceling a job is not supported. These capabilities depend on Flink itself, and the Flink community is working on them for downstream projects like Zeppelin.\u003c/p\u003e\n\u003c/div\u003e"
}
]
},
"apps": [],
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1569491705460_-1023073277",
"id": "paragraph_1548052720723_1943177100",
"dateCreated": "2019-09-26 17:55:05.460",
"dateStarted": "2019-10-08 15:18:47.711",
"dateFinished": "2019-10-08 15:18:47.724",
"status": "FINISHED"
},
{
"title": "Configure Flink Interpreter",
"text": "%flink.conf\n\nFLINK_HOME \u003cFLINK_INSTALLATION\u003e\n# Use blink planner for flink streaming scenario\nzeppelin.flink.planner blink\n",
"user": "anonymous",
"dateUpdated": "2019-10-11 10:38:09.734",
"config": {
"editorSetting": {
"language": "text",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"colWidth": 12.0,
"editorMode": "ace/mode/text",
"fontSize": 9.0,
"title": false,
"runOnSelectionChange": true,
"checkEmpty": true,
"results": {},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": []
},
"apps": [],
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1569491705461_327101264",
"id": "paragraph_1546571299955_275296580",
"dateCreated": "2019-09-26 17:55:05.461",
"dateStarted": "2019-10-08 15:21:48.153",
"dateFinished": "2019-10-08 15:21:48.165",
"status": "FINISHED"
},
{
"title": "Stream WordCount",
"text": "%flink\n\nval data \u003d senv.fromElements(\"hello world\", \"hello flink\", \"hello hadoop\")\ndata.flatMap(line \u003d\u003e line.split(\"\\\\s\"))\n .map(w \u003d\u003e (w, 1))\n .keyBy(0)\n .sum(1)\n .print\n\nsenv.execute()\n",
"user": "anonymous",
"dateUpdated": "2019-10-08 15:19:07.936",
"config": {
"editorSetting": {
"language": "scala",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"colWidth": 12.0,
"editorMode": "ace/mode/scala",
"fontSize": 9.0,
"title": false,
"runOnSelectionChange": true,
"checkEmpty": true,
"results": {},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TEXT",
"data": "\u001b[1m\u001b[34mdata\u001b[0m: \u001b[1m\u001b[32morg.apache.flink.streaming.api.scala.DataStream[String]\u001b[0m \u003d org.apache.flink.streaming.api.scala.DataStream@5a099566\n\u001b[1m\u001b[34mres0\u001b[0m: \u001b[1m\u001b[32morg.apache.flink.streaming.api.datastream.DataStreamSink[(String, Int)]\u001b[0m \u003d org.apache.flink.streaming.api.datastream.DataStreamSink@58805cef\n(hello,1)\n(world,1)\n(hello,2)\n(flink,1)\n(hello,3)\n(hadoop,1)\n\u001b[1m\u001b[34mres1\u001b[0m: \u001b[1m\u001b[32morg.apache.flink.api.common.JobExecutionResult\u001b[0m \u003d org.apache.flink.api.common.JobExecutionResult@74b3b79d\n"
}
]
},
"apps": [],
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1569491705461_377981606",
"id": "paragraph_1546571324670_-435705916",
"dateCreated": "2019-09-26 17:55:05.461",
"dateStarted": "2019-10-08 15:19:07.941",
"dateFinished": "2019-10-08 15:19:23.511",
"status": "FINISHED"
},
{
"title": "Register a Stream DataSource to simulate web log",
"text": "%flink\n\nimport org.apache.flink.streaming.api.functions.source.SourceFunction\nimport org.apache.flink.table.api.TableEnvironment\nimport org.apache.flink.streaming.api.TimeCharacteristic\nimport org.apache.flink.streaming.api.checkpoint.ListCheckpointed\nimport java.util.Collections\nimport scala.collection.JavaConversions._\n\nsenv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)\nsenv.enableCheckpointing(1000)\n\nval data \u003d senv.addSource(new SourceFunction[(Long, String)] with ListCheckpointed[java.lang.Long] {\n\n val pages \u003d Seq(\"home\", \"search\", \"search\", \"product\", \"product\", \"product\")\n var count: Long \u003d 0\n // startTime is 2018/1/1\n var startTime: Long \u003d new java.util.Date(2018 - 1900,0,1).getTime\n var sleepInterval \u003d 100\n \n override def run(ctx: SourceFunction.SourceContext[(Long, String)]): Unit \u003d {\n val lock \u003d ctx.getCheckpointLock\n \n while (count \u003c 10000000) {\n lock.synchronized({\n ctx.collect((startTime + count * sleepInterval, pages(count.toInt % pages.size)))\n count +\u003d 1\n Thread.sleep(sleepInterval)\n })\n }\n }\n\n override def cancel(): Unit \u003d {\n\n }\n\n override def snapshotState(checkpointId: Long, timestamp: Long): java.util.List[java.lang.Long] \u003d {\n Collections.singletonList(count)\n }\n\n override def restoreState(state: java.util.List[java.lang.Long]): Unit \u003d {\n state.foreach(s \u003d\u003e count \u003d s)\n }\n\n}).assignAscendingTimestamps(_._1)\n\nstenv.registerDataStream(\"log\", data, \u0027time, \u0027url, \u0027rowtime.rowtime)\n",
"user": "anonymous",
"dateUpdated": "2019-10-08 15:19:31.503",
"config": {
"editorSetting": {
"language": "scala",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"colWidth": 12.0,
"editorMode": "ace/mode/scala",
"fontSize": 9.0,
"title": false,
"runOnSelectionChange": true,
"checkEmpty": true,
"results": {},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TEXT",
"data": "import org.apache.flink.streaming.api.functions.source.SourceFunction\nimport org.apache.flink.table.api.TableEnvironment\nimport org.apache.flink.streaming.api.TimeCharacteristic\nimport org.apache.flink.streaming.api.checkpoint.ListCheckpointed\nimport java.util.Collections\nimport scala.collection.JavaConversions._\n\u001b[1m\u001b[34mres4\u001b[0m: \u001b[1m\u001b[32morg.apache.flink.streaming.api.scala.StreamExecutionEnvironment\u001b[0m \u003d org.apache.flink.streaming.api.scala.StreamExecutionEnvironment@7e0e86c0\n\u001b[33mwarning: \u001b[0mthere was one deprecation warning; re-run with -deprecation for details\n\u001b[1m\u001b[34mdata\u001b[0m: \u001b[1m\u001b[32morg.apache.flink.streaming.api.scala.DataStream[(Long, String)]\u001b[0m \u003d org.apache.flink.streaming.api.scala.DataStream@361e2c6e\n"
}
]
},
"apps": [],
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1569491705461_-1986381705",
"id": "paragraph_1546571333074_1869171983",
"dateCreated": "2019-09-26 17:55:05.461",
"dateStarted": "2019-10-08 15:19:31.507",
"dateFinished": "2019-10-08 15:19:34.040",
"status": "FINISHED"
},
{
"title": "Total Page View",
"text": "%flink.ssql(type\u003dsingle, parallelism\u003d1, refreshInterval\u003d3000, template\u003d\u003ch1\u003e{1}\u003c/h1\u003e until \u003ch2\u003e{0}\u003c/h2\u003e, enableSavePoint\u003dfalse, runWithSavePoint\u003dfalse)\n\nselect max(rowtime), count(1) from log\n",
"user": "anonymous",
"dateUpdated": "2019-10-08 15:19:41.998",
"config": {
"savepointPath": "file:/tmp/save_point/savepoint-13e681-f552013d6184",
"editorSetting": {
"language": "sql",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"colWidth": 6.0,
"editorMode": "ace/mode/sql",
"fontSize": 9.0,
"editorHide": false,
"title": false,
"runOnSelectionChange": true,
"checkEmpty": true,
"results": {
"0": {
"graph": {
"mode": "table",
"height": 141.0,
"optionOpen": false
}
}
},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1569491705461_616808409",
"id": "paragraph_1546571459644_596843735",
"dateCreated": "2019-09-26 17:55:05.461",
"dateStarted": "2019-10-08 15:19:42.003",
"dateFinished": "2019-09-26 17:57:24.042",
"status": "ABORT"
},
{
"title": "Page View by Page",
"text": "%flink.ssql(type\u003dretract, refreshInterval\u003d2000, parallelism\u003d1, enableSavePoint\u003dfalse, runWithSavePoint\u003dfalse)\n\nselect url, count(1) as pv from log group by url",
"user": "anonymous",
"dateUpdated": "2019-10-08 15:20:02.708",
"config": {
"savepointPath": "file:/flink/save_point/savepoint-e4f781-996290516ef1",
"editorSetting": {
"language": "sql",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"colWidth": 6.0,
"editorMode": "ace/mode/sql",
"fontSize": 9.0,
"editorHide": false,
"title": false,
"runOnSelectionChange": true,
"checkEmpty": true,
"results": {
"0": {
"graph": {
"mode": "multiBarChart",
"height": 198.0,
"optionOpen": false,
"setting": {
"table": {
"tableGridState": {},
"tableColumnTypeState": {
"names": {
"url": "string",
"pv": "string"
},
"updated": false
},
"tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]",
"tableOptionValue": {
"useFilter": false,
"showPagination": false,
"showAggregationFooter": false
},
"updated": false,
"initialized": false
},
"multiBarChart": {
"xLabelStatus": "default",
"rotate": {
"degree": "-45"
}
}
},
"commonSetting": {},
"keys": [],
"groups": [],
"values": []
},
"helium": {}
},
"1": {
"graph": {
"mode": "table",
"height": 86.0,
"optionOpen": false
}
}
},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1569491705462_1478852202",
"id": "paragraph_1546571485092_-716357716",
"dateCreated": "2019-09-26 17:55:05.462",
"dateStarted": "2019-09-26 17:56:45.502",
"dateFinished": "2019-09-26 17:57:24.042",
"status": "ABORT"
},
{
"title": "Page View per Page for Every 5 Seconds",
"text": "%flink.ssql(type\u003dts, parallelism\u003d1, refreshInterval\u003d2000, enableSavePoint\u003dfalse, runWithSavePoint\u003dfalse, threshold\u003d60000)\n\nselect TUMBLE_START(rowtime, INTERVAL \u00275\u0027 SECOND) as start_time, url, count(1) as pv from log group by TUMBLE(rowtime, INTERVAL \u00275\u0027 SECOND), url",
"user": "anonymous",
"dateUpdated": "2019-10-11 10:38:03.669",
"config": {
"editorSetting": {
"language": "sql",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"colWidth": 12.0,
"editorMode": "ace/mode/sql",
"fontSize": 9.0,
"title": false,
"runOnSelectionChange": true,
"checkEmpty": true,
"results": {
"0": {
"graph": {
"mode": "lineChart",
"height": 300.0,
"optionOpen": false,
"setting": {
"table": {
"tableGridState": {},
"tableColumnTypeState": {
"names": {
"start_time": "string",
"url": "string",
"pv": "string"
},
"updated": false
},
"tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]",
"tableOptionValue": {
"useFilter": false,
"showPagination": false,
"showAggregationFooter": false
},
"updated": false,
"initialized": false
},
"lineChart": {
"rotate": {
"degree": "-45"
},
"xLabelStatus": "rotate"
}
},
"commonSetting": {},
"keys": [
{
"name": "start_time",
"index": 0.0,
"aggr": "sum"
}
],
"groups": [
{
"name": "url",
"index": 1.0,
"aggr": "sum"
}
],
"values": [
{
"name": "pv",
"index": 2.0,
"aggr": "sum"
}
]
},
"helium": {}
}
},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TABLE",
"data": "start_time\turl\tpv\n2018-01-01 00:00:00.0\thome\t9\n2018-01-01 00:00:00.0\tsearch\t17\n2018-01-01 00:00:00.0\tproduct\t24\n2018-01-01 00:00:05.0\tsearch\t17\n2018-01-01 00:00:05.0\thome\t8\n2018-01-01 00:00:05.0\tproduct\t25\n"
}
]
},
"apps": [],
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1569491705462_-861601454",
"id": "paragraph_1546571542468_1571709353",
"dateCreated": "2019-09-26 17:55:05.462",
"status": "READY"
},
{
"text": "%flink.ssql\n",
"user": "anonymous",
"dateUpdated": "2019-09-26 17:55:05.462",
"config": {},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1569491705462_-1279435256",
"id": "paragraph_1546571603746_-749250139",
"dateCreated": "2019-09-26 17:55:05.462",
"status": "READY"
}
],
"name": "Flink Stream Tutorial",
"id": "2ER62Y5VJ",
"defaultInterpreterGroup": "spark",
"version": "0.9.0-SNAPSHOT",
"permissions": {},
"noteParams": {},
"noteForms": {},
"angularObjects": {},
"config": {
"isZeppelinNotebookCronEnable": false
},
"info": {},
"path": "/Flink Stream Tutorial"
}