blob: 62c1400bb4e294c71e6c05ff4cacc3032c10acd2 [file] [log] [blame]
{"paragraphs":[{"text":"%INTERPRETER_NAME\nsparkR.session()\n\nfull_path <- function(file_path) {\n working_storage <- \"WORKING_STORAGE\"\n output_directory <- \"zeppelin/r\"\n protocol_name <- 'PROTOCOL_NAME'\n sprintf('%s://%s/%s/%s', protocol_name, working_storage, output_directory, file_path)\n}","dateUpdated":"2018-01-04T09:24:59+0000","config":{"colWidth":12,"editorMode":"ace/mode/text","results":{},"enabled":true,"editorSetting":{"language":"text","editOnDblClick":false}},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1515057823463_-1635169484","id":"20170329-112414_1472595813","dateCreated":"2018-01-04T09:23:43+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:372"},{"text":"%INTERPRETER_NAME\ncarriers <- read.df(full_path(\"carriers\"), \"parquet\")\ncreateOrReplaceTempView(carriers, \"carriers\")\nprintSchema(carriers)\nhead(carriers, 20)","dateUpdated":"2018-01-04T09:24:07+0000","config":{"colWidth":12,"editorMode":"ace/mode/text","results":{},"enabled":true,"editorSetting":{"language":"text","editOnDblClick":false}},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1515057823469_-1639016973","id":"20170329-112449_1638412317","dateCreated":"2018-01-04T09:23:43+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:373"},{"text":"%INTERPRETER_NAME\nairports <- read.df(full_path(\"airports\"), \"parquet\")\ncreateOrReplaceTempView(airports, \"airports\")\nprintSchema(airports)\nhead(airports, 20)","dateUpdated":"2018-01-04T09:24:08+0000","config":{"colWidth":12,"editorMode":"ace/mode/text","results":{},"enabled":true,"editorSetting":{"language":"text","editOnDblClick":false}},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1515057823470_-1637862726","id":"20170329-112510_202152993","dateCreated":"2018-01-04T09:23:43+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:374"},{"text":"%INTERPRETER_NAME\nflights <- read.df(full_path(\"flights\"), \"parquet\")\ncreateOrReplaceTempView(flights, \"flights\")\nprintSchema(flights)\nhead(flights, 10)[c(\"ArrDelay\",\"CarrierDelay\",\"WeatherDelay\",\"Distance\")]","dateUpdated":"2018-01-04T09:24:10+0000","config":{"colWidth":12,"editorMode":"ace/mode/text","results":{},"enabled":true,"editorSetting":{"language":"text","editOnDblClick":false}},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1515057823471_-1638247475","id":"20170329-112523_1571758659","dateCreated":"2018-01-04T09:23:43+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:375"},{"text":"%INTERPRETER_NAME\nhead(summary(limit(flights,10)))[c(\"summary\", \"ArrDelay\",\"CarrierDelay\",\"Distance\")]","dateUpdated":"2018-01-04T09:24:10+0000","config":{"colWidth":12,"editorMode":"ace/mode/text","results":{},"enabled":true,"editorSetting":{"language":"text","editOnDblClick":false}},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1515057823472_-1627859255","id":"20170329-112535_1375397859","dateCreated":"2018-01-04T09:23:43+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:376"},{"text":"%INTERPRETER_NAME\nlibrary(ggplot2)\nlibrary(reshape2)\n\ndelay_sql <- sql(\"\nSELECT SUBSTR(c.description, 0, 15) as Carrier, WorkDayDelay, WeekendDelay \nFROM\n (SELECT CEIL( AVG(f.ArrDelay + f.DepDelay) ) as WorkDayDelay, f.UniqueCarrier\n FROM flights f\n WHERE f.DayOfWeek < 6\n GROUP BY f.UniqueCarrier \n ORDER BY WorkDayDelay desc \n LIMIT 10) t\n JOIN\n (SELECT CEIL( AVG(f.ArrDelay + f.DepDelay) ) as WeekendDelay, f.UniqueCarrier\n FROM flights f\n WHERE f.DayOfWeek > 5\n GROUP BY f.UniqueCarrier) t1\n ON t.UniqueCarrier = t1.UniqueCarrier\n JOIN carriers c \n ON t.UniqueCarrier = c.code \nORDER BY WeekendDelay DESC, WorkDayDelay DESC\n\")\n\ndelay <- collect(delay_sql)\ndelay_melt <- melt(delay[c('Carrier', 'WorkDayDelay', 'WeekendDelay')])\n\ncolor_range_days <- c(\"#2966FF\", \"#61F2FF\")\n\nggplot(data=delay_melt, aes(x=Carrier, y=value, fill=variable)) +\n geom_bar(stat=\"identity\", width=.7, position=\"dodge\") +\n stat_summary(fun.y=mean, geom = \"line\", mapping = aes(group = 1), color=\"red\") +\n stat_summary(fun.y=mean, geom = \"point\", mapping = aes(group = 1), color=\"red\") +\n theme(legend.position=\"right\", axis.text.x=element_text(angle=90)) +\n labs(x=\"Carrier\", y=\"Minutes\", fill=\"Day Type\") +\n coord_fixed(ratio = .2) +\n scale_fill_manual(values=color_range_days) +\n scale_y_continuous(breaks=seq(0, 30, 5))","dateUpdated":"2018-01-04T09:24:12+0000","config":{"colWidth":12,"editorMode":"ace/mode/text","results":{},"enabled":true,"editorSetting":{"language":"text","editOnDblClick":false}},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1515057823472_-1627859255","id":"20170329-112549_2110062261","dateCreated":"2018-01-04T09:23:43+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:377"},{"text":"%INTERPRETER_NAME\ntop_flights_sql <- sql(\"\nSELECT t.cnt as FlightsAmt, carriers.description as Carrier \nFROM (\n SELECT count(*) as cnt, flights.UniqueCarrier as carrier_code \n FROM flights \n GROUP BY flights.UniqueCarrier LIMIT 6) t \nLEFT JOIN carriers \n ON t.carrier_code = carriers.code\n\")\n\ntop_flights <- collect(top_flights_sql)\n\nggplot(transform(transform(top_flights, value=FlightsAmt/sum(FlightsAmt)), labPos=cumsum(FlightsAmt)-FlightsAmt/2), \n aes(x=\"\", y = FlightsAmt, fill = Carrier)) +\n geom_bar(width = 1, stat = \"identity\") +\n coord_polar(\"y\", start=0) +\n scale_fill_brewer(palette=\"Dark2\") +\n theme_bw() +\n theme(axis.text.x=element_blank() ,panel.grid.major=element_blank(),panel.grid.minor = element_blank(),panel.border = element_blank()) +\n geom_text(size=4, aes(y=labPos, label=scales::percent(value))) + \n geom_text(size=3, aes(x=1.8, y=labPos, label=top_flights$Carrier)) + \n theme(legend.position=\"none\")","dateUpdated":"2018-01-04T09:24:14+0000","config":{"colWidth":12,"editorMode":"ace/mode/text","results":{},"enabled":true,"editorSetting":{"language":"text","editOnDblClick":false}},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1515057823473_-1628244004","id":"20170329-112607_812774791","dateCreated":"2018-01-04T09:23:43+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:378"},{"text":"%INTERPRETER_NAME\ndistance_sql = sql(\"\nSELECT SUBSTR(c.description, 0, 15) as Carrier, COUNT(Distance) AS Distance \nFROM flights f \nJOIN carriers c \n ON f.UniqueCarrier = c.code \nGROUP BY c.description \nORDER BY distance DESC \nLIMIT 10\n\")\n\ndistance <- collect(distance_sql)\n\ndistance$Carrier <- factor(distance$Carrier, levels = distance$Carrier[order(-distance$Distance)])\n\ncolor_range <- c(\"#2966FF\", \"#2E73FF\",\"#3380FF\", \"#388CFF\", \"#3D99FF\", \"#42A6FF\", \"#47B2FF\", \"#4CBFFF\", \"#52CCFF\", \n \"#57D9FF\", \"#5CE6FF\", \"#61F2FF\", \"#66FFFF\")\n\nggplot(data=distance, aes(x=Carrier, y=Distance, fill=Carrier)) +\n geom_bar(stat=\"identity\", width=.7, position=\"dodge\") +\n theme(axis.text.x=element_text(angle=90)) +\n scale_fill_manual(values=color_range) +\n theme(legend.position=\"none\")","dateUpdated":"2018-01-04T09:24:16+0000","config":{"colWidth":12,"editorMode":"ace/mode/text","results":{},"enabled":true,"editorSetting":{"language":"text","editOnDblClick":false}},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1515057823474_-1627089757","id":"20170329-112623_1822577399","dateCreated":"2018-01-04T09:23:43+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:379"}],"name":"Flights_Visualization_SparkR","id":"2D3B7XFMR","angularObjects":{"2C6RJRBD2:shared_process":[],"2C6RJRBD1:shared_process":[]},"config":{"looknfeel":"default","personalizedMode":"false"},"info":{}}