/**
 * Webpack JSONP chunk 53, module "qLsL": the compiled "Exploring Data in
 * Superset" documentation page.
 *
 * The module exports:
 *   - `_frontmatter`: an empty object tagged with `__filemeta`;
 *   - `default`: the `MDXContent` component, which renders the page inside the
 *     layout exported as `a` by module "hhGP".
 *
 * Changes relative to the previous build output:
 *   - fixed user-facing instruction errors ("If you're not there already",
 *     "Go into edit mode", "by clicking on", "and then selecting",
 *     "the value shown", "total cost for each day in October 2011");
 *   - dropped the stray line-break characters that were embedded in the middle
 *     of several sentences;
 *   - de-duplicated the `__filemeta` tagging and the element-creation calls.
 *
 * NOTE(review): the `__filemeta` value names an .mdx file as the origin of this
 * chunk, so this looks like build output. The same text fixes presumably need
 * to be applied to that file, otherwise a rebuild will overwrite them - confirm.
 */
(window.webpackJsonp = window.webpackJsonp || []).push([
  [53],
  {
    qLsL: function (module, exports, __webpack_require__) {
      "use strict";

      __webpack_require__.r(exports);
      __webpack_require__.d(exports, "_frontmatter", function () {
        return _frontmatter;
      });
      __webpack_require__.d(exports, "default", function () {
        return MDXContent;
      });

      // `var` is kept on purpose: the export getters above are registered
      // before these bindings are initialised, and must yield `undefined`
      // (not throw) if they are read early.
      // The require order matches the original so load-time side effects of
      // the required modules happen in the same sequence.
      var extendsHelper = __webpack_require__("k1TG"); // `.a` presumably an Object.assign-style merge - confirm
      var omitHelper = __webpack_require__("8o2o"); // `.a` presumably "object without listed keys" - confirm
      __webpack_require__("q1tI"); // required for side effects only
      var mdx = __webpack_require__("7ljp"); // `.b` is the element factory used for every node
      var layout = __webpack_require__("hhGP"); // `.a` is the page layout component
      __webpack_require__("qKvR"); // required for side effects only

      var SOURCE_FILE =
        "src/pages/docs/Creating Charts and Dashboards/exploring-data.mdx";

      var _frontmatter = {};
      attachFileMeta(_frontmatter, "_frontmatter");

      var layoutProps = { _frontmatter: _frontmatter };
      var MDXLayout = layout.a;

      /**
       * Tags `target` with a configurable, non-enumerable `__filemeta`
       * property unless it is not an extensible object or already has one.
       *
       * @param {*} target - value to tag (object or function).
       * @param {string} name - export name recorded in the metadata.
       */
      function attachFileMeta(target, name) {
        if (
          target !== undefined &&
          target &&
          target === Object(target) &&
          Object.isExtensible(target) &&
          !Object.prototype.hasOwnProperty.call(target, "__filemeta")
        ) {
          Object.defineProperty(target, "__filemeta", {
            configurable: true,
            value: { name: name, filename: SOURCE_FILE },
          });
        }
      }

      /** Calls the element factory (`mdx.b`) with the given arguments and no `this`. */
      function h() {
        return mdx.b.apply(undefined, arguments);
      }

      /** Creates `type` with `props` and the array-like `children` spread as child arguments. */
      function withChildren(type, props, children) {
        return h.apply(
          undefined,
          [type, props].concat(Array.prototype.slice.call(children))
        );
      }

      /** Paragraph: every argument is a child node. */
      function p() {
        return withChildren("p", null, arguments);
      }

      /** Unordered list: every argument is a child node. */
      function ul() {
        return withChildren("ul", null, arguments);
      }

      /** List item inside a `ul`: every argument is a child node. */
      function li() {
        return withChildren("li", { parentName: "ul" }, arguments);
      }

      /** Inline element `tag` holding `text`; `parentName` defaults to "p". */
      function inline(tag, text, parentName) {
        return h(tag, { parentName: parentName || "p" }, text);
      }

      function strong(text, parentName) {
        return inline("strong", text, parentName);
      }

      function em(text, parentName) {
        return inline("em", text, parentName);
      }

      function code(text, parentName) {
        return inline("inlineCode", text, parentName);
      }

      /** Image element with the given `src`. */
      function img(src) {
        return h("img", { src: src });
      }

      /** Heading element (`tag` is "h2"/"h3") with an anchor `id`. */
      function heading(tag, id, text) {
        return h(tag, { id: id }, text);
      }

      /** Link inside a paragraph; props are merged through the extends helper as before. */
      function link(href, text) {
        var extend = extendsHelper.a;
        return h("a", extend({ parentName: "p" }, { href: href }), text);
      }

      /**
       * Page component.
       *
       * @param {Object} props - component props; `components` is forwarded to
       *   the layout separately, all other props are spread onto the layout.
       * @returns {*} whatever the element factory returns for the layout node.
       */
      function MDXContent(props) {
        var components = props.components;
        var omit = omitHelper.a;
        var extend = extendsHelper.a;
        var rest = omit(props, ["components"]);

        return h(
          MDXLayout,
          extend({}, layoutProps, rest, {
            components: components,
            mdxType: "MDXLayout",
          }),

          heading("h2", "exploring-data-in-superset", "Exploring Data in Superset"),
          p("In this tutorial, we will introduce key concepts in Apache Superset through the exploration of a\nreal dataset which contains the flights made by employees of a UK-based organization in 2011. The\nfollowing information about each flight is given:"),
          ul(
            li("The traveller’s department. For the purposes of this tutorial the departments have been renamed\nOrange, Yellow and Purple."),
            li("The cost of the ticket."),
            li("The travel class (Economy, Premium Economy, Business and First Class)."),
            li("Whether the ticket was a single or return."),
            li("The date of travel."),
            li("Information about the origin and destination."),
            li("The distance between the origin and destination, in kilometers (km).")
          ),

          heading("h3", "enabling-data-upload-functionality", "Enabling Data Upload Functionality"),
          p("You may need to enable the functionality to upload a CSV or Excel file to your database. The following section\nexplains how to enable this functionality for the examples database."),
          p("In the top menu, select ", strong("Data ‣ Databases"), ". Find the ", strong("examples"), " database in the list and\nselect the ", strong("Edit"), " button."),
          img("/images/edit-record.png"),
          p("In the resulting modal window, switch to the ", strong("Extra"), " tab and\ntick the checkbox for ", strong("Allow Data Upload"), ". End by clicking the ", strong("Save"), " button."),
          img("/images/add-data-upload.png"),

          heading("h3", "loading-csv-data", "Loading CSV Data"),
          p("Download the CSV dataset to your computer from\n", link("https://raw.githubusercontent.com/apache-superset/examples-data/master/tutorial_flights.csv", "Github"), ".\nIn the Superset menu, select ", strong("Data ‣ Upload a CSV"), "."),
          img("/images/upload_a_csv.png"),
          p("Then, enter the ", strong("Table Name"), " as ", em("tutorial_flights"), " and select the CSV file from your computer."),
          img("/images/csv_to_database_configuration.png"),
          p("Next enter the text ", em("Travel Date"), " into the ", strong("Parse Dates"), " field."),
          img("/images/parse_dates_column.png"),
          p("Leaving all the other options in their default settings, select ", strong("Save"), " at the bottom of the page."),

          heading("h3", "table-visualization", "Table Visualization"),
          p("You should now see ", em("tutorial_flights"), " as a dataset in the ", strong("Datasets"), " tab. Click on the entry to\nlaunch an Explore workflow using this dataset."),
          p("In this section, we'll create a table visualization\nto show the number of flights and cost per travel class."),
          p("By default, Apache Superset only shows the last week of data. In our example, we want to visualize all\nof the data in the dataset. Click the ", strong("Time ‣ Time Range"), " section and change\nthe ", strong("Range Type"), " to ", strong("No Filter"), "."),
          img("/images/no_filter_on_time_filter.png"),
          p("Click ", strong("Apply"), " to save."),
          p("Now, we want to specify the rows in our table by using the ", strong("Group by"), " option. Since in this\nexample, we want to understand different Travel Classes, we select ", strong("Travel Class"), " in this menu."),
          p("Next, we can specify the metrics we would like to see in our table with the ", strong("Metrics"), " option."),
          ul(
            li(code("COUNT(*)", "li"), ", which represents the number of rows in the table\n(in this case, quantity of flights in each Travel Class)"),
            li(code("SUM(Cost)", "li"), ", which represents the total cost spent by each Travel Class")
          ),
          img("/images/sum_cost_column.png"),
          p("Finally, select ", strong("Run Query"), " to see the results of the table."),
          img("/images/tutorial_table.png"),
          p("To save the visualization, click on ", strong("Save"), " in the top left of the screen. In the following modal,"),
          ul(
            li("Select the ", strong("Save as", "li"), "\noption and enter the chart name as Tutorial Table (you will be able to find it again through the\n", strong("Charts", "li"), " screen, accessible in the top menu)."),
            li("Select ", strong("Add To Dashboard", "li"), " and enter\nTutorial Dashboard. Finally, select ", strong("Save & Go To Dashboard", "li"), ".")
          ),
          img("/images/save_tutorial_table.png"),

          heading("h3", "dashboard-basics", "Dashboard Basics"),
          p("Next, we are going to explore the dashboard interface. If you’ve followed the previous section, you\nshould already have the dashboard open. Otherwise, you can navigate to the dashboard by selecting\nDashboards on the top menu, then Tutorial dashboard from the list of dashboards."),
          p("On this dashboard you should see the table you created in the previous section. Select ", strong("Edit\ndashboard"), " and then hover over the table. By selecting the bottom right hand corner of the table\n(the cursor will change too), you can resize it by dragging and dropping."),
          img("/images/resize_tutorial_table_on_dashboard.png"),
          p("Finally, save your changes by selecting Save changes in the top right."),

          heading("h3", "pivot-table", "Pivot Table"),
          p("In this section, we will extend our analysis using a more complex visualization, Pivot Table. By the\nend of this section, you will have created a table that shows the monthly spend on flights for the\nfirst six months, by department, by travel class."),
          p("Create a new chart by selecting ", strong("+ ‣ Chart"), " from the top right corner. Choose\ntutorial_flights again as a datasource, then click on the visualization type to get to the\nvisualization menu. Select the ", strong("Pivot Table"), " visualization (you can filter by entering text in the\nsearch box) and then ", strong("Create New Chart"), "."),
          img("/images/create_pivot.png"),
          // Fixed: "by click on" -> "by clicking on".
          p("In the ", strong("Time"), " section, keep the Time Column as Travel Date (this is selected automatically as we\nonly have one time column in our dataset). Then select Time Grain to be month as having daily data\nwould be too granular to see patterns from. Then select the time range to be the first six months of\n2011 by clicking on Last week in the Time Range section, then in Custom selecting a Start / end of 1st\nJanuary 2011 and 30th June 2011 respectively by either entering directly the dates or using the\ncalendar widget (by selecting the month name and then the year, you can move more quickly to far\naway dates)."),
          img("/images/select_dates_pivot_table.png"),
          p("Next, within the ", strong("Query"), " section, remove the default COUNT(", "*", ") and add Cost, keeping the default\nSUM aggregate. Note that Apache Superset will indicate the type of the metric by the symbol on the\nleft hand column of the list (ABC for string, # for number, a clock face for time, etc.)."),
          p("In ", strong("Group by"), " select ", strong("Time"), ": this will automatically use the Time Column and Time Grain\nselections we defined in the Time section."),
          p("Within ", strong("Columns"), ", select first Department and then Travel Class. All set – let’s ", strong("Run Query"), " to\nsee some data!"),
          img("/images/tutorial_pivot_table.png"),
          p("You should see months in the rows and Department and Travel Class in the columns. Publish this chart\nto your existing Tutorial Dashboard you created earlier."),

          heading("h3", "line-chart", "Line Chart"),
          p("In this section, we are going to create a line chart to understand the average price of a ticket by\nmonth across the entire dataset."),
          p("In the Time section, as before, keep the Time Column as Travel Date and Time Grain as month but this\ntime for the Time range select No filter as we want to look at entire dataset."),
          p("Within Metrics, remove the default ", code("COUNT(*)"), " metric and instead add ", code("AVG(Cost)"), ", to show the mean value."),
          img("/images/average_aggregate_for_cost.png"),
          p("Next, select ", strong("Run Query"), " to show the data on the chart."),
          p("How does this look? Well, we can see that the average cost goes up in December. However, perhaps it\ndoesn’t make sense to combine both single and return tickets, but rather show two separate lines for\neach ticket type."),
          // Fixed: "and the selecting" -> "and then selecting".
          p("Let’s do this by selecting Ticket Single or Return in the Group by box, and then selecting ", strong("Run\nQuery"), " again. Nice! We can see that on average single tickets are cheaper than returns and that the\nbig spike in December is caused by return tickets."),
          p("Our chart is looking pretty good already, but let’s customize some more by going to the Customize\ntab on the left hand pane. Within this pane, try changing the Color Scheme, removing the range\nfilter by selecting No in the Show Range Filter drop down and adding some labels using X Axis Label\nand Y Axis Label."),
          img("/images/tutorial_line_chart.png"),
          p("Once you’re done, publish the chart in your Tutorial Dashboard."),

          heading("h3", "markup", "Markup"),
          // Fixed: inverted condition ("If you're there already") and "Got into" typo.
          p("In this section, we will add some text to our dashboard. If you’re not there already, you can navigate\nto the dashboard by selecting Dashboards on the top menu, then Tutorial dashboard from the list of\ndashboards. Go into edit mode by selecting ", strong("Edit dashboard"), "."),
          p("Within the Insert components pane, drag and drop a Markdown box on the dashboard. Look for the blue\nlines which indicate the anchor where the box will go."),
          img("/images/blue_bar_insert_component.png"),
          p("Now, to edit the text, select the box. You can enter text, in markdown format (see\n", link("https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet", "this Markdown Cheatsheet"), " for\nmore information about this format). You can toggle between Edit and Preview using the menu on the\ntop of the box."),
          img("/images/markdown.png"),
          p("To exit, select any other part of the dashboard. Finally, don’t forget to keep your changes using\n", strong("Save changes"), "."),

          heading("h3", "filter-box", "Filter Box"),
          p("In this section, you will learn how to add a filter to your dashboard. Specifically, we will create\na filter that allows us to look at those flights that depart from a particular country."),
          p("A filter box visualization can be created as any other visualization by selecting ", strong("+ ‣ Chart"), ",\nand then ", em("tutorial_flights"), " as the datasource and Filter Box as the visualization type."),
          p("First of all, in the ", strong("Time"), " section, remove the filter from the Time range selection by selecting\nNo filter."),
          p("Next, in ", strong("Filters Configurations"), " first add a new filter by selecting the plus sign and then edit\nthe newly created filter by selecting the pencil icon."),
          p("For our use case, it makes most sense to present a list of countries in alphabetical order. First,\nenter the column as Origin Country and keep all other options the same and then select ", strong("Run\nQuery"), ". This gives us a preview of our filter."),
          p("Next, remove the date filter by unchecking the Date Filter checkbox."),
          img("/images/filter_on_origin_country.png"),
          p("Finally, select ", strong("Save"), ", name the chart as Tutorial Filter, add the chart to our existing Tutorial\nDashboard and then Save & go to dashboard. Once on the Dashboard, try using the filter to show only\nthose flights that departed from the United Kingdom – you will see the filter is applied to all of\nthe other visualizations on the dashboard."),

          heading("h3", "publishing-your-dashboard", "Publishing Your Dashboard"),
          p("If you have followed all of the steps outlined in the previous section, you should have a dashboard\nthat looks like the below. If you would like, you can rearrange the elements of the dashboard by\nselecting ", strong("Edit dashboard"), " and dragging and dropping."),
          p("If you would like to make your dashboard available to other users, simply select Draft next to the\ntitle of your dashboard on the top left to change your dashboard to be in Published state. You can\nalso favorite this dashboard by selecting the star."),
          img("/images/publish_dashboard.png"),

          heading("h3", "annotations", "Annotations"),
          p("Annotations allow you to add additional context to your chart. In this section, we will add an\nannotation to the Tutorial Line Chart we made in a previous section. Specifically, we will add the\ndates when some flights were cancelled by the UK’s Civil Aviation Authority in response to the\neruption of the Grímsvötn volcano in Iceland (23-25 May 2011)."),
          p("First, add an annotation layer by navigating to Manage ‣ Annotation Layers. Add a new annotation\nlayer by selecting the green plus sign to add a new record. Enter the name Volcanic Eruptions and\nsave. We can use this layer to refer to a number of different annotations."),
          p("Next, add an annotation by navigating to Manage ‣ Annotations and then create a new annotation by\nselecting the green plus sign. Then, select the Volcanic Eruptions layer, add a short description\nGrímsvötn and the eruption dates (23-25 May 2011) before finally saving."),
          img("/images/edit_annotation.png"),
          p("Then, navigate to the line chart by going to Charts then selecting Tutorial Line Chart from the\nlist. Next, go to the Annotations and Layers section and select Add Annotation Layer. Within this\ndialogue:"),
          ul(
            li("Name the layer as Volcanic Eruptions"),
            li("Change the Annotation Layer Type to Event"),
            li("Set the Annotation Source as Superset annotation"),
            li("Specify the Annotation Layer as Volcanic Eruptions")
          ),
          img("/images/annotation_settings.png"),
          p("Select ", strong("Apply"), " to see your annotation shown on the chart."),
          img("/images/annotation.png"),
          p("If you wish, you can change how your annotation looks by changing the settings in the Display\nconfiguration section. Otherwise, select ", strong("OK"), " and finally ", strong("Save"), " to save your chart. If you keep\nthe default selection to overwrite the chart, your annotation will be saved to the chart and also\nappear automatically in the Tutorial Dashboard."),

          heading("h3", "advanced-analytics", "Advanced Analytics"),
          // NOTE(review): the sentence below announces a list that is not present
          // in this chunk; the missing items cannot be recovered from here.
          p("In this section, we are going to explore the Advanced Analytics feature of Apache Superset that\nallows you to apply additional transformations to your data. The three types of transformation are:"),
          p(strong("Setting up the base chart")),
          p("In this section, we’re going to set up a base chart which we can then apply the different ", strong("Advanced\nAnalytics"), " features to. Start off by creating a new chart using the same ", em("tutorial_flights"), "\ndatasource and the ", strong("Line Chart"), " visualization type. Within the Time section, set the Time Range as\n1st October 2011 and 31st October 2011."),
          // Fixed: "per day for each month in October" was self-contradictory.
          p("Next, in the query section, change the Metrics to the sum of Cost. Select ", strong("Run Query"), " to show the\nchart. You should see the total cost for each day in October 2011."),
          img("/images/advanced_analytics_base.png"),
          p("Finally, save the visualization as Tutorial Advanced Analytics Base, adding it to the Tutorial\nDashboard."),

          heading("h3", "rolling-mean", "Rolling Mean"),
          p("There is quite a lot of variation in the data, which makes it difficult to identify any trend. One\napproach we can take is to show instead a rolling average of the time series. To do this, in the\n", strong("Moving Average"), " subsection of ", strong("Advanced Analytics"), ", select mean in the ", strong("Rolling"), " box and\nenter 7 into both Periods and Min Periods. The period is the length of the rolling period expressed\nas a multiple of the Time Grain. In our example, the Time Grain is day, so the rolling period is 7\ndays, such that on the 7th October 2011 the value shown would correspond to the first seven days of\nOctober 2011. Lastly, by specifying Min Periods as 7, we ensure that our mean is always calculated\non 7 days and we avoid any ramp up period."),
          p("After displaying the chart by selecting ", strong("Run Query"), " you will see that the data is less variable\nand that the series starts later as the ramp up period is excluded."),
          img("/images/rolling_mean.png"),
          p("Save the chart as Tutorial Rolling Mean and add it to the Tutorial Dashboard."),

          heading("h3", "time-comparison", "Time Comparison"),
          p("In this section, we will compare values in our time series to the value a week before. Start off by\nopening the Tutorial Advanced Analytics Base chart, by going to ", strong("Charts"), " in the top menu and then\nselecting the visualization name in the list (alternatively, find the chart in the Tutorial\nDashboard and select Explore chart from the menu for that visualization)."),
          p("Next, in the Time Comparison subsection of ", strong("Advanced Analytics"), ", enter the Time Shift by typing in\n“minus 1 week” (note this box accepts input in natural language). Run Query to see the new chart,\nwhich has an additional series with the same values, shifted a week back in time."),
          img("/images/time_comparison_two_series.png"),
          p("Then, change the ", strong("Calculation type"), " to Absolute difference and select ", strong("Run Query"), ". We can now\nsee only one series again, this time showing the difference between the two series we saw\npreviously."),
          img("/images/time_comparison_absolute_difference.png"),
          p("Save the chart as Tutorial Time Comparison and add it to the Tutorial Dashboard."),

          heading("h3", "resampling-the-data", "Resampling the data"),
          p("In this section, we’ll resample the data so that rather than having daily data we have weekly data.\nAs in the previous section, reopen the Tutorial Advanced Analytics Base chart."),
          p("Next, in the Python Functions subsection of ", strong("Advanced Analytics"), ", enter 7D, corresponding to seven\ndays, in the Rule and median as the Method and show the chart by selecting ", strong("Run Query"), "."),
          img("/images/resample.png"),
          // Fixed: "the value showed" -> "the value shown".
          p("Note that now we have a single data point every 7 days. In our case, the value shown corresponds to\nthe median value within the seven daily data points. For more information on the meaning of the\nvarious options in this section, refer to the\n", link("https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html", "Pandas documentation"), "."),
          p("Lastly, save your chart as Tutorial Resample and add it to the Tutorial Dashboard. Go to the\ntutorial dashboard to see the four charts side by side and compare the different outputs.")
        );
      }

      attachFileMeta(MDXContent, "MDXContent");
      MDXContent.isMDXComponent = true;
    },
  },
]);
//# sourceMappingURL=component---src-pages-docs-creating-charts-and-dashboards-exploring-data-mdx-1165611fc2427af2e75c.js.map