blob: 22b73862147ff37b876a1f1900378cc9edae2ee4 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#+TITLE: Query DSL
#+AUTHOR: Harish Butani
#+EMAIL: hbutani@apache.org
#+LANGUAGE: en
#+INFOJS_OPT: view:showall toc:t ltoc:t mouse:underline path:http://orgmode.org/org-info.js
#+LINK_HOME: http://home.fnal.gov/~neilsen
#+LINK_UP: http://home.fnal.gov/~neilsen/notebook
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://orgmode.org/org-manual.css" />
#+LaTeX_CLASS: smarticle
#+LaTeX_HEADER: \pdfmapfile{/home/neilsen/texmf/fonts/map/dvips/libertine/libertine.map}
#+LaTeX_HEADER: \usepackage[ttscale=.875]{libertine}
#+LaTeX_HEADER: \usepackage{sectsty}
#+LaTeX_HEADER: \sectionfont{\normalfont\scshape}
#+LaTeX_HEADER: \subsectionfont{\normalfont\itshape}
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+OPTIONS: H:2 num:nil toc:nil \n:nil @:t ::t |:t ^:{} _:{} *:t TeX:t LaTeX:t
#+STARTUP: showall
#+OPTIONS: html-postamble:nil
** Example Type Definitions
#+begin_src plantuml :file class_diagram.png
scale 1300 width
note left of Trait : traits are classifications/tags attached to Instances
class Trait
Trait <|-- JDbcAccess
Trait <|-- PII
Trait <|-- Dimension
Trait <|-- Metric
Trait <|-- ETL
class Object
Object --* Trait : traits >
Object <|-- DB
Object <|-- Table
Object <|-- Column
class DB {
name : String
owner : String
}
class StorageDescriptor {
inputFormat : String
outputFormat : String
}
class Column {
name : String
dataType : String
}
class Table {
name: String
db: DB
}
Table -> StorageDescriptor : storageDesc >
Table -> DB : db >
Column *-> StorageDescriptor : storageDesc >
class LoadProcess {
name String
}
LoadProcess -* Table : inputTables >
LoadProcess -> Table : outputTable >
class View {
name String
}
View -* Table : inputTables >
#+end_src
#+CAPTION: ETL and Reporting Scenario Types
#+LABEL: fig:sampleTypeDefs
#+results:
[[file:class_diagram.png]]
** Example Instance Graph
#+begin_src dot :file instanceGraph.png :cmdline -Kdot -Tpng
digraph G {
//size ="6 6";
nodesep=.2;
//rankdir=LR;
ranksep=.25;
node [shape=record fontsize=9];
compound=true;
subgraph cluster0 {
style=bold;
label = "Sales Database"; fontsize=18;
salesDB[label="DB(sales)"]
salesFact[label="Table(sales_fact)" style=filled; color="khaki"]
salesStorage[label="Storage(text,text)"]
sales_time_id[label="time_id" shape="circle" style=filled color="peachpuff"]
sales_product_id[label="product_id" shape="circle" style=filled color="peachpuff"]
sales_customer_id[label="customer_id" shape="circle" style=filled color="peachpuff"]
sales_sales[label="sales" shape="circle" style=filled color="peachpuff"]
sales_sales_metric[label="Metric" style=filled; shape="ellipse" color="turquoise"]
salesFact -> salesDB;
salesFact -> salesStorage;
sales_time_id -> salesStorage;
sales_product_id -> salesStorage;
sales_customer_id -> salesStorage;
sales_sales -> salesStorage;
sales_sales -> sales_sales_metric;
productDim[label="Table(product_dim)" style=filled; color="khaki"]
productStorage[label="Storage(text,text)"]
product_product_id[label="product_id" shape="circle" style=filled color="peachpuff"]
product_product_name[label="product_name" shape="circle" style=filled color="peachpuff"]
product_brand_name[label="brand_name" shape="circle" style=filled color="peachpuff"]
product_dimension[label="Dimension" style=filled; shape="ellipse" color="turquoise"]
productDim -> salesDB;
productDim -> productStorage;
product_product_id -> productStorage;
product_product_name -> productStorage;
product_brand_name -> productStorage;
productDim -> product_dimension;
productDim -> salesFact [style=invis];
timeDim[label="Table(time_dim)" style=filled; color="khaki"]
timeStorage[label="Storage(text,text)"]
time_time_id[label="time_id" shape="circle" style=filled color="peachpuff"]
time_dayOfYear[label="day_of_year" shape="circle" style=filled color="peachpuff"]
time_weekDay[label="week_day" shape="circle" style=filled color="peachpuff"]
time_dimension[label="Dimension" style=filled; shape="ellipse" color="turquoise"]
timeDim -> salesDB;
timeDim -> timeStorage;
time_time_id -> timeStorage;
time_dayOfYear -> timeStorage;
time_weekDay -> timeStorage;
timeDim -> time_dimension;
timeDim -> productDim [style=invis];
customerDim[label="Table(customer_dim)" style=filled; color="khaki"]
customerStorage[label="Storage(text,text)"]
customer_customer_id[label="customer_id" shape="circle" style=filled color="peachpuff"]
customer_name[label="name" shape="circle" style=filled color="peachpuff"]
customer_address[label="address" shape="circle" style=filled color="peachpuff"]
customer_dimension[label="Dimension" style=filled; shape="ellipse" color="turquoise"]
address_pii[label="PII" style=filled; shape="ellipse" color="turquoise"]
customerDim -> salesDB;
customerDim -> customerStorage;
customer_customer_id -> customerStorage;
customer_name -> customerStorage;
customer_address -> customerStorage;
customerDim -> customer_dimension;
customer_address -> address_pii;
customerDim -> timeDim [style=invis];
//{rank=min; salesDB};
{rank=min; salesDB};
};
subgraph cluster1 {
style=bold;
label = "Reporting Database"; fontsize=18;
reportingDB[label="DB(reporting)"]
salesFactDaily[label="Table(sales_daily_mv)" style=filled; color="khaki"]
salesDailyStorage[label="Storage(orc,orc)"]
salesD_time_id[label="time_id" shape="circle" style=filled color="peachpuff"]
salesD_product_id[label="product_id" shape="circle" style=filled color="peachpuff"]
salesD_customer_id[label="customer_id" shape="circle" style=filled color="peachpuff"]
salesD_sales[label="sales" shape="circle" style=filled color="peachpuff"]
salesD_sales_metric[label="Metric" style=filled; shape="ellipse" color="turquoise"]
salesFactDaily -> reportingDB;
salesFactDaily -> salesDailyStorage;
salesD_time_id -> salesDailyStorage;
salesD_product_id -> salesDailyStorage;
salesD_customer_id -> salesDailyStorage;
salesD_sales -> salesDailyStorage;
salesD_sales -> salesD_sales_metric;
salesFactDaily -> reportingDB [style=invis];
productDimView[label="View(product_dim_v)" style=filled; color="khaki"]
productDim -> productDimView [style=dotted];
productDimView_dim[label="Dimension" style=filled; shape="ellipse" color="turquoise"]
productDimView_jdbc[label="JdbcAccess" style=filled; shape="ellipse" color="turquoise"]
productDimView -> productDimView_dim;
productDimView -> productDimView_jdbc;
productDimView -> salesFactDaily [style=invis];
customerDimView[label="View(customer_dim_v)" style=filled; color="khaki"]
customerDim -> customerDimView [style=dotted];
customerDimView_dim[label="Dimension" style=filled; shape="ellipse" color="turquoise"]
customerDimView_jdbc[label="JdbcAccess" style=filled; shape="ellipse" color="turquoise"]
customerDimView -> customerDimView_dim;
customerDimView -> customerDimView_jdbc;
customerDimView -> salesFactDaily [style=invis];
salesMonthly[label="Table(sales_monthly_mv)" style=filled; color="khaki"]
salesMonthlyStorage[label="Storage(orc,orc)"]
salesM_time_id[label="time_id" shape="circle" style=filled color="peachpuff"]
salesM_product_id[label="product_id" shape="circle" style=filled color="peachpuff"]
salesM_customer_id[label="customer_id" shape="circle" style=filled color="peachpuff"]
salesM_sales[label="sales" shape="circle" style=filled color="peachpuff"]
salesM_sales_metric[label="Metric" style=filled; shape="ellipse" color="turquoise"]
salesMonthly -> reportingDB;
salesMonthly -> salesMonthlyStorage;
salesM_time_id -> salesMonthlyStorage;
salesM_product_id -> salesMonthlyStorage;
salesM_customer_id -> salesMonthlyStorage;
salesM_sales -> salesMonthlyStorage;
salesM_sales -> salesM_sales_metric;
salesMonthly -> customerDimView [style=invis];
{rank=min; reportingDB};
};
loadSalesDaily[label="LoadProcess(loadSalesDaily)" style=filled; color="seagreen"; shape="octagon"]
loadSalesDaily_etl[label="ETL" style=filled; shape="ellipse" color="turquoise"]
salesFact -> loadSalesDaily [style=dotted];
timeDim -> loadSalesDaily [style=dotted];
loadSalesDaily -> salesFactDaily [style=dotted];
loadSalesDaily -> loadSalesDaily_etl;
loadSalesMonthly[label="LoadProcess(loadSalesMonthly)" style=filled; color="seagreen"; shape="octagon"]
loadSalesMonthly_etl[label="ETL" style=filled; shape="ellipse" color="turquoise"]
salesFactDaily -> loadSalesMonthly [style=dotted];
timeDim -> loadSalesMonthly [style=dotted];
loadSalesMonthly -> salesMonthly [style=dotted];
loadSalesMonthly -> loadSalesMonthly_etl;
}
#+end_src
#+CAPTION: ETL and Reporting Scenario
#+LABEL: fig:sampleInstanceGraph
#+results:
[[file:instanceGraph.png]]