blob: 70e637ca440827ec58b93692a0b4ddf859e128af [file] [log] [blame]
"use strict";(self.webpackChunkdocs_v_2=self.webpackChunkdocs_v_2||[]).push([[7706],{43335:function(e,t,a){a.r(t),a.d(t,{assets:function(){return p},contentTitle:function(){return l},default:function(){return b},frontMatter:function(){return s},metadata:function(){return c},toc:function(){return d}});var n=a(83117),r=a(80102),o=(a(67294),a(3905)),i=["components"],s={title:"Databricks",hide_title:!0,sidebar_position:37,version:1},l=void 0,c={unversionedId:"databases/databricks",id:"databases/databricks",title:"Databricks",description:"Databricks",source:"@site/docs/databases/databricks.mdx",sourceDirName:"databases",slug:"/databases/databricks",permalink:"/docs/databases/databricks",editUrl:"https://github.com/apache/superset/tree/master/docs/docs/databases/databricks.mdx",tags:[],version:"current",sidebarPosition:37,frontMatter:{title:"Databricks",hide_title:!0,sidebar_position:37,version:1},sidebar:"tutorialSidebar",previous:{title:"CrateDB",permalink:"/docs/databases/cratedb"},next:{title:"Firebird",permalink:"/docs/databases/firebird"}},p={},d=[{value:"Databricks",id:"databricks",level:2},{value:"Older driver",id:"older-driver",level:2},{value:"Hive",id:"hive",level:3},{value:"ODBC",id:"odbc",level:3}],u={toc:d};function b(e){var t=e.components,a=(0,r.Z)(e,i);return(0,o.kt)("wrapper",(0,n.Z)({},u,a,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("h2",{id:"databricks"},"Databricks"),(0,o.kt)("p",null,"Databricks now offer a native DB API 2.0 driver, ",(0,o.kt)("inlineCode",{parentName:"p"},"databricks-sql-connector"),", that can be used with the ",(0,o.kt)("inlineCode",{parentName:"p"},"sqlalchemy-databricks")," dialect. You can install both with:"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-bash"},'pip install "superset[databricks]"\n')),(0,o.kt)("p",null,"To use the Hive connector you need the following information from your cluster:"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},"Server hostname"),(0,o.kt)("li",{parentName:"ul"},"Port"),(0,o.kt)("li",{parentName:"ul"},"HTTP path")),(0,o.kt)("p",null,'These can be found under "Configuration" -> "Advanced Options" -> "JDBC/ODBC".'),(0,o.kt)("p",null,'You also need an access token from "Settings" -> "User Settings" -> "Access Tokens".'),(0,o.kt)("p",null,'Once you have all this information, add a database of type "Databricks Native Connector" and use the following SQLAlchemy URI:'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre"},"databricks+connector://token:{access_token}@{server_hostname}:{port}/{database_name}\n")),(0,o.kt)("p",null,'You also need to add the following configuration to "Other" -> "Engine Parameters", with your HTTP path:'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'{\n "connect_args": {"http_path": "sql/protocolv1/o/****"}\n}\n')),(0,o.kt)("h2",{id:"older-driver"},"Older driver"),(0,o.kt)("p",null,"Originally Superset used ",(0,o.kt)("inlineCode",{parentName:"p"},"databricks-dbapi")," to connect to Databricks. You might want to try it if you're having problems with the official Databricks connector:"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-bash"},'pip install "databricks-dbapi[sqlalchemy]"\n')),(0,o.kt)("p",null,"There are two ways to connect to Databricks when using ",(0,o.kt)("inlineCode",{parentName:"p"},"databricks-dbapi"),": using a Hive connector or an ODBC connector. Both ways work similarly, but only ODBC can be used to connect to ",(0,o.kt)("a",{parentName:"p",href:"https://docs.databricks.com/sql/admin/sql-endpoints.html"},"SQL endpoints"),"."),(0,o.kt)("h3",{id:"hive"},"Hive"),(0,o.kt)("p",null,'To connect to a Hive cluster add a database of type "Databricks Interactive Cluster" in Superset, and use the following SQLAlchemy URI:'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre"},"databricks+pyhive://token:{access_token}@{server_hostname}:{port}/{database_name}\n")),(0,o.kt)("p",null,'You also need to add the following configuration to "Other" -> "Engine Parameters", with your HTTP path:'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'{"connect_args": {"http_path": "sql/protocolv1/o/****"}}\n')),(0,o.kt)("h3",{id:"odbc"},"ODBC"),(0,o.kt)("p",null,"For ODBC you first need to install the ",(0,o.kt)("a",{parentName:"p",href:"https://databricks.com/spark/odbc-drivers-download"},"ODBC drivers for your platform"),"."),(0,o.kt)("p",null,'For a regular connection use this as the SQLAlchemy URI after selecting either "Databricks Interactive Cluster" or "Databricks SQL Endpoint" for the database, depending on your use case:'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre"},"databricks+pyodbc://token:{access_token}@{server_hostname}:{port}/{database_name}\n")),(0,o.kt)("p",null,"And for the connection arguments:"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'{"connect_args": {"http_path": "sql/protocolv1/o/****", "driver_path": "/path/to/odbc/driver"}}\n')),(0,o.kt)("p",null,"The driver path should be:"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},(0,o.kt)("inlineCode",{parentName:"li"},"/Library/simba/spark/lib/libsparkodbc_sbu.dylib")," (Mac OS)"),(0,o.kt)("li",{parentName:"ul"},(0,o.kt)("inlineCode",{parentName:"li"},"/opt/simba/spark/lib/64/libsparkodbc_sb64.so")," (Linux)")),(0,o.kt)("p",null,"For a connection to a SQL endpoint you need to use the HTTP path from the endpoint:"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'{"connect_args": {"http_path": "/sql/1.0/endpoints/****", "driver_path": "/path/to/odbc/driver"}}\n')))}b.isMDXComponent=!0},3905:function(e,t,a){a.d(t,{Zo:function(){return p},kt:function(){return b}});var n=a(67294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t<arguments.length;t++){var a=null!=arguments[t]?arguments[t]:{};t%2?o(Object(a),!0).forEach((function(t){r(e,t,a[t])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(a)):o(Object(a)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(a,t))}))}return e}function s(e,t){if(null==e)return{};var a,n,r=function(e,t){if(null==e)return{};var a,n,r={},o=Object.keys(e);for(n=0;n<o.length;n++)a=o[n],t.indexOf(a)>=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n<o.length;n++)a=o[n],t.indexOf(a)>=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var l=n.createContext({}),c=function(e){var t=n.useContext(l),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},p=function(e){var t=c(e.components);return n.createElement(l.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),u=c(a),b=r,k=u["".concat(l,".").concat(b)]||u[b]||d[b]||o;return a?n.createElement(k,i(i({ref:t},p),{},{components:a})):n.createElement(k,i({ref:t},p))}));function b(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,i=new Array(o);i[0]=u;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s.mdxType="string"==typeof e?e:r,i[1]=s;for(var c=2;c<o;c++)i[c]=a[c];return n.createElement.apply(null,i)}return n.createElement.apply(null,a)}u.displayName="MDXCreateElement"}}]);