blob: 30f815fa150952d86e78594e0064e241d23a9912 [file] [log] [blame]
// Generated webpack chunk (chunk id 63, module id "HYU+"); per the embedded
// `__filemeta` values it is built from src/pages/docs/installation/kubernetes.mdx.
//
// What the statement below does:
//   * Appends `[[63], {"HYU+": factory}]` to `window.webpackJsonp`, creating
//     the array first when it does not exist yet.
//   * Inside the factory, `n.r(t)` is called on the exports object and two
//     export getters are registered through `n.d`: `_frontmatter` (returns `i`,
//     an empty object) and `default` (returns the component function `p`).
//   * `i` and `p` each receive a `__filemeta` property ({name, filename}) via
//     Object.defineProperty, but only if they are extensible objects that do
//     not already own such a property.
//   * `p(e)` reads `e.components`, passes `e` and ["components"] to `r.a`,
//     and returns `s.b(c, a.a({}, o, n, {components, mdxType: "MDXLayout"}), ...children)`
//     where `c` is `l.a` and the children are nested `s.b(...)` calls holding
//     the "Running on Kubernetes" page content (headings, lists, sh/yaml samples).
//   * `p.isMDXComponent` is set to true.
//
// NOTE(review): `a.a`, `r.a`, `s.b` and `l.a` come from modules "k1TG", "8o2o",
// "7ljp" and "hhGP", which are not visible here -- presumably an object-extend
// helper, an omit-keys helper, the MDX element factory and the page layout;
// confirm against the bundle before relying on that.
// NOTE(review): the `additionalRequirements` YAML sample contains the typo
// "verstion" (should be "version"); fix it in the source .mdx and rebuild
// rather than editing this artifact.
// NOTE(review): the physical line breaks below fall inside string literals and
// look like wrapping introduced by extraction; the text is kept exactly as found.
(window.webpackJsonp=window.webpackJsonp||[]).push([[63],{"HYU+":function(e,t,n){"use strict";n.r(t),n.d(t,"_frontmatter",(function(){return i})),n.d(t,"default",(function(){return p}));var a=n("k1TG"),r=n("8o2o"),s=(n("q1tI"),n("7ljp")),l=n("hhGP"),i=(n("qKvR"),{});void 0!==i&&i&&i===Object(i)&&Object.isExtensible(i)&&!i.hasOwnProperty("__filemeta")&&Object.defineProperty(i,"__filemeta",{configurable:!0,value:{name:"_frontmatter",filename:"src/pages/docs/installation/kubernetes.mdx"}});var o={_frontmatter:i},c=l.a;function p(e){var t=e.components,n=Object(r.a)(e,["components"]);return Object(s.b)(c,Object(a.a)({},o,n,{components:t,mdxType:"MDXLayout"}),Object(s.b)("h2",{id:"running-on-kubernetes"},"Running on Kubernetes"),Object(s.b)("p",null,"Running on Kubernetes is supported with the provided ",Object(s.b)("a",Object(a.a)({parentName:"p"},{href:"https://helm.sh/"}),"Helm")," chart found in the official ",Object(s.b)("a",Object(a.a)({parentName:"p"},{href:"https://apache.github.io/superset/index.yaml"}),"Superset helm repository"),"."),Object(s.b)("h3",{id:"prerequisites"},"Prerequisites"),Object(s.b)("ul",null,Object(s.b)("li",{parentName:"ul"},"A Kubernetes cluster"),Object(s.b)("li",{parentName:"ul"},"Helm installed")),Object(s.b)("h3",{id:"running"},"Running"),Object(s.b)("ol",null,Object(s.b)("li",{parentName:"ol"},"Add the Superset helm repository")),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-sh"}),'helm repo add superset https://apache.github.io/superset\n"superset" has been added to your repositories\n')),Object(s.b)("ol",null,Object(s.b)("li",{parentName:"ol"},"View charts in repo")),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-sh"}),"helm search repo superset\nNAME CHART VERSION APP VERSION DESCRIPTION\nsuperset/superset 0.1.1 1.0 Apache Superset is a modern, enterprise-ready b...\n")),Object(s.b)("ol",null,Object(s.b)("li",{parentName:"ol"},"Configure 
your setting overrides")),Object(s.b)("p",null,"Just like any typical Helm chart, you'll need to craft a ",Object(s.b)("inlineCode",{parentName:"p"},"values.yaml")," file that would define/override any of the values exposed into the default ",Object(s.b)("a",Object(a.a)({parentName:"p"},{href:"https://github.com/apache/superset/tree/master/helm/superset/values.yaml"}),"values.yaml"),", or from any of the dependent charts it depends on:"),Object(s.b)("ul",null,Object(s.b)("li",{parentName:"ul"},Object(s.b)("a",Object(a.a)({parentName:"li"},{href:"https://artifacthub.io/packages/helm/bitnami/redis"}),"bitnami/redis")),Object(s.b)("li",{parentName:"ul"},Object(s.b)("a",Object(a.a)({parentName:"li"},{href:"https://artifacthub.io/packages/helm/bitnami/postgresql"}),"bitnami/postgresql"))),Object(s.b)("p",null,"More info down below on some important overrides you might need."),Object(s.b)("ol",null,Object(s.b)("li",{parentName:"ol"},"Install and run")),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-sh"}),"helm upgrade --install --values my-values.yaml superset superset/superset\n")),Object(s.b)("p",null,"You should see various pods popping up, such as:"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-sh"}),"kubectl get pods\nNAME READY STATUS RESTARTS AGE\nsuperset-celerybeat-7cdcc9575f-k6xmc 1/1 Running 0 119s\nsuperset-f5c9c667-dw9lp 1/1 Running 0 4m7s\nsuperset-f5c9c667-fk8bk 1/1 Running 0 4m11s\nsuperset-init-db-zlm9z 0/1 Completed 0 111s\nsuperset-postgresql-0 1/1 Running 0 6d20h\nsuperset-redis-master-0 1/1 Running 0 6d20h\nsuperset-worker-75b48bbcc-jmmjr 1/1 Running 0 4m8s\nsuperset-worker-75b48bbcc-qrq49 1/1 Running 0 4m12s\n")),Object(s.b)("p",null,"The exact list will depend on some of your specific configuration overrides but you should generally expect:"),Object(s.b)("ul",null,Object(s.b)("li",{parentName:"ul"},"N 
",Object(s.b)("inlineCode",{parentName:"li"},"superset-xxxx-yyyy")," and ",Object(s.b)("inlineCode",{parentName:"li"},"superset-worker-xxxx-yyyy")," pods (depending on your ",Object(s.b)("inlineCode",{parentName:"li"},"replicaCount")," value)"),Object(s.b)("li",{parentName:"ul"},"1 ",Object(s.b)("inlineCode",{parentName:"li"},"superset-postgresql-0")," depending on your postgres settings"),Object(s.b)("li",{parentName:"ul"},"1 ",Object(s.b)("inlineCode",{parentName:"li"},"superset-redis-master-0")," depending on your redis settings"),Object(s.b)("li",{parentName:"ul"},"1 ",Object(s.b)("inlineCode",{parentName:"li"},"superset-celerybeat-xxxx-yyyy")," pod if you have ",Object(s.b)("inlineCode",{parentName:"li"},"supersetCeleryBeat.enabled = true")," in your values overrides")),Object(s.b)("ol",null,Object(s.b)("li",{parentName:"ol"},"Access it")),Object(s.b)("p",null,"The chart will publish appropriate services to expose the Superset UI internally within your k8s cluster. To access it externally you will have to either:"),Object(s.b)("ul",null,Object(s.b)("li",{parentName:"ul"},"Configure the Service as a ",Object(s.b)("inlineCode",{parentName:"li"},"LoadBalancer")," or ",Object(s.b)("inlineCode",{parentName:"li"},"NodePort")),Object(s.b)("li",{parentName:"ul"},"Set up an ",Object(s.b)("inlineCode",{parentName:"li"},"Ingress")," for it - the chart includes a definition, but will need to be tuned to your needs (hostname, tls, annotations etc...)"),Object(s.b)("li",{parentName:"ul"},"Run ",Object(s.b)("inlineCode",{parentName:"li"},"kubectl port-forward superset-xxxx-yyyy :8088")," to directly tunnel one pod's port into your localhost")),Object(s.b)("p",null,"Depending how you configured external access, the URL will vary. 
Once you've identified the appropriate URL you can log in with:"),Object(s.b)("ul",null,Object(s.b)("li",{parentName:"ul"},"user: ",Object(s.b)("inlineCode",{parentName:"li"},"admin")),Object(s.b)("li",{parentName:"ul"},"password: ",Object(s.b)("inlineCode",{parentName:"li"},"admin"))),Object(s.b)("h3",{id:"important-settings"},"Important settings"),Object(s.b)("h4",{id:"security-settings"},"Security settings"),Object(s.b)("p",null,"Default security settings and passwords are included but you ",Object(s.b)("strong",{parentName:"p"},"SHOULD")," override those with your own, in particular:"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),"postgresql:\n postgresqlPassword: superset\n")),Object(s.b)("h4",{id:"dependencies"},"Dependencies"),Object(s.b)("p",null,"You can specify pip packages to be installed before startup, e.g. to install extra database drivers:"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),"additionalRequirements:\n - psycopg2\n - redis\n - elasticsearch-dbapi\n - pymssql\n - gsheetsdb\n # Force verstion to work around https://github.com/betodealmeida/gsheets-db-api/issues/15\n - moz-sql-parser==4.9.21002\n # For OAuth\n - Authlib\n # For webdriver / reports\n - gevent\n")),Object(s.b)("p",null,Object(s.b)("strong",{parentName:"p"},"WARNING"),": The list will replace the default one from the default ",Object(s.b)("inlineCode",{parentName:"p"},"values.yaml")," entirely, not ",Object(s.b)("em",{parentName:"p"},"add")," to it..."),Object(s.b)("h4",{id:"superset_configpy"},"superset_config.py"),Object(s.b)("p",null,"The default ",Object(s.b)("inlineCode",{parentName:"p"},"superset_config.py")," is fairly minimal and you will very likely need to extend it. 
This is done by specifying one or more key/value entries in ",Object(s.b)("inlineCode",{parentName:"p"},"configOverrides"),", e.g.:"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),'configOverrides:\n my_override: |\n # This will make sure the redirect_uri is properly computed, even with SSL offloading\n ENABLE_PROXY_FIX = True\n FEATURE_FLAGS = {\n "DYNAMIC_PLUGINS": True\n }\n')),Object(s.b)("p",null,"Those will be evaluated as Helm templates and therefore will be able to reference other ",Object(s.b)("inlineCode",{parentName:"p"},"values.yaml")," variables e.g. ",Object(s.b)("inlineCode",{parentName:"p"},"{{ .Values.ingress.hosts[0] }}")," will resolve to your ingress external domain."),Object(s.b)("p",null,"The entire ",Object(s.b)("inlineCode",{parentName:"p"},"superset_config.py")," will be installed as a secret, so it is safe to pass sensitive parameters directly... however it might be more readable to use secret env variables for that."),Object(s.b)("p",null,"Full python files can be provided by running ",Object(s.b)("inlineCode",{parentName:"p"},"helm upgrade --install --values my-values.yaml --set-file configOverrides.oauth=set_oauth.py")),Object(s.b)("h4",{id:"environment-variables"},"Environment Variables"),Object(s.b)("p",null,"Those can be passed as key/values either with ",Object(s.b)("inlineCode",{parentName:"p"},"extraEnv")," or ",Object(s.b)("inlineCode",{parentName:"p"},"extraSecretEnv")," if they're sensitive. They can then be referenced from ",Object(s.b)("inlineCode",{parentName:"p"},"superset_config.py")," using e.g. 
",Object(s.b)("inlineCode",{parentName:"p"},'os.environ.get("VAR")'),"."),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),'extraEnv:\n SMTP_HOST: smtp.gmail.com\n SMTP_USER: user@gmail.com\n SMTP_PORT: "587"\n SMTP_MAIL_FROM: user@gmail.com\n\nextraSecretEnv:\n SMTP_PASSWORD: xxxx\n\nconfigOverrides:\n smtp: |\n import ast\n SMTP_HOST = os.getenv("SMTP_HOST","localhost")\n SMTP_STARTTLS = ast.literal_eval(os.getenv("SMTP_STARTTLS", "True"))\n SMTP_SSL = ast.literal_eval(os.getenv("SMTP_SSL", "False"))\n SMTP_USER = os.getenv("SMTP_USER","superset")\n SMTP_PORT = os.getenv("SMTP_PORT",25)\n SMTP_PASSWORD = os.getenv("SMTP_PASSWORD","superset")\n')),Object(s.b)("h4",{id:"system-packages"},"System packages"),Object(s.b)("p",null,"If new system packages are required, they can be installed before application startup by overriding the container's ",Object(s.b)("inlineCode",{parentName:"p"},"command"),", e.g.:"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),"supersetWorker:\n command:\n - /bin/sh\n - -c\n - |\n apt update\n apt install -y somepackage\n apt autoremove -yqq --purge\n apt clean\n\n # Run celery worker\n . 
{{ .Values.configMountPath }}/superset_bootstrap.sh; celery --app=superset.tasks.celery_app:app worker\n")),Object(s.b)("h4",{id:"data-sources"},"Data sources"),Object(s.b)("p",null,"Data source definitions can be automatically declared by providing key/value yaml definitions in ",Object(s.b)("inlineCode",{parentName:"p"},"extraConfigs"),":"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),'extraConfigs:\n datasources-init.yaml: |\n databases:\n - allow_csv_upload: true\n allow_ctas: true\n allow_cvas: true\n database_name: example-db\n extra: "{\\r\\n \\"metadata_params\\": {},\\r\\n \\"engine_params\\": {},\\r\\n \\"\\\n metadata_cache_timeout\\": {},\\r\\n \\"schemas_allowed_for_csv_upload\\": []\\r\\n\\\n }"\n sqlalchemy_uri: example://example-db.local\n tables: []\n')),Object(s.b)("p",null,"Those will also be mounted as secrets and can include sensitive parameters."),Object(s.b)("h3",{id:"configuration-examples"},"Configuration Examples"),Object(s.b)("h4",{id:"setting-up-oauth"},"Setting up OAuth"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),'extraEnv:\n AUTH_DOMAIN: example.com\n\nextraSecretEnv:\n GOOGLE_KEY: xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.apps.googleusercontent.com\n GOOGLE_SECRET: xxxxxxxxxxxxxxxxxxxxxxxx\n\nconfigOverrides:\n enable_oauth: |\n # This will make sure the redirect_uri is properly computed, even with SSL offloading\n ENABLE_PROXY_FIX = True\n\n from flask_appbuilder.security.manager import (AUTH_OAUTH, AUTH_DB)\n AUTH_TYPE = AUTH_OAUTH\n OAUTH_PROVIDERS = [\n {\n "name": "google",\n "icon": "fa-google",\n "token_key": "access_token",\n "remote_app": {\n "client_id": os.getenv("GOOGLE_KEY"),\n "client_secret": os.getenv("GOOGLE_SECRET"),\n "api_base_url": "https://www.googleapis.com/oauth2/v2/",\n "client_kwargs": {"scope": "email profile"},\n "request_token_url": None,\n "access_token_url": 
"https://accounts.google.com/o/oauth2/token",\n "authorize_url": "https://accounts.google.com/o/oauth2/auth",\n "authorize_params": {"hd": os.getenv("AUTH_DOMAIN", "")}\n },\n }\n ]\n\n # Map Authlib roles to superset roles\n AUTH_ROLE_ADMIN = \'Admin\'\n AUTH_ROLE_PUBLIC = \'Public\'\n\n # Will allow user self registration, allowing to create Flask users from Authorized User\n AUTH_USER_REGISTRATION = True\n\n # The default user self registration role\n AUTH_USER_REGISTRATION_ROLE = "Admin"\n')),Object(s.b)("h4",{id:"enable-alerts-and-reports"},"Enable Alerts and Reports"),Object(s.b)("p",null,"For this, as per the ",Object(s.b)("a",Object(a.a)({parentName:"p"},{href:"/docs/installation/email-reports"}),"Alerts and Reports doc"),", you will need to:"),Object(s.b)("h5",{id:"install-a-supported-webdriver-in-the-celery-worker"},"Install a supported webdriver in the Celery worker"),Object(s.b)("p",null,"This is done either by using a custom image that has the webdriver pre-installed, or installing at startup time by overriding the ",Object(s.b)("inlineCode",{parentName:"p"},"command"),". Here's a working example for ",Object(s.b)("inlineCode",{parentName:"p"},"chromedriver"),":"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),"supersetWorker:\n command:\n - /bin/sh\n - -c\n - |\n # Install chrome webdriver\n # See https://github.com/apache/superset/blob/4fa3b6c7185629b87c27fc2c0e5435d458f7b73d/docs/src/pages/docs/installation/email_reports.mdx\n apt update\n wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb\n apt install -y --no-install-recommends ./google-chrome-stable_current_amd64.deb\n wget https://chromedriver.storage.googleapis.com/88.0.4324.96/chromedriver_linux64.zip\n unzip chromedriver_linux64.zip\n chmod +x chromedriver\n mv chromedriver /usr/bin\n apt autoremove -yqq --purge\n apt clean\n rm -f google-chrome-stable_current_amd64.deb chromedriver_linux64.zip\n\n # Run\n . 
{{ .Values.configMountPath }}/superset_bootstrap.sh; celery --app=superset.tasks.celery_app:app worker\n")),Object(s.b)("h5",{id:"run-the-celery-beat"},"Run the Celery beat"),Object(s.b)("p",null,"This pod will trigger the scheduled tasks configured in the alerts and reports UI section:"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),"supersetCeleryBeat:\n enabled: true\n")),Object(s.b)("h5",{id:"configure-the-appropriate-celery-jobs-and-smtpslack-settings"},"Configure the appropriate Celery jobs and SMTP/Slack settings"),Object(s.b)("pre",null,Object(s.b)("code",Object(a.a)({parentName:"pre"},{className:"language-yaml"}),"extraEnv:\n SMTP_HOST: smtp.gmail.com\n SMTP_USER: user@gmail.com\n SMTP_PORT: \"587\"\n SMTP_MAIL_FROM: user@gmail.com\n\nextraSecretEnv:\n SLACK_API_TOKEN: xoxb-xxxx-yyyy\n SMTP_PASSWORD: xxxx-yyyy\n\nconfigOverrides:\n feature_flags: |\n import ast\n\n FEATURE_FLAGS = {\n \"ALERT_REPORTS\": True\n }\n\n SMTP_HOST = os.getenv(\"SMTP_HOST\",\"localhost\")\n SMTP_STARTTLS = ast.literal_eval(os.getenv(\"SMTP_STARTTLS\", \"True\"))\n SMTP_SSL = ast.literal_eval(os.getenv(\"SMTP_SSL\", \"False\"))\n SMTP_USER = os.getenv(\"SMTP_USER\",\"superset\")\n SMTP_PORT = os.getenv(\"SMTP_PORT\",25)\n SMTP_PASSWORD = os.getenv(\"SMTP_PASSWORD\",\"superset\")\n SMTP_MAIL_FROM = os.getenv(\"SMTP_MAIL_FROM\",\"superset@superset.com\")\n\n SLACK_API_TOKEN = os.getenv(\"SLACK_API_TOKEN\",None)\n celery_conf: |\n from celery.schedules import crontab\n\n class CeleryConfig(object):\n BROKER_URL = f\"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0\"\n CELERY_IMPORTS = ('superset.sql_lab', )\n CELERY_RESULT_BACKEND = f\"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0\"\n CELERY_ANNOTATIONS = {'tasks.add': {'rate_limit': '10/s'}}\n CELERY_IMPORTS = ('superset.sql_lab', \"superset.tasks\", \"superset.tasks.thumbnails\", )\n CELERY_ANNOTATIONS = {\n 'sql_lab.get_sql_results': {\n 'rate_limit': '100/s',\n },\n 
'email_reports.send': {\n 'rate_limit': '1/s',\n 'time_limit': 600,\n 'soft_time_limit': 600,\n 'ignore_result': True,\n },\n }\n CELERYBEAT_SCHEDULE = {\n 'reports.scheduler': {\n 'task': 'reports.scheduler',\n 'schedule': crontab(minute='*', hour='*'),\n },\n 'reports.prune_log': {\n 'task': 'reports.prune_log',\n 'schedule': crontab(minute=0, hour=0),\n },\n 'cache-warmup-hourly': {\n 'task': 'cache-warmup',\n 'schedule': crontab(minute='*/30', hour='*'),\n 'kwargs': {\n 'strategy_name': 'top_n_dashboards',\n 'top_n': 10,\n 'since': '7 days ago',\n },\n }\n }\n\n CELERY_CONFIG = CeleryConfig\n reports: |\n EMAIL_PAGE_RENDER_WAIT = 60\n WEBDRIVER_BASEURL = \"http://{{ template \"superset.fullname\" . }}:{{ .Values.service.port }}/\"\n WEBDRIVER_BASEURL_USER_FRIENDLY = \"https://www.example.com/\"\n WEBDRIVER_TYPE= \"chrome\"\n WEBDRIVER_OPTION_ARGS = [\n \"--force-device-scale-factor=2.0\",\n \"--high-dpi-support=2.0\",\n \"--headless\",\n \"--disable-gpu\",\n \"--disable-dev-shm-usage\",\n # This is required because our process runs as root (in order to install pip packages)\n \"--no-sandbox\",\n \"--disable-setuid-sandbox\",\n \"--disable-extensions\",\n ]\n")))}void 0!==p&&p&&p===Object(p)&&Object.isExtensible(p)&&!p.hasOwnProperty("__filemeta")&&Object.defineProperty(p,"__filemeta",{configurable:!0,value:{name:"MDXContent",filename:"src/pages/docs/installation/kubernetes.mdx"}}),p.isMDXComponent=!0}}]);
//# sourceMappingURL=component---src-pages-docs-installation-kubernetes-mdx-595694fbf1a9ad1c87e6.js.map