blob: db83522eb2bf4fb74d423f45e55138d4b1add143 [file] [log] [blame]
{"version":3,"sources":["webpack:///./src/pages/docs/installation/async_queries_celery.mdx"],"names":["_frontmatter","MDXLayout","DefaultLayout","MDXContent","components","props","mdxType","isMDXComponent"],"mappings":"wPAMaA,G,UAAe,S,sOAE5B,IAKMC,EAAYC,IACH,SAASC,EAAT,GAGZ,IAFDC,EAEC,EAFDA,WACGC,EACF,8BACD,OAAO,YAACJ,EAAD,eAAeI,EAAf,CAAsBD,WAAYA,EAAYE,QAAQ,cAC3D,iBAAQ,CACN,GAAM,4BADR,4BAGA,iBAAQ,CACN,GAAM,UADR,UAGA,4TAGA,sBAEE,iBAAQ,CACN,WAAc,MADhB,wGAIE,yBAAgB,CACd,WAAc,MADhB,iBAJF,iBAQE,yBAAgB,CACd,WAAc,MADhB,wBARF,iCAcA,iBAAQ,CACN,WAAc,MADhB,kFAKA,iBAAQ,CACN,WAAc,MADhB,mFAKF,+DACE,yBAAgB,CACd,WAAc,KADhB,iBADF,YAKE,yBAAgB,CACd,WAAc,KADhB,sBALF,mFAUA,uBAAK,mBAAU,CACX,UAAa,kBACb,WAAc,OAFb,21BAiCL,2GACA,uBAAK,mBAAU,CACX,WAAc,OADb,oFAIL,4GACA,uBAAK,mBAAU,CACX,WAAc,OADb,sDAIL,yOAGE,gBAAO,CACL,KAAQ,+CACR,WAAc,KAFhB,gDAHF,+HAUE,yBAAgB,CACd,WAAc,KADhB,sBAVF,6BAcA,uBAAK,mBAAU,CACX,UAAa,kBACb,WAAc,OAFb,sVAcL,+CACE,gBAAO,CACL,KAAQ,4CACR,WAAc,KAFhB,eADF,SAOE,gBAAO,CACL,KAAQ,wCACR,WAAc,KAFhB,WAPF,6EAaE,yBAAgB,CACd,WAAc,KADhB,uCAbF,YAiBE,yBAAgB,CACd,WAAc,KADhB,sBAjBF,sHAsBA,qBAAG,qBAAY,CACX,WAAc,KADf,oBAGH,sBAEE,iBAAQ,CACN,WAAc,MAGd,gBAAO,CACL,WAAc,MADhB,qFAGE,iBAAQ,CACN,WAAc,KADhB,qCAHF,uJAaF,iBAAQ,CACN,WAAc,MAGd,gBAAO,CACL,WAAc,MADhB,gBAGE,iBAAQ,CACN,WAAc,KADhB,+CAHF,gMAaF,iBAAQ,CACN,WAAc,MAGd,gBAAO,CACL,WAAc,MADhB,gBAGE,iBAAQ,CACN,WAAc,KADhB,2CAHF,eAOE,qBAAY,CACV,WAAc,KADhB,gCAPF,sEAgBJ,iBAAQ,CACN,GAAM,iBADR,iBAGA,qHACA,uBAAK,mBAAU,CACX,UAAa,kBACb,WAAc,OAFb,yBAKL,kDACA,uBAAK,mBAAU,CACX,WAAc,OADb,8D,oOAQTH,EAAWI,gBAAiB","file":"component---src-pages-docs-installation-async-queries-celery-mdx-42f2f7b46e8dbf32f887.js","sourcesContent":["import * as React from 'react'\n /* @jsx mdx */\nimport { mdx } from '@mdx-js/react';\n/* @jsx mdx */\n\nimport DefaultLayout from \"/Users/evan/GitHub/incubator-superset_preset/docs/node_modules/gatsby-theme-docz/src/base/Layout.js\";\nexport const _frontmatter = {};\n\nconst makeShortcode = name => function MDXDefaultShortcode(props) {\n console.warn(\"Component \" + name + \" was not imported, exported, or provided by MDXProvider as global scope\");\n return <div {...props} />;\n};\n\nconst MDXLayout = DefaultLayout;\nexport default function MDXContent({\n components,\n ...props\n}) {\n return <MDXLayout {...props} components={components} mdxType=\"MDXLayout\">\n <h2 {...{\n \"id\": \"async-queries-via-celery\"\n }}>{`Async Queries via Celery`}</h2>\n <h3 {...{\n \"id\": \"celery\"\n }}>{`Celery`}</h3>\n <p>{`On large analytic databases, it’s common to run queries that execute for minutes or hours. To enable\nsupport for long running queries that execute beyond the typical web request’s timeout (30-60\nseconds), it is necessary to configure an asynchronous backend for Superset which consists of:`}</p>\n <ul>\n\n <li {...{\n \"parentName\": \"ul\"\n }}>{`one or many Superset workers (which is implemented as a Celery worker), and can be started with\nthe `}\n <inlineCode {...{\n \"parentName\": \"li\"\n }}>{`celery worker`}</inlineCode>\n {` command, run `}\n <inlineCode {...{\n \"parentName\": \"li\"\n }}>{`celery worker --help`}</inlineCode>\n {` to view the related options.`}</li>\n\n\n <li {...{\n \"parentName\": \"ul\"\n }}>{`a celery broker (message queue) for which we recommend using Redis or RabbitMQ`}</li>\n\n\n <li {...{\n \"parentName\": \"ul\"\n }}>{`a results backend that defines where the worker will persist the query results`}</li>\n\n </ul>\n <p>{`Configuring Celery requires defining a `}\n <inlineCode {...{\n \"parentName\": \"p\"\n }}>{`CELERY_CONFIG`}</inlineCode>\n {` in your `}\n <inlineCode {...{\n \"parentName\": \"p\"\n }}>{`superset_config.py`}</inlineCode>\n {`. Both the worker\nand web server processes should have the same configuration.`}</p>\n <pre><code {...{\n \"className\": \"language-python\",\n \"parentName\": \"pre\"\n }}>{`class CeleryConfig(object):\n BROKER_URL = 'redis://localhost:6379/0'\n CELERY_IMPORTS = (\n 'superset.sql_lab',\n 'superset.tasks',\n )\n CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'\n CELERYD_LOG_LEVEL = 'DEBUG'\n CELERYD_PREFETCH_MULTIPLIER = 10\n CELERY_ACKS_LATE = True\n CELERY_ANNOTATIONS = {\n 'sql_lab.get_sql_results': {\n 'rate_limit': '100/s',\n },\n 'email_reports.send': {\n 'rate_limit': '1/s',\n 'time_limit': 120,\n 'soft_time_limit': 150,\n 'ignore_result': True,\n },\n }\n CELERYBEAT_SCHEDULE = {\n 'email_reports.schedule_hourly': {\n 'task': 'email_reports.schedule_hourly',\n 'schedule': crontab(minute=1, hour='*'),\n },\n }\n\nCELERY_CONFIG = CeleryConfig\n`}</code></pre>\n <p>{`To start a Celery worker to leverage the configuration, run the following command:`}</p>\n <pre><code {...{\n \"parentName\": \"pre\"\n }}>{`celery worker --app=superset.tasks.celery_app:app --pool=prefork -O fair -c 4\n`}</code></pre>\n <p>{`To start a job which schedules periodic background jobs, run the following command:`}</p>\n <pre><code {...{\n \"parentName\": \"pre\"\n }}>{`celery beat --app=superset.tasks.celery_app:app\n`}</code></pre>\n <p>{`To setup a result backend, you need to pass an instance of a derivative of from\ncachelib.base.BaseCache to the RESULTS_BACKEND configuration key in your superset_config.py. You can\nuse Memcached, Redis, S3 (`}\n <a {...{\n \"href\": \"https://pypi.python.org/pypi/s3werkzeugcache\",\n \"parentName\": \"p\"\n }}>{`https://pypi.python.org/pypi/s3werkzeugcache`}</a>\n {`), memory or the file system\n(in a single server-type setup or for testing), or to write your own caching interface. Your\n`}\n <inlineCode {...{\n \"parentName\": \"p\"\n }}>{`superset_config.py`}</inlineCode>\n {` may look something like:`}</p>\n <pre><code {...{\n \"className\": \"language-python\",\n \"parentName\": \"pre\"\n }}>{`# On S3\nfrom s3cache.s3cache import S3Cache\nS3_CACHE_BUCKET = 'foobar-superset'\nS3_CACHE_KEY_PREFIX = 'sql_lab_result'\nRESULTS_BACKEND = S3Cache(S3_CACHE_BUCKET, S3_CACHE_KEY_PREFIX)\n\n# On Redis\nfrom cachelib.redis import RedisCache\nRESULTS_BACKEND = RedisCache(\n host='localhost', port=6379, key_prefix='superset_results')\n`}</code></pre>\n <p>{`For performance gains, `}\n <a {...{\n \"href\": \"https://github.com/msgpack/msgpack-python\",\n \"parentName\": \"p\"\n }}>{`MessagePack`}</a>\n {` and\n`}\n <a {...{\n \"href\": \"https://arrow.apache.org/docs/python/\",\n \"parentName\": \"p\"\n }}>{`PyArrow`}</a>\n {` are now used for results serialization. This can be\ndisabled by setting `}\n <inlineCode {...{\n \"parentName\": \"p\"\n }}>{`RESULTS_BACKEND_USE_MSGPACK = False`}</inlineCode>\n {` in your `}\n <inlineCode {...{\n \"parentName\": \"p\"\n }}>{`superset_config.py`}</inlineCode>\n {`, should any\nissues arise. Please clear your existing results cache store when upgrading an existing environment.`}</p>\n <p><strong {...{\n \"parentName\": \"p\"\n }}>{`Important Notes`}</strong></p>\n <ul>\n\n <li {...{\n \"parentName\": \"ul\"\n }}>\n\n <p {...{\n \"parentName\": \"li\"\n }}>{`It is important that all the worker nodes and web servers in the Superset cluster `}\n <em {...{\n \"parentName\": \"p\"\n }}>{`share a common\nmetadata database`}</em>\n {`. This means that SQLite will not work in this context since it has limited\nsupport for concurrency and typically lives on the local file system.`}</p>\n\n </li>\n\n\n <li {...{\n \"parentName\": \"ul\"\n }}>\n\n <p {...{\n \"parentName\": \"li\"\n }}>{`There should `}\n <em {...{\n \"parentName\": \"p\"\n }}>{`only be one instance of celery beat running`}</em>\n {` in your entire setup. If not,\nbackground jobs can get scheduled multiple times resulting in weird behaviors like duplicate\ndelivery of reports, higher than expected load / traffic etc.`}</p>\n\n </li>\n\n\n <li {...{\n \"parentName\": \"ul\"\n }}>\n\n <p {...{\n \"parentName\": \"li\"\n }}>{`SQL Lab will `}\n <em {...{\n \"parentName\": \"p\"\n }}>{`only run your queries asynchronously if`}</em>\n {` you enable `}\n <strong {...{\n \"parentName\": \"p\"\n }}>{`Asynchronous Query Execution`}</strong>\n {`\nin your database settings (Sources > Databases > Edit record).`}</p>\n\n </li>\n\n </ul>\n <h3 {...{\n \"id\": \"celery-flower\"\n }}>{`Celery Flower`}</h3>\n <p>{`Flower is a web based tool for monitoring the Celery cluster which you can install from pip:`}</p>\n <pre><code {...{\n \"className\": \"language-python\",\n \"parentName\": \"pre\"\n }}>{`pip install flower\n`}</code></pre>\n <p>{`You can run flower using:`}</p>\n <pre><code {...{\n \"parentName\": \"pre\"\n }}>{`celery flower --app=superset.tasks.celery_app:app\n`}</code></pre>\n\n </MDXLayout>;\n}\n;\nMDXContent.isMDXComponent = true;\n "],"sourceRoot":""}