blob: 1054c85b7613d366f6fc0a240a6d28a4ca8d11c4 [file] [log] [blame]
{"version":3,"sources":["webpack:///./src/pages/docs/installation/async_queries_celery.mdx"],"names":["_frontmatter","layoutProps","MDXLayout","DefaultLayout","MDXContent","components","props","mdxType","parentName","isMDXComponent"],"mappings":"wPAQaA,G,UAAe,S,sOAC5B,IAAMC,EAAc,CAClBD,gBAEIE,EAAYC,IACH,SAASC,EAAT,GAGZ,IAFDC,EAEC,EAFDA,WACGC,EACF,8BACD,OAAO,YAACJ,EAAD,eAAeD,EAAiBK,EAAhC,CAAuCD,WAAYA,EAAYE,QAAQ,cAG5E,iBAAQ,CACN,GAAM,4BADR,4BAGA,iBAAQ,CACN,GAAM,UADR,UAGA,4TAGA,sBACE,kBAAIC,WAAW,MAAf,wGACA,0BAAYA,WAAW,MAAvB,iBADA,iBAC4E,0BAAYA,WAAW,MAAvB,wBAD5E,iCAEA,kBAAIA,WAAW,MAAf,kFACA,kBAAIA,WAAW,MAAf,mFAEF,+DAA8C,0BAAYA,WAAW,KAAvB,iBAA9C,YAAoH,0BAAYA,WAAW,KAAvB,sBAApH,mFAEA,uBAAK,gCAAMA,WAAW,OAAU,CAC5B,UAAa,oBADZ,21BAgCL,2GACA,uBAAK,gCAAMA,WAAW,OAAU,IAA3B,oFAEL,4GACA,uBAAK,gCAAMA,WAAW,OAAU,IAA3B,sDAEL,yOAEwB,6BAAGA,WAAW,KAAQ,CAC1C,KAAQ,iDADY,gDAFxB,+HAMF,0BAAYA,WAAW,KAAvB,sBANE,6BAOA,uBAAK,gCAAMA,WAAW,OAAU,CAC5B,UAAa,oBADZ,sVAaL,+CAA8B,6BAAGA,WAAW,KAAQ,CAChD,KAAQ,8CADkB,eAA9B,SAGF,6BAAGA,WAAW,KAAQ,CAChB,KAAQ,0CADd,WAHE,6EAMkB,0BAAYA,WAAW,KAAvB,uCANlB,YAM8G,0BAAYA,WAAW,KAAvB,sBAN9G,sHAQA,qBAAG,sBAAQA,WAAW,KAAnB,oBACH,sBACE,kBAAIA,WAAW,MACb,iBAAGA,WAAW,MAAd,qFAAyG,kBAAIA,WAAW,KAAf,qCAAzG,uJAIF,kBAAIA,WAAW,MACb,iBAAGA,WAAW,MAAd,gBAAoC,kBAAIA,WAAW,KAAf,+CAApC,gMAIF,kBAAIA,WAAW,MACb,iBAAGA,WAAW,MAAd,gBAAoC,kBAAIA,WAAW,KAAf,2CAApC,eAAuH,sBAAQA,WAAW,KAAnB,gCAAvH,sEAIJ,iBAAQ,CACN,GAAM,iBADR,iBAGA,qHACA,uBAAK,gCAAMA,WAAW,OAAU,CAC5B,UAAa,oBADZ,yBAIL,kDACA,uBAAK,gCAAMA,WAAW,OAAU,IAA3B,8D,oOAMTJ,EAAWK,gBAAiB","file":"component---src-pages-docs-installation-async-queries-celery-mdx-49245f277bd031c09f2f.js","sourcesContent":["import * as React from 'react'\n /* @jsx mdx */\nimport { mdx } from '@mdx-js/react';\n/* @jsxRuntime classic */\n\n/* @jsx mdx */\n\nimport DefaultLayout from \"/home/runner/work/superset/superset/docs/node_modules/gatsby-theme-docz/src/base/Layout.js\";\nexport const _frontmatter = {};\nconst layoutProps = {\n _frontmatter\n};\nconst MDXLayout = DefaultLayout;\nexport default function MDXContent({\n components,\n ...props\n}) {\n return <MDXLayout {...layoutProps} {...props} components={components} mdxType=\"MDXLayout\">\n\n\n <h2 {...{\n \"id\": \"async-queries-via-celery\"\n }}>{`Async Queries via Celery`}</h2>\n <h3 {...{\n \"id\": \"celery\"\n }}>{`Celery`}</h3>\n <p>{`On large analytic databases, it’s common to run queries that execute for minutes or hours. To enable\nsupport for long running queries that execute beyond the typical web request’s timeout (30-60\nseconds), it is necessary to configure an asynchronous backend for Superset which consists of:`}</p>\n <ul>\n <li parentName=\"ul\">{`one or many Superset workers (which is implemented as a Celery worker), and can be started with\nthe `}<inlineCode parentName=\"li\">{`celery worker`}</inlineCode>{` command, run `}<inlineCode parentName=\"li\">{`celery worker --help`}</inlineCode>{` to view the related options.`}</li>\n <li parentName=\"ul\">{`a celery broker (message queue) for which we recommend using Redis or RabbitMQ`}</li>\n <li parentName=\"ul\">{`a results backend that defines where the worker will persist the query results`}</li>\n </ul>\n <p>{`Configuring Celery requires defining a `}<inlineCode parentName=\"p\">{`CELERY_CONFIG`}</inlineCode>{` in your `}<inlineCode parentName=\"p\">{`superset_config.py`}</inlineCode>{`. Both the worker\nand web server processes should have the same configuration.`}</p>\n <pre><code parentName=\"pre\" {...{\n \"className\": \"language-python\"\n }}>{`class CeleryConfig(object):\n BROKER_URL = 'redis://localhost:6379/0'\n CELERY_IMPORTS = (\n 'superset.sql_lab',\n 'superset.tasks',\n )\n CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'\n CELERYD_LOG_LEVEL = 'DEBUG'\n CELERYD_PREFETCH_MULTIPLIER = 10\n CELERY_ACKS_LATE = True\n CELERY_ANNOTATIONS = {\n 'sql_lab.get_sql_results': {\n 'rate_limit': '100/s',\n },\n 'email_reports.send': {\n 'rate_limit': '1/s',\n 'time_limit': 120,\n 'soft_time_limit': 150,\n 'ignore_result': True,\n },\n }\n CELERYBEAT_SCHEDULE = {\n 'email_reports.schedule_hourly': {\n 'task': 'email_reports.schedule_hourly',\n 'schedule': crontab(minute=1, hour='*'),\n },\n }\n\nCELERY_CONFIG = CeleryConfig\n`}</code></pre>\n <p>{`To start a Celery worker to leverage the configuration, run the following command:`}</p>\n <pre><code parentName=\"pre\" {...{}}>{`celery --app=superset.tasks.celery_app:app worker --pool=prefork -O fair -c 4\n`}</code></pre>\n <p>{`To start a job which schedules periodic background jobs, run the following command:`}</p>\n <pre><code parentName=\"pre\" {...{}}>{`celery --app=superset.tasks.celery_app:app beat\n`}</code></pre>\n <p>{`To setup a result backend, you need to pass an instance of a derivative of from\ncachelib.base.BaseCache to the RESULTS_BACKEND configuration key in your superset_config.py. You can\nuse Memcached, Redis, S3 (`}<a parentName=\"p\" {...{\n \"href\": \"https://pypi.python.org/pypi/s3werkzeugcache\"\n }}>{`https://pypi.python.org/pypi/s3werkzeugcache`}</a>{`), memory or the file system\n(in a single server-type setup or for testing), or to write your own caching interface. Your\n`}<inlineCode parentName=\"p\">{`superset_config.py`}</inlineCode>{` may look something like:`}</p>\n <pre><code parentName=\"pre\" {...{\n \"className\": \"language-python\"\n }}>{`# On S3\nfrom s3cache.s3cache import S3Cache\nS3_CACHE_BUCKET = 'foobar-superset'\nS3_CACHE_KEY_PREFIX = 'sql_lab_result'\nRESULTS_BACKEND = S3Cache(S3_CACHE_BUCKET, S3_CACHE_KEY_PREFIX)\n\n# On Redis\nfrom cachelib.redis import RedisCache\nRESULTS_BACKEND = RedisCache(\n host='localhost', port=6379, key_prefix='superset_results')\n`}</code></pre>\n <p>{`For performance gains, `}<a parentName=\"p\" {...{\n \"href\": \"https://github.com/msgpack/msgpack-python\"\n }}>{`MessagePack`}</a>{` and\n`}<a parentName=\"p\" {...{\n \"href\": \"https://arrow.apache.org/docs/python/\"\n }}>{`PyArrow`}</a>{` are now used for results serialization. This can be\ndisabled by setting `}<inlineCode parentName=\"p\">{`RESULTS_BACKEND_USE_MSGPACK = False`}</inlineCode>{` in your `}<inlineCode parentName=\"p\">{`superset_config.py`}</inlineCode>{`, should any\nissues arise. Please clear your existing results cache store when upgrading an existing environment.`}</p>\n <p><strong parentName=\"p\">{`Important Notes`}</strong></p>\n <ul>\n <li parentName=\"ul\">\n <p parentName=\"li\">{`It is important that all the worker nodes and web servers in the Superset cluster `}<em parentName=\"p\">{`share a common\nmetadata database`}</em>{`. This means that SQLite will not work in this context since it has limited\nsupport for concurrency and typically lives on the local file system.`}</p>\n </li>\n <li parentName=\"ul\">\n <p parentName=\"li\">{`There should `}<em parentName=\"p\">{`only be one instance of celery beat running`}</em>{` in your entire setup. If not,\nbackground jobs can get scheduled multiple times resulting in weird behaviors like duplicate\ndelivery of reports, higher than expected load / traffic etc.`}</p>\n </li>\n <li parentName=\"ul\">\n <p parentName=\"li\">{`SQL Lab will `}<em parentName=\"p\">{`only run your queries asynchronously if`}</em>{` you enable `}<strong parentName=\"p\">{`Asynchronous Query Execution`}</strong>{`\nin your database settings (Sources > Databases > Edit record).`}</p>\n </li>\n </ul>\n <h3 {...{\n \"id\": \"celery-flower\"\n }}>{`Celery Flower`}</h3>\n <p>{`Flower is a web based tool for monitoring the Celery cluster which you can install from pip:`}</p>\n <pre><code parentName=\"pre\" {...{\n \"className\": \"language-python\"\n }}>{`pip install flower\n`}</code></pre>\n <p>{`You can run flower using:`}</p>\n <pre><code parentName=\"pre\" {...{}}>{`celery flower --app=superset.tasks.celery_app:app\n`}</code></pre>\n\n </MDXLayout>;\n}\n;\nMDXContent.isMDXComponent = true;\n "],"sourceRoot":""}