blob: c81d28b5da3da4d97a8b8603c50875119c15c7e7 [file] [log] [blame]
{"componentChunkName":"component---node-modules-rocketseat-gatsby-theme-docs-core-src-templates-docs-query-js","path":"/.cache/caches/gatsby-source-remote-file/18b537c6ac9517f3517d0f9a7bcc12b6/getting-started","result":{"data":{"mdx":{"id":"6de4f234-9fd3-5dae-b267-78af915365a6","excerpt":"Apache Liminal Apache Liminal is an end-to-end platform for data engineers & scientists, allowing them to build,\ntrain and deploy machine learning models in a…","fields":{"slug":"/.cache/caches/gatsby-source-remote-file/18b537c6ac9517f3517d0f9a7bcc12b6/getting-started/"},"frontmatter":{"title":"","description":null,"image":null,"disableTableOfContents":null},"body":"function _extends() { _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; return _extends.apply(this, arguments); }\n\nfunction _objectWithoutProperties(source, excluded) { if (source == null) return {}; var target = _objectWithoutPropertiesLoose(source, excluded); var key, i; if (Object.getOwnPropertySymbols) { var sourceSymbolKeys = Object.getOwnPropertySymbols(source); for (i = 0; i < sourceSymbolKeys.length; i++) { key = sourceSymbolKeys[i]; if (excluded.indexOf(key) >= 0) continue; if (!Object.prototype.propertyIsEnumerable.call(source, key)) continue; target[key] = source[key]; } } return target; }\n\nfunction _objectWithoutPropertiesLoose(source, excluded) { if (source == null) return {}; var target = {}; var sourceKeys = Object.keys(source); var key, i; for (i = 0; i < sourceKeys.length; i++) { key = sourceKeys[i]; if (excluded.indexOf(key) >= 0) continue; target[key] = source[key]; } return target; }\n\n/* @jsxRuntime classic */\n\n/* @jsx mdx */\nvar _frontmatter = {};\nvar layoutProps = {\n _frontmatter: _frontmatter\n};\nvar MDXLayout = \"wrapper\";\nreturn function MDXContent(_ref) {\n var components = _ref.components,\n props = _objectWithoutProperties(_ref, [\"components\"]);\n\n return mdx(MDXLayout, _extends({}, layoutProps, props, {\n components: components,\n mdxType: \"MDXLayout\"\n }), mdx(\"h1\", {\n \"id\": \"apache-liminal\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", _extends({\n parentName: \"h1\"\n }, {\n \"href\": \"#apache-liminal\",\n \"aria-label\": \"apache liminal permalink\",\n \"className\": \"anchor before\"\n }), mdx(\"svg\", _extends({\n parentName: \"a\"\n }, {\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }), mdx(\"path\", _extends({\n parentName: \"svg\"\n }, {\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n })))), \"Apache Liminal\"), mdx(\"p\", null, \"Apache Liminal is an end-to-end platform for data engineers & scientists, allowing them to build,\\ntrain and deploy machine learning models in a robust and agile way.\"), mdx(\"p\", null, \"The platform provides the abstractions and declarative capabilities for\\ndata extraction & feature engineering followed by model training and serving.\\nLiminal's goal is to operationalize the machine learning process, allowing data scientists to\\nquickly transition from a successful experiment to an automated pipeline of model training,\\nvalidation, deployment and inference in production, freeing them from engineering and\\nnon-functional tasks, and allowing them to focus on machine learning code and artifacts.\"), mdx(\"h1\", {\n \"id\": \"basics\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", _extends({\n parentName: \"h1\"\n }, {\n \"href\": \"#basics\",\n \"aria-label\": \"basics permalink\",\n \"className\": \"anchor before\"\n }), mdx(\"svg\", _extends({\n parentName: \"a\"\n }, {\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }), mdx(\"path\", _extends({\n parentName: \"svg\"\n }, {\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n })))), \"Basics\"), mdx(\"p\", null, \"Using simple YAML configuration, create your own schedule data pipelines (a sequence of tasks to\\nperform), application servers, and more.\"), mdx(\"h2\", {\n \"id\": \"example-yaml-config-file\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", _extends({\n parentName: \"h2\"\n }, {\n \"href\": \"#example-yaml-config-file\",\n \"aria-label\": \"example yaml config file permalink\",\n \"className\": \"anchor before\"\n }), mdx(\"svg\", _extends({\n parentName: \"a\"\n }, {\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }), mdx(\"path\", _extends({\n parentName: \"svg\"\n }, {\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n })))), \"Example YAML config file\"), mdx(\"pre\", null, mdx(\"code\", _extends({\n parentName: \"pre\"\n }, {\n \"className\": \"language-yaml\"\n }), \"name: MyPipeline\\nowner: Bosco Albert Baracus\\npipelines:\\n - pipeline: my_pipeline\\n start_date: 1970-01-01\\n timeout_minutes: 45\\n schedule: 0 * 1 * *\\n metrics:\\n namespace: TestNamespace\\n backends: [ 'cloudwatch' ]\\n tasks:\\n - task: my_static_input_task\\n type: python\\n description: static input task\\n image: my_static_input_task_image\\n source: helloworld\\n env_vars:\\n env1: \\\"a\\\"\\n env2: \\\"b\\\"\\n input_type: static\\n input_path: '[ { \\\"foo\\\": \\\"bar\\\" }, { \\\"foo\\\": \\\"baz\\\" } ]'\\n output_path: /output.json\\n cmd: python -u hello_world.py\\n - task: my_parallelized_static_input_task\\n type: python\\n description: parallelized static input task\\n image: my_static_input_task_image\\n env_vars:\\n env1: \\\"a\\\"\\n env2: \\\"b\\\"\\n input_type: static\\n input_path: '[ { \\\"foo\\\": \\\"bar\\\" }, { \\\"foo\\\": \\\"baz\\\" } ]'\\n split_input: True\\n executors: 2\\n cmd: python -u helloworld.py\\n - task: my_task_output_input_task\\n type: python\\n description: task with input from other task's output\\n image: my_task_output_input_task_image\\n source: helloworld\\n env_vars:\\n env1: \\\"a\\\"\\n env2: \\\"b\\\"\\n input_type: task\\n input_path: my_static_input_task\\n cmd: python -u hello_world.py\\nservices:\\n - service:\\n name: my_python_server\\n type: python_server\\n description: my python server\\n image: my_server_image\\n source: myserver\\n endpoints:\\n - endpoint: /myendpoint1\\n module: myserver.my_server\\n function: myendpoint1func\\n\")), mdx(\"h1\", {\n \"id\": \"installation\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", _extends({\n parentName: \"h1\"\n }, {\n \"href\": \"#installation\",\n \"aria-label\": \"installation permalink\",\n \"className\": \"anchor before\"\n }), mdx(\"svg\", _extends({\n parentName: \"a\"\n }, {\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }), mdx(\"path\", _extends({\n parentName: \"svg\"\n }, {\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n })))), \"Installation\"), mdx(\"ol\", null, mdx(\"li\", {\n parentName: \"ol\"\n }, \"Install this package\")), mdx(\"pre\", null, mdx(\"code\", _extends({\n parentName: \"pre\"\n }, {\n \"className\": \"language-bash\"\n }), \" pip install liminal\\n\")), mdx(\"ol\", {\n \"start\": 2\n }, mdx(\"li\", {\n parentName: \"ol\"\n }, \"Optional: set LIMINAL_HOME to path of your choice (if not set, will default to ~/liminal_home)\")), mdx(\"pre\", null, mdx(\"code\", _extends({\n parentName: \"pre\"\n }, {\n \"className\": \"language-bash\"\n }), \"echo 'export LIMINAL_HOME=</path/to/some/folder>' >> ~/.bash_profile && source ~/.bash_profile\\n\")), mdx(\"h1\", {\n \"id\": \"authoring-pipelines\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", _extends({\n parentName: \"h1\"\n }, {\n \"href\": \"#authoring-pipelines\",\n \"aria-label\": \"authoring pipelines permalink\",\n \"className\": \"anchor before\"\n }), mdx(\"svg\", _extends({\n parentName: \"a\"\n }, {\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }), mdx(\"path\", _extends({\n parentName: \"svg\"\n }, {\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n })))), \"Authoring pipelines\"), mdx(\"p\", null, \"This involves at minimum creating a single file called liminal.yml as in the example above.\"), mdx(\"p\", null, \"If your pipeline requires custom python code to implement tasks, they should be organized\\n\", mdx(\"a\", _extends({\n parentName: \"p\"\n }, {\n \"href\": \"https://github.com/apache/incubator-liminal/tree/master/tests/runners/airflow/liminal\"\n }), \"like this\")), mdx(\"p\", null, \"If your pipeline introduces imports of external packages which are not already a part\\nof the liminal framework (i.e. you had to pip install them yourself), you need to also provide\\na requirements.txt in the root of your project.\"), mdx(\"h1\", {\n \"id\": \"testing-the-pipeline-locally\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", _extends({\n parentName: \"h1\"\n }, {\n \"href\": \"#testing-the-pipeline-locally\",\n \"aria-label\": \"testing the pipeline locally permalink\",\n \"className\": \"anchor before\"\n }), mdx(\"svg\", _extends({\n parentName: \"a\"\n }, {\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }), mdx(\"path\", _extends({\n parentName: \"svg\"\n }, {\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n })))), \"Testing the pipeline locally\"), mdx(\"p\", null, \"When your pipeline code is ready, you can test it by running it locally on your machine.\"), mdx(\"ol\", null, mdx(\"li\", {\n parentName: \"ol\"\n }, \"Ensure you have The Docker engine running locally, and enable a local Kubernetes cluster:\\n\", mdx(\"img\", _extends({\n parentName: \"li\"\n }, {\n \"src\": \"https://raw.githubusercontent.com/apache/incubator-liminal/master/images/k8s_running.png\",\n \"alt\": \"Kubernetes configured\"\n })))), mdx(\"p\", null, \"If you want to execute your pipeline on a remote kubernetes cluster, make sure the cluster is configured\\nusing :\"), mdx(\"pre\", null, mdx(\"code\", _extends({\n parentName: \"pre\"\n }, {\n \"className\": \"language-bash\"\n }), \"kubectl config set-context <your remote kubernetes cluster>\\n\")), mdx(\"ol\", {\n \"start\": 2\n }, mdx(\"li\", {\n parentName: \"ol\"\n }, \"Build the docker images used by your pipeline.\")), mdx(\"p\", null, \"In the example pipeline above, you can see that tasks and services have an \\\"image\\\" field - such as\\n\\\"my_static_input_task_image\\\". This means that the task is executed inside a docker container, and the docker container\\nis created from a docker image where various code and libraries are installed.\"), mdx(\"p\", null, \"You can take a look at what the build process looks like, e.g.\\n\", mdx(\"a\", _extends({\n parentName: \"p\"\n }, {\n \"href\": \"https://github.com/apache/incubator-liminal/tree/master/liminal/build/image/python\"\n }), \"here\")), mdx(\"p\", null, \"In order for the images to be available for your pipeline, you'll need to build them locally:\"), mdx(\"pre\", null, mdx(\"code\", _extends({\n parentName: \"pre\"\n }, {\n \"className\": \"language-bash\"\n }), \"cd </path/to/your/liminal/code>\\nliminal build\\n\")), mdx(\"p\", null, \"You'll see that a number of outputs indicating various docker images built.\"), mdx(\"ol\", {\n \"start\": 3\n }, mdx(\"li\", {\n parentName: \"ol\"\n }, \"Deploy the pipeline:\")), mdx(\"pre\", null, mdx(\"code\", _extends({\n parentName: \"pre\"\n }, {\n \"className\": \"language-bash\"\n }), \"cd </path/to/your/liminal/code> \\nliminal deploy\\n\")), mdx(\"ol\", {\n \"start\": 4\n }, mdx(\"li\", {\n parentName: \"ol\"\n }, \"Start the server\")), mdx(\"pre\", null, mdx(\"code\", _extends({\n parentName: \"pre\"\n }, {\n \"className\": \"language-bash\"\n }), \"liminal start\\n\")), mdx(\"ol\", {\n \"start\": 5\n }, mdx(\"li\", {\n parentName: \"ol\"\n }, mdx(\"p\", {\n parentName: \"li\"\n }, \"Navigate to \", mdx(\"a\", _extends({\n parentName: \"p\"\n }, {\n \"href\": \"http://localhost:8080/admin\"\n }), \"http://localhost:8080/admin\"))), mdx(\"li\", {\n parentName: \"ol\"\n }, mdx(\"p\", {\n parentName: \"li\"\n }, \"You should see your \", mdx(\"img\", _extends({\n parentName: \"p\"\n }, {\n \"src\": \"https://raw.githubusercontent.com/apache/incubator-liminal/master/images/airflow.png\",\n \"alt\": \"pipeline\"\n })), \"\\nThe pipeline is scheduled to run according to the \", mdx(\"inlineCode\", {\n parentName: \"p\"\n }, \"json schedule: 0 * 1 * *\"), \" field in the .yml file you provided.\")), mdx(\"li\", {\n parentName: \"ol\"\n }, mdx(\"p\", {\n parentName: \"li\"\n }, \"To manually activate your pipeline:\\nClick your pipeline and then click \\\"trigger DAG\\\"\\nClick \\\"Graph view\\\"\\nYou should see the steps in your pipeline getting executed in \\\"real time\\\" by clicking \\\"Refresh\\\" periodically.\"))), mdx(\"p\", null, mdx(\"img\", _extends({\n parentName: \"p\"\n }, {\n \"src\": \"https://raw.githubusercontent.com/apache/incubator-liminal/master/images/airflow_trigger.png\",\n \"alt\": \"Pipeline activation\"\n }))), mdx(\"h3\", {\n \"id\": \"running-tests-for-contributors\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", _extends({\n parentName: \"h3\"\n }, {\n \"href\": \"#running-tests-for-contributors\",\n \"aria-label\": \"running tests for contributors permalink\",\n \"className\": \"anchor before\"\n }), mdx(\"svg\", _extends({\n parentName: \"a\"\n }, {\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }), mdx(\"path\", _extends({\n parentName: \"svg\"\n }, {\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n })))), \"Running Tests (for contributors)\"), mdx(\"p\", null, \"When doing local development and running Liminal unit-tests, make sure to set LIMINAL_STAND_ALONE_MODE=True\"));\n}\n;\nMDXContent.isMDXComponent = true;","headings":[{"depth":1,"value":"Apache Liminal"},{"depth":1,"value":"Basics"},{"depth":2,"value":"Example YAML config file"},{"depth":1,"value":"Installation"},{"depth":1,"value":"Authoring pipelines"},{"depth":1,"value":"Testing the pipeline locally"},{"depth":3,"value":"Running Tests (for contributors)"}]}},"pageContext":{"slug":"/.cache/caches/gatsby-source-remote-file/18b537c6ac9517f3517d0f9a7bcc12b6/getting-started/","next":{"label":"Introduction","link":"/"}}},"staticQueryHashes":["1954253342","2328931024","2501019404","973074209"]}