blob: 475385c5cd8cfd79a84907724d55de9156596642 [file] [log] [blame]
// Webpack JSONP chunk 39: pushes two modules (ids 106 and 144) onto window.webpackJsonp.
//
// Module 106 - compiled MDX page; its metadata `source` field is
//   "@site/docs/ecosystem/kaldi/WriteDockerfileKaldi.md".
//   Exports:
//     frontMatter - an empty object.
//     metadata    - ids, title, slug, permalink, editUrl and version for the page.
//     toc         - nested { value, id, children } entries, one per h2/h3 heading.
//     default     - component `p`. It separates `components` from the remaining props and
//                   renders the page as nested Object(i.b)(...) calls inside a "wrapper"
//                   element tagged mdxType "MDXLayout". p.isMDXComponent is set to true.
//   t(3) is used as an object-merge helper and t(7) as a "props minus listed keys" helper -
//   presumably Babel's extends / objectWithoutProperties helpers; confirm against those modules.
//
// Module 144 - element-creation helpers (imported by module 106 as `i`).
//   Internal `p(components)`: reads context `d`; when given a function it calls it with the
//     context value, when given an object it merges that object over the context value,
//     otherwise it returns the context value unchanged.
//   Export "a" (`u`): renders d.Provider with value p(e.components) around e.children.
//   Export "b" (`m`): when the element type is a string or the props carry `mdxType`, it copies
//     the own props, records `originalType` and `mdxType`, and creates the forwardRef component
//     `b` (displayName "MDXCreateElement") in place of the requested type; otherwise it forwards
//     its arguments unchanged to r.a.createElement.
//   `b` picks what to render in this order: components["<parentName>.<mdxType>"],
//     components[mdxType], the defaults in `s` (inlineCode -> "code", wrapper -> Fragment),
//     and finally originalType.
//   t(0) is presumably React (createContext / useContext / forwardRef / createElement are
//   called on it) - confirm.
//
// NOTE(review): in this copy the line breaks fall inside string literals and directly after a
//   `return` keyword; confirm they are wrapping artifacts before running the file as-is.
// NOTE(review): the page text says "base image (w/o GPU support)" while the snippet it
//   introduces starts FROM an nvidia/cuda image and configures with --use-cuda. If that is
//   unintended, correct it in the markdown referenced by metadata.editUrl rather than here.
(window.webpackJsonp=window.webpackJsonp||[]).push([[39],{106:function(e,n,t){"use strict";t.r(n),t.d(n,"frontMatter",(function(){return o})),t.d(n,"metadata",(function(){return l})),t.d(n,"toc",(function(){return c})),t.d(n,"default",(function(){return p}));var a=t(3),r=t(7),i=(t(0),t(144)),o={},l={unversionedId:"ecosystem/kaldi/WriteDockerfileKaldi",id:"ecosystem/kaldi/WriteDockerfileKaldi",isDocsHomePage:!1,title:"WriteDockerfileKaldi",description:"\x3c!--",source:"@site/docs/ecosystem/kaldi/WriteDockerfileKaldi.md",slug:"/ecosystem/kaldi/WriteDockerfileKaldi",permalink:"/docs/ecosystem/kaldi/WriteDockerfileKaldi",editUrl:"https://github.com/apache/submarine/edit/master/website/docs/ecosystem/kaldi/WriteDockerfileKaldi.md",version:"current"},c=[{value:"Creating Docker Images for Running Kaldi on YARN",id:"creating-docker-images-for-running-kaldi-on-yarn",children:[{value:"How to create docker images to run Kaldi on YARN",id:"how-to-create-docker-images-to-run-kaldi-on-yarn",children:[]},{value:"Use examples to build your own Kaldi docker images",id:"use-examples-to-build-your-own-kaldi-docker-images",children:[]},{value:"Build Docker images",id:"build-docker-images",children:[]}]}],d={toc:c};function p(e){var n=e.components,t=Object(r.a)(e,["components"]);return Object(i.b)("wrapper",Object(a.a)({},d,t,{components:n,mdxType:"MDXLayout"}),Object(i.b)("h2",{id:"creating-docker-images-for-running-kaldi-on-yarn"},"Creating Docker Images for Running Kaldi on YARN"),Object(i.b)("h3",{id:"how-to-create-docker-images-to-run-kaldi-on-yarn"},"How to create docker images to run Kaldi on YARN"),Object(i.b)("p",null,"Dockerfile to run Kaldi on YARN need two part:"),Object(i.b)("p",null,Object(i.b)("strong",{parentName:"p"},"Base libraries which Kaldi depends on")),Object(i.b)("p",null,"1) OS base image, for example ",Object(i.b)("inlineCode",{parentName:"p"},"nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04")),Object(i.b)("p",null,"2) Kaldi depended libraries and packages. 
For example ",Object(i.b)("inlineCode",{parentName:"p"},"python"),", ",Object(i.b)("inlineCode",{parentName:"p"},"g++"),", ",Object(i.b)("inlineCode",{parentName:"p"},"make"),". For GPU support, need ",Object(i.b)("inlineCode",{parentName:"p"},"cuda"),", ",Object(i.b)("inlineCode",{parentName:"p"},"cudnn"),", etc."),Object(i.b)("p",null,"3) Kaldi compile."),Object(i.b)("p",null,Object(i.b)("strong",{parentName:"p"},"Libraries to access HDFS")),Object(i.b)("p",null,"1) JDK"),Object(i.b)("p",null,"2) Hadoop"),Object(i.b)("p",null,"Here's an example of a base image (w/o GPU support) to install Kaldi:"),Object(i.b)("pre",null,Object(i.b)("code",{parentName:"pre",className:"language-shell"},"FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04\n\nRUN apt-get clean && \\\n apt-get update && \\\n apt-get install -y --no-install-recommends \\\n sudo \\\n openjdk-8-jdk \\\n iputils-ping \\\n g++ \\\n make \\\n automake \\\n autoconf \\\n bzip2 \\\n unzip \\\n wget \\\n sox \\\n libtool \\\n git \\\n subversion \\\n python2.7 \\\n python3 \\\n zlib1g-dev \\\n ca-certificates \\\n patch \\\n ffmpeg \\\n vim && \\\n rm -rf /var/lib/apt/lists/* && \\\n ln -s /usr/bin/python2.7 /usr/bin/python\n\nRUN git clone --depth 1 https://github.com/kaldi-asr/kaldi.git /opt/kaldi && \\\n cd /opt/kaldi && \\\n cd /opt/kaldi/tools && \\\n ./extras/install_mkl.sh && \\\n make -j $(nproc) && \\\n cd /opt/kaldi/src && \\\n ./configure --shared --use-cuda && \\\n make depend -j $(nproc) && \\\n make -j $(nproc)\n")),Object(i.b)("p",null,"On top of above image, add files, install packages to access HDFS"),Object(i.b)("pre",null,Object(i.b)("code",{parentName:"pre",className:"language-shell"},'RUN apt-get update && apt-get install -y openjdk-8-jdk wget\n# Install hadoop\nENV HADOOP_VERSION="3.2.1"\nENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64\nRUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \\\n tar zxf hadoop-${HADOOP_VERSION}.tar.gz 
&& \\\n ln -s hadoop-${HADOOP_VERSION} hadoop-current && \\\n rm hadoop-${HADOOP_VERSION}.tar.gz\n')),Object(i.b)("p",null,"Build and push to your own docker registry: Use ",Object(i.b)("inlineCode",{parentName:"p"},"docker build ... ")," and ",Object(i.b)("inlineCode",{parentName:"p"},"docker push ...")," to finish this step."),Object(i.b)("h3",{id:"use-examples-to-build-your-own-kaldi-docker-images"},"Use examples to build your own Kaldi docker images"),Object(i.b)("p",null,"We provided following examples for you to build kaldi docker images."),Object(i.b)("p",null,"For latest Kaldi"),Object(i.b)("ul",null,Object(i.b)("li",{parentName:"ul"},"*base/ubuntu-18.04/Dockerfile.gpu.kaldi_latest: Latest Kaldi that supports GPU, which is prebuilt to CUDA10, with models.")),Object(i.b)("h3",{id:"build-docker-images"},"Build Docker images"),Object(i.b)("h4",{id:"manually-build-docker-image"},"Manually build Docker image:"),Object(i.b)("p",null,"Under ",Object(i.b)("inlineCode",{parentName:"p"},"docker/")," directory,The CLUSTER_NAME can be modified in build-all.sh to have installation permissions, run ",Object(i.b)("inlineCode",{parentName:"p"},"build-all.sh")," to build Docker images. 
It will build following images:"),Object(i.b)("ul",null,Object(i.b)("li",{parentName:"ul"},Object(i.b)("inlineCode",{parentName:"li"},"kaldi-latest-gpu-base:0.0.1")," for base Docker image which includes Hadoop, Kaldi, GPU base libraries, which includes thchs30 model.")),Object(i.b)("h4",{id:"use-prebuilt-images"},"Use prebuilt images"),Object(i.b)("p",null,"(No liability)\nYou can also use prebuilt images for convenience in the docker hub:"),Object(i.b)("ul",null,Object(i.b)("li",{parentName:"ul"},"hadoopsubmarine/kaldi-latest-gpu-base:0.0.1")))}p.isMDXComponent=!0},144:function(e,n,t){"use strict";t.d(n,"a",(function(){return u})),t.d(n,"b",(function(){return m}));var a=t(0),r=t.n(a);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function l(e){for(var n=1;n<arguments.length;n++){var t=null!=arguments[n]?arguments[n]:{};n%2?o(Object(t),!0).forEach((function(n){i(e,n,t[n])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(t)):o(Object(t)).forEach((function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(t,n))}))}return e}function c(e,n){if(null==e)return{};var t,a,r=function(e,n){if(null==e)return{};var t,a,r={},i=Object.keys(e);for(a=0;a<i.length;a++)t=i[a],n.indexOf(t)>=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a<i.length;a++)t=i[a],n.indexOf(t)>=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var d=r.a.createContext({}),p=function(e){var n=r.a.useContext(d),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},u=function(e){var n=p(e.components);return 
r.a.createElement(d.Provider,{value:n},e.children)},s={inlineCode:"code",wrapper:function(e){var n=e.children;return r.a.createElement(r.a.Fragment,{},n)}},b=r.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,i=e.originalType,o=e.parentName,d=c(e,["components","mdxType","originalType","parentName"]),u=p(t),b=a,m=u["".concat(o,".").concat(b)]||u[b]||s[b]||i;return t?r.a.createElement(m,l(l({ref:n},d),{},{components:t})):r.a.createElement(m,l({ref:n},d))}));function m(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var i=t.length,o=new Array(i);o[0]=b;var l={};for(var c in n)hasOwnProperty.call(n,c)&&(l[c]=n[c]);l.originalType=e,l.mdxType="string"==typeof e?e:a,o[1]=l;for(var d=2;d<i;d++)o[d]=t[d];return r.a.createElement.apply(null,o)}return r.a.createElement.apply(null,t)}b.displayName="MDXCreateElement"}}]);