blob: 17b296c841ff6a259d9e8db35dd556ca03f6584f [file] [log] [blame]
"use strict";(self.webpackChunkwebsite=self.webpackChunkwebsite||[]).push([[6672],{3905:function(e,n,t){t.d(n,{Zo:function(){return s},kt:function(){return m}});var a=t(7294);function r(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function i(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n<arguments.length;n++){var t=null!=arguments[n]?arguments[n]:{};n%2?i(Object(t),!0).forEach((function(n){r(e,n,t[n])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(t)):i(Object(t)).forEach((function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(t,n))}))}return e}function l(e,n){if(null==e)return{};var t,a,r=function(e,n){if(null==e)return{};var t,a,r={},i=Object.keys(e);for(a=0;a<i.length;a++)t=i[a],n.indexOf(t)>=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a<i.length;a++)t=i[a],n.indexOf(t)>=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var d=a.createContext({}),u=function(e){var n=a.useContext(d),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},s=function(e){var n=u(e.components);return a.createElement(d.Provider,{value:n},e.children)},p={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},c=a.forwardRef((function(e,n){var t=e.components,r=e.mdxType,i=e.originalType,d=e.parentName,s=l(e,["components","mdxType","originalType","parentName"]),c=u(t),m=r,k=c["".concat(d,".").concat(m)]||c[m]||p[m]||i;return t?a.createElement(k,o(o({ref:n},s),{},{components:t})):a.createElement(k,o({ref:n},s))}));function m(e,n){var t=arguments,r=n&&n.mdxType;if("string"==typeof e||r){var i=t.length,o=new Array(i);o[0]=c;var l={};for(var d in n)hasOwnProperty.call(n,d)&&(l[d]=n[d]);l.originalType=e,l.mdxType="string"==typeof e?e:r,o[1]=l;for(var u=2;u<i;u++)o[u]=t[u];return a.createElement.apply(null,o)}return a.createElement.apply(null,t)}c.displayName="MDXCreateElement"},2100:function(e,n,t){t.r(n),t.d(n,{assets:function(){return s},contentTitle:function(){return d},default:function(){return m},frontMatter:function(){return l},metadata:function(){return u},toc:function(){return p}});var a=t(7462),r=t(3366),i=(t(7294),t(3905)),o=["components"],l={},d=void 0,u={unversionedId:"ecosystem/kaldi/WriteDockerfileKaldi",id:"version-0.6.0/ecosystem/kaldi/WriteDockerfileKaldi",title:"WriteDockerfileKaldi",description:"\x3c!--",source:"@site/versioned_docs/version-0.6.0/ecosystem/kaldi/WriteDockerfileKaldi.md",sourceDirName:"ecosystem/kaldi",slug:"/ecosystem/kaldi/WriteDockerfileKaldi",permalink:"/docs/ecosystem/kaldi/WriteDockerfileKaldi",editUrl:"https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.6.0/ecosystem/kaldi/WriteDockerfileKaldi.md",tags:[],version:"0.6.0",frontMatter:{}},s={},p=[{value:"Creating Docker Images for Running Kaldi on YARN",id:"creating-docker-images-for-running-kaldi-on-yarn",level:2},{value:"How to create docker images to run Kaldi on YARN",id:"how-to-create-docker-images-to-run-kaldi-on-yarn",level:3},{value:"Use examples to build your own Kaldi docker images",id:"use-examples-to-build-your-own-kaldi-docker-images",level:3},{value:"Build Docker images",id:"build-docker-images",level:3},{value:"Manually build Docker image:",id:"manually-build-docker-image",level:4},{value:"Use prebuilt images",id:"use-prebuilt-images",level:4}],c={toc:p};function m(e){var n=e.components,t=(0,r.Z)(e,o);return(0,i.kt)("wrapper",(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"creating-docker-images-for-running-kaldi-on-yarn"},"Creating Docker Images for Running Kaldi on YARN"),(0,i.kt)("h3",{id:"how-to-create-docker-images-to-run-kaldi-on-yarn"},"How to create docker images to run Kaldi on YARN"),(0,i.kt)("p",null,"Dockerfile to run Kaldi on YARN need two part:"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Base libraries which Kaldi depends on")),(0,i.kt)("p",null,"1) OS base image, for example ",(0,i.kt)("inlineCode",{parentName:"p"},"nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04")),(0,i.kt)("p",null,"2) Kaldi depended libraries and packages. For example ",(0,i.kt)("inlineCode",{parentName:"p"},"python"),", ",(0,i.kt)("inlineCode",{parentName:"p"},"g++"),", ",(0,i.kt)("inlineCode",{parentName:"p"},"make"),". For GPU support, need ",(0,i.kt)("inlineCode",{parentName:"p"},"cuda"),", ",(0,i.kt)("inlineCode",{parentName:"p"},"cudnn"),", etc."),(0,i.kt)("p",null,"3) Kaldi compile."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Libraries to access HDFS")),(0,i.kt)("p",null,"1) JDK"),(0,i.kt)("p",null,"2) Hadoop"),(0,i.kt)("p",null,"Here's an example of a base image (w/o GPU support) to install Kaldi:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-shell"},"FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04\n\nRUN apt-get clean && \\\n apt-get update && \\\n apt-get install -y --no-install-recommends \\\n sudo \\\n openjdk-8-jdk \\\n iputils-ping \\\n g++ \\\n make \\\n automake \\\n autoconf \\\n bzip2 \\\n unzip \\\n wget \\\n sox \\\n libtool \\\n git \\\n subversion \\\n python2.7 \\\n python3 \\\n zlib1g-dev \\\n ca-certificates \\\n patch \\\n ffmpeg \\\n vim && \\\n rm -rf /var/lib/apt/lists/* && \\\n ln -s /usr/bin/python2.7 /usr/bin/python\n\nRUN git clone --depth 1 https://github.com/kaldi-asr/kaldi.git /opt/kaldi && \\\n cd /opt/kaldi && \\\n cd /opt/kaldi/tools && \\\n ./extras/install_mkl.sh && \\\n make -j $(nproc) && \\\n cd /opt/kaldi/src && \\\n ./configure --shared --use-cuda && \\\n make depend -j $(nproc) && \\\n make -j $(nproc)\n")),(0,i.kt)("p",null,"On top of above image, add files, install packages to access HDFS"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-shell"},'RUN apt-get update && apt-get install -y openjdk-8-jdk wget\n# Install hadoop\nENV HADOOP_VERSION="3.2.1"\nENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64\nRUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \\\n tar zxf hadoop-${HADOOP_VERSION}.tar.gz && \\\n ln -s hadoop-${HADOOP_VERSION} hadoop-current && \\\n rm hadoop-${HADOOP_VERSION}.tar.gz\n')),(0,i.kt)("p",null,"Build and push to your own docker registry: Use ",(0,i.kt)("inlineCode",{parentName:"p"},"docker build ... ")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"docker push ...")," to finish this step."),(0,i.kt)("h3",{id:"use-examples-to-build-your-own-kaldi-docker-images"},"Use examples to build your own Kaldi docker images"),(0,i.kt)("p",null,"We provided following examples for you to build kaldi docker images."),(0,i.kt)("p",null,"For latest Kaldi"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"*base/ubuntu-18.04/Dockerfile.gpu.kaldi_latest: Latest Kaldi that supports GPU, which is prebuilt to CUDA10, with models.")),(0,i.kt)("h3",{id:"build-docker-images"},"Build Docker images"),(0,i.kt)("h4",{id:"manually-build-docker-image"},"Manually build Docker image:"),(0,i.kt)("p",null,"Under ",(0,i.kt)("inlineCode",{parentName:"p"},"docker/")," directory,The CLUSTER_NAME can be modified in build-all.sh to have installation permissions, run ",(0,i.kt)("inlineCode",{parentName:"p"},"build-all.sh")," to build Docker images. It will build following images:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"kaldi-latest-gpu-base:0.0.1")," for base Docker image which includes Hadoop, Kaldi, GPU base libraries, which includes thchs30 model.")),(0,i.kt)("h4",{id:"use-prebuilt-images"},"Use prebuilt images"),(0,i.kt)("p",null,"(No liability)\nYou can also use prebuilt images for convenience in the docker hub:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"hadoopsubmarine/kaldi-latest-gpu-base:0.0.1")))}m.isMDXComponent=!0}}]);