blob: 575da18fe598b5596ca7e8eda33932b1f0bd013c [file] [log] [blame]
<!doctype html>
<html lang="zh-cn" dir="ltr" class="docs-wrapper docs-doc-page docs-version-0.6.0 plugin-docs plugin-id-default docs-doc-id-adminDocs/yarn/TestAndTroubleshooting">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.18">
<title data-rh="true">Test and Troubleshooting | Apache Submarine</title><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://submarine.apache.org//zh-cn/docs/0.6.0/adminDocs/yarn/TestAndTroubleshooting"><meta data-rh="true" name="docusaurus_locale" content="zh-cn"><meta data-rh="true" name="docsearch:language" content="zh-cn"><meta data-rh="true" name="docusaurus_version" content="0.6.0"><meta data-rh="true" name="docusaurus_tag" content="docs-default-0.6.0"><meta data-rh="true" name="docsearch:version" content="0.6.0"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-0.6.0"><meta data-rh="true" property="og:title" content="Test and Troubleshooting | Apache Submarine"><meta data-rh="true" name="description" content="&lt;!--"><meta data-rh="true" property="og:description" content="&lt;!--"><link data-rh="true" rel="icon" href="/zh-cn/img/submarine.ico"><link data-rh="true" rel="canonical" href="https://submarine.apache.org//zh-cn/docs/0.6.0/adminDocs/yarn/TestAndTroubleshooting"><link data-rh="true" rel="alternate" href="https://submarine.apache.org//docs/0.6.0/adminDocs/yarn/TestAndTroubleshooting" hreflang="en"><link data-rh="true" rel="alternate" href="https://submarine.apache.org//zh-cn/docs/0.6.0/adminDocs/yarn/TestAndTroubleshooting" hreflang="zh-cn"><link data-rh="true" rel="alternate" href="https://submarine.apache.org//docs/0.6.0/adminDocs/yarn/TestAndTroubleshooting" hreflang="x-default"><link rel="stylesheet" href="/zh-cn/assets/css/styles.80258812.css">
<link rel="preload" href="/zh-cn/assets/js/runtime~main.aaa6cb63.js" as="script">
<link rel="preload" href="/zh-cn/assets/js/main.54762d30.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region"><a href="#" class="skipToContent_ZgBM">Skip to main content</a></div><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/zh-cn/"><div class="navbar__logo"><img src="/zh-cn/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--light_TfLj"><img src="/zh-cn/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--dark_oUvU"></div><b class="navbar__title">Apache Submarine</b></a><a class="navbar__item navbar__link" href="/zh-cn/docs/0.6.0/gettingStarted/quickstart">文档</a><a class="navbar__item navbar__link" href="/zh-cn/docs/0.6.0/api/environment">API</a><a class="navbar__item navbar__link" href="/zh-cn/docs/0.6.0/download">下载</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link"><span><svg viewBox="0 0 24 24" width="20" height="20" aria-hidden="true" class="iconLanguage_dNtB"><path fill="currentColor" d="M12.87 15.07l-2.54-2.51.03-.03c1.74-1.94 2.98-4.17 3.71-6.53H17V4h-7V2H8v2H1v1.99h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11.76-2.04zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2l-4.5-12zm-2.62 7l1.62-4.33L19.12 17h-3.24z"></path></svg><span>中文</span></span></a><ul class="dropdown__menu"><li><a href="/docs/0.6.0/adminDocs/yarn/TestAndTroubleshooting" target="_self" rel="noopener noreferrer" class="dropdown__link">English</a></li><li><a href="/zh-cn/docs/0.6.0/adminDocs/yarn/TestAndTroubleshooting" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active">中文</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link" aria-haspopup="true" aria-expanded="false" role="button" href="/zh-cn/docs/0.6.0/gettingStarted/quickstart">0.6.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/zh-cn/docs/next/gettingStarted/quickstart">master 🏃</a></li><li><a class="dropdown__link" href="/zh-cn/docs/gettingStarted/quickstart">0.8.0</a></li><li><a class="dropdown__link" href="/zh-cn/docs/0.7.0/gettingStarted/quickstart">0.7.0</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/zh-cn/docs/0.6.0/adminDocs/yarn/TestAndTroubleshooting">0.6.0</a></li><li><a class="dropdown__link" href="/zh-cn/versions">All versions</a></li></ul></div><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache 软件基金会<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache 授权<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">感谢<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">安全<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">赞助<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="toggle_S7eR colorModeToggle_vKtC"><button class="clean-btn toggleButton_rCf9 toggleButtonDisabled_Pu9x" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_v35p"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_nQuB"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input search-bar"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper"><div class="docPage_P2Lg"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_RiI4" type="button"></button><main class="docMainContainer_TCnq docMainContainerEnhanced_WDCb"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_DM6M"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Submarine<!-- --> <b>0.6.0</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/zh-cn/docs/gettingStarted/quickstart">latest version</a></b> (<!-- -->0.8.0<!-- -->).</div></div><div class="docItemContainer_vinB"><article><span class="theme-doc-version-badge badge badge--secondary">Version: 0.6.0</span><div class="tocCollapsible_jdIR theme-doc-toc-mobile tocMobile_TmEX"><button type="button" class="clean-btn tocCollapsibleButton_Fzxq">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Test and Troubleshooting</h1></header><h2 class="anchor anchorWithStickyNavbar_mojV" id="test-with-a-tensorflow-job">Test with a tensorflow job<a class="hash-link" href="#test-with-a-tensorflow-job" title="Direct link to heading"></a></h2><p>Distributed-shell + GPU + cgroup</p><div class="codeBlockContainer_I0IT language-bash theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-bash codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">..</span><span class="token plain">. </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> job run </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">DOCKER_JAVA_HOME</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">/opt/java </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">DOCKER_HADOOP_HDFS_HOME</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">/hadoop-current --name distributed-tf-gpu </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">calico-network </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_docker_image tf-1.13.1-gpu:0.0.1 </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_docker_image tf-1.13.1-cpu:0.0.1 </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path hdfs://</span><span class="token variable" style="color:rgb(191, 199, 213)">${dfs_name_service}</span><span class="token plain">/tmp/cifar-10-data </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --checkpoint_path hdfs://</span><span class="token variable" style="color:rgb(191, 199, 213)">${dfs_name_service}</span><span class="token plain">/user/hadoop/tf-distributed-checkpoint </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps </span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">memory</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">4G,vcores</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">2</span><span class="token plain">,gpu</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;python /test/cifar10_estimator/cifar10_main.py --data-dir=hdfs://</span><span class="token string variable" style="color:rgb(191, 199, 213)">${dfs_name_service}</span><span class="token string" style="color:rgb(195, 232, 141)">/tmp/cifar-10-data --job-dir=hdfs://</span><span class="token string variable" style="color:rgb(191, 199, 213)">${dfs_name_service}</span><span class="token string" style="color:rgb(195, 232, 141)">/tmp/cifar-10-jobdir --num-gpus=0&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources </span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">memory</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">4G,vcores</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">2</span><span class="token plain">,gpu</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">1</span><span class="token plain"> --verbose </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers </span><span class="token number" style="color:rgb(247, 140, 108)">1</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;python /test/cifar10_estimator/cifar10_main.py --data-dir=hdfs://</span><span class="token string variable" style="color:rgb(191, 199, 213)">${dfs_name_service}</span><span class="token string" style="color:rgb(195, 232, 141)">/tmp/cifar-10-data --job-dir=hdfs://</span><span class="token string variable" style="color:rgb(191, 199, 213)">${dfs_name_service}</span><span class="token string" style="color:rgb(195, 232, 141)">/tmp/cifar-10-jobdir --train-steps=500 --eval-batch-size=16 --train-batch-size=16 --sync --num-gpus=1&quot;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h2 class="anchor anchorWithStickyNavbar_mojV" id="issues">Issues:<a class="hash-link" href="#issues" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="issue-1-fail-to-start-nodemanager-after-system-reboot">Issue 1: Fail to start nodemanager after system reboot<a class="hash-link" href="#issue-1-fail-to-start-nodemanager-after-system-reboot" title="Direct link to heading"></a></h3><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">2018-09-20 18:54:39,785 ERROR org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor: Failed to bootstrap configured resource subsystems!</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException: Unexpected: Cannot create yarn cgroup Subsystem:cpu Mount points:/proc/mounts User:yarn Path:/sys/fs/cgroup/cpu,cpuacct/hadoop-yarn</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandlerImpl.initializePreMountedCGroupController(CGroupsHandlerImpl.java:425)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandlerImpl.initializeCGroupController(CGroupsHandlerImpl.java:377)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsCpuResourceHandlerImpl.bootstrap(CGroupsCpuResourceHandlerImpl.java:98)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsCpuResourceHandlerImpl.bootstrap(CGroupsCpuResourceHandlerImpl.java:87)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain.bootstrap(ResourceHandlerChain.java:58)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor.init(LinuxContainerExecutor.java:320)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.NodeManager.serviceInit(NodeManager.java:389)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.NodeManager.initAndStartNodeManager(NodeManager.java:929)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.yarn.server.nodemanager.NodeManager.main(NodeManager.java:997)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2018-09-20 18:54:39,789 INFO org.apache.hadoop.service.AbstractService: Service NodeManager failed in state INITED</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>Solution: Grant user yarn the access to <code>/sys/fs/cgroup/cpu,cpuacct</code>, which is the subfolder of cgroup mount destination.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">chown :yarn -R /sys/fs/cgroup/cpu,cpuacct</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">chmod g+rwx -R /sys/fs/cgroup/cpu,cpuacct</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>If GPUs are used,the access to cgroup devices folder is neede as well</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">chown :yarn -R /sys/fs/cgroup/devices</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">chmod g+rwx -R /sys/fs/cgroup/devices</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="issue-2-container-executor-permission-denied">Issue 2: container-executor permission denied<a class="hash-link" href="#issue-2-container-executor-permission-denied" title="Direct link to heading"></a></h3><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">2018-09-21 09:36:26,102 WARN org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor: IOException executing command:</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java.io.IOException: Cannot run program &quot;/etc/yarn/sbin/Linux-amd64-64/container-executor&quot;: error=13, Permission denied</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.util.Shell.runCommand(Shell.java:938)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.util.Shell.run(Shell.java:901)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:1213)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>Solution: The permission of <code>/etc/yarn/sbin/Linux-amd64-64/container-executor</code> should be 6050</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="issue-3how-to-get-docker-service-log">Issue 3:How to get docker service log<a class="hash-link" href="#issue-3how-to-get-docker-service-log" title="Direct link to heading"></a></h3><p>Solution: we can get docker log with the following command</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">journalctl -u docker</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="issue-4docker-cant-remove-containers-with-errors-like-device-or-resource-busy">Issue 4:docker can&#x27;t remove containers with errors like <code>device or resource busy</code><a class="hash-link" href="#issue-4docker-cant-remove-containers-with-errors-like-device-or-resource-busy" title="Direct link to heading"></a></h3><div class="codeBlockContainer_I0IT language-bash theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-bash codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">$ </span><span class="token function" style="color:rgb(130, 170, 255)">docker</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">rm</span><span class="token plain"> 0bfafa146431</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">Error response from daemon: Unable to remove filesystem </span><span class="token keyword" style="font-style:italic">for</span><span class="token plain"> 0bfafa146431771f6024dcb9775ef47f170edb2f1852f71916ba44209ca6120a: remove /app/docker/containers/0bfafa146431771f6024dcb9775ef47f170edb2f152f71916ba44209ca6120a/shm: device or resource busy</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>Solution: to find which process leads to a <code>device or resource busy</code>, we can add a shell script, named <code>find-busy-mnt.sh</code></p><div class="codeBlockContainer_I0IT language-bash theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-bash codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token shebang important">#!/usr/bin/env bash</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># A simple script to get information about mount points and pids and their</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># mount namespaces.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">if</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"> </span><span class="token variable" style="color:rgb(191, 199, 213)">$#</span><span class="token plain"> -ne </span><span class="token number" style="color:rgb(247, 140, 108)">1</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">;</span><span class="token keyword" style="font-style:italic">then</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">echo</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;Usage: </span><span class="token string variable" style="color:rgb(191, 199, 213)">$0</span><span class="token string" style="color:rgb(195, 232, 141)"> &lt;devicemapper-device-id&gt;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">exit</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">1</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">fi</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">ID</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token variable" style="color:rgb(191, 199, 213)">$1</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">MOUNTS</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token variable" style="color:rgb(191, 199, 213)">`</span><span class="token variable function" style="color:rgb(130, 170, 255)">find</span><span class="token variable" style="color:rgb(191, 199, 213)"> /proc/*/mounts </span><span class="token variable operator" style="color:rgb(137, 221, 255)">|</span><span class="token variable" style="color:rgb(191, 199, 213)"> </span><span class="token variable function" style="color:rgb(130, 170, 255)">xargs</span><span class="token variable" style="color:rgb(191, 199, 213)"> </span><span class="token variable function" style="color:rgb(130, 170, 255)">grep</span><span class="token variable" style="color:rgb(191, 199, 213)"> $ID </span><span class="token variable operator file-descriptor important" style="color:rgb(137, 221, 255)">2</span><span class="token variable operator" style="color:rgb(137, 221, 255)">&gt;</span><span class="token variable" style="color:rgb(191, 199, 213)">/dev/null</span><span class="token variable" style="color:rgb(191, 199, 213)">`</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"> -z </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">$MOUNTS</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">&amp;&amp;</span><span class="token plain"> </span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">echo</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;No pids found&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">&amp;&amp;</span><span class="token plain"> </span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">exit</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">printf</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;PID</span><span class="token string entity" style="color:rgb(195, 232, 141)">\t</span><span class="token string" style="color:rgb(195, 232, 141)">NAME</span><span class="token string entity" style="color:rgb(195, 232, 141)">\t</span><span class="token string entity" style="color:rgb(195, 232, 141)">\t</span><span class="token string" style="color:rgb(195, 232, 141)">MNTNS</span><span class="token string entity" style="color:rgb(195, 232, 141)">\n</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">echo</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">$MOUNTS</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">|</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">while</span><span class="token plain"> </span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">read</span><span class="token plain"> LINE</span><span class="token punctuation" style="color:rgb(199, 146, 234)">;</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">do</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">PID</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token variable" style="color:rgb(191, 199, 213)">`</span><span class="token variable builtin class-name" style="color:rgb(255, 203, 107)">echo</span><span class="token variable" style="color:rgb(191, 199, 213)"> $LINE </span><span class="token variable operator" style="color:rgb(137, 221, 255)">|</span><span class="token variable" style="color:rgb(191, 199, 213)"> </span><span class="token variable function" style="color:rgb(130, 170, 255)">cut</span><span class="token variable" style="color:rgb(191, 199, 213)"> -d </span><span class="token variable string" style="color:rgb(195, 232, 141)">&quot;:&quot;</span><span class="token variable" style="color:rgb(191, 199, 213)"> -f1 </span><span class="token variable operator" style="color:rgb(137, 221, 255)">|</span><span class="token variable" style="color:rgb(191, 199, 213)"> </span><span class="token variable function" style="color:rgb(130, 170, 255)">cut</span><span class="token variable" style="color:rgb(191, 199, 213)"> -d </span><span class="token variable string" style="color:rgb(195, 232, 141)">&quot;/&quot;</span><span class="token variable" style="color:rgb(191, 199, 213)"> -f3</span><span class="token variable" style="color:rgb(191, 199, 213)">`</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Ignore self and thread-self</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">if</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">$PID</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">==</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;self&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">||</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">$PID</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"> </span><span class="token operator" style="color:rgb(137, 221, 255)">==</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;thread-self&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">;</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">then</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">continue</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">fi</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">NAME</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token variable" style="color:rgb(191, 199, 213)">`</span><span class="token variable function" style="color:rgb(130, 170, 255)">ps</span><span class="token variable" style="color:rgb(191, 199, 213)"> -q $PID -o </span><span class="token variable assign-left variable" style="color:rgb(191, 199, 213)">comm</span><span class="token variable operator" style="color:rgb(137, 221, 255)">=</span><span class="token variable" style="color:rgb(191, 199, 213)">`</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token assign-left variable" style="color:rgb(191, 199, 213)">MNTNS</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token variable" style="color:rgb(191, 199, 213)">`</span><span class="token variable" style="color:rgb(191, 199, 213)">readlink /proc/$PID/ns/mnt</span><span class="token variable" style="color:rgb(191, 199, 213)">`</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token builtin class-name" style="color:rgb(255, 203, 107)">printf</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;%s</span><span class="token string entity" style="color:rgb(195, 232, 141)">\t</span><span class="token string" style="color:rgb(195, 232, 141)">%s</span><span class="token string entity" style="color:rgb(195, 232, 141)">\t</span><span class="token string entity" style="color:rgb(195, 232, 141)">\t</span><span class="token string" style="color:rgb(195, 232, 141)">%s</span><span class="token string entity" style="color:rgb(195, 232, 141)">\n</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">$PID</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">$NAME</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token string variable" style="color:rgb(191, 199, 213)">$MNTNS</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">done</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>Kill the process by pid, which is found by the script</p><div class="codeBlockContainer_I0IT language-bash theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-bash codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">$ </span><span class="token function" style="color:rgb(130, 170, 255)">chmod</span><span class="token plain"> +x find-busy-mnt.sh</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">./find-busy-mnt.sh 0bfafa146431771f6024dcb9775ef47f170edb2f152f71916ba44209ca6120a</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># PID NAME MNTNS</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># 5007 ntpd mnt:[4026533598]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">$ </span><span class="token function" style="color:rgb(130, 170, 255)">kill</span><span class="token plain"> -9 </span><span class="token number" style="color:rgb(247, 140, 108)">5007</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="issue-5yarn-failed-to-start-containers">Issue 5:Yarn failed to start containers<a class="hash-link" href="#issue-5yarn-failed-to-start-containers" title="Direct link to heading"></a></h3><p>if the number of GPUs required by applications is larger than the number of GPUs in the cluster, there would be some containers can&#x27;t be created.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.6.0/adminDocs/yarn/TestAndTroubleshooting.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_dcUD" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_foO9"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"></div><div class="pagination-nav__item pagination-nav__item--next"></div></nav></div></div><div class="col col--3"><div class="tableOfContents_cNA8 thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#test-with-a-tensorflow-job" class="table-of-contents__link toc-highlight">Test with a tensorflow job</a></li><li><a href="#issues" class="table-of-contents__link toc-highlight">Issues:</a><ul><li><a href="#issue-1-fail-to-start-nodemanager-after-system-reboot" class="table-of-contents__link toc-highlight">Issue 1: Fail to start nodemanager after system reboot</a></li><li><a href="#issue-2-container-executor-permission-denied" class="table-of-contents__link toc-highlight">Issue 2: container-executor permission denied</a></li><li><a href="#issue-3how-to-get-docker-service-log" class="table-of-contents__link toc-highlight">Issue 3:How to get docker service log</a></li><li><a href="#issue-4docker-cant-remove-containers-with-errors-like-device-or-resource-busy" class="table-of-contents__link toc-highlight">Issue 4:docker can&#39;t remove containers with errors like <code>device or resource busy</code></a></li><li><a href="#issue-5yarn-failed-to-start-containers" class="table-of-contents__link toc-highlight">Issue 5:Yarn failed to start containers</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">文档</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/zh-cn/docs/gettingStarted/quickstart">入门教程</a></li><li class="footer__item"><a class="footer__link-item" href="/zh-cn/docs/api/environment">API 文档</a></li></ul></div><div class="col footer__col"><div class="footer__title">社区</div><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">更多</div><ul class="footer__items"><li class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">博客</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_gHmE"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--light_TfLj footer__logo"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--dark_oUvU footer__logo"></a></div><div class="footer__copyright">Apache Submarine, Submarine, Apache, the Apache feather logo, and the Apache Submarine project logo are
either registered trademarks or trademarks of the Apache Software Foundation in the United States and other
countries.<br> Copyright © 2023 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div>
<script src="/zh-cn/assets/js/runtime~main.aaa6cb63.js"></script>
<script src="/zh-cn/assets/js/main.54762d30.js"></script>
</body>
</html>