blob: 84c863998677f3c24ffee621e1b87efdc2477a78 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-0.7.0 plugin-docs plugin-id-default docs-doc-id-designDocs/wip-designs/submarine-clusterServer">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.18">
<title data-rh="true">Cluster Server Design - High-Availability | Apache Submarine</title><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://submarine.apache.org//docs/0.7.0/designDocs/wip-designs/submarine-clusterServer"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="0.7.0"><meta data-rh="true" name="docusaurus_tag" content="docs-default-0.7.0"><meta data-rh="true" name="docsearch:version" content="0.7.0"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-0.7.0"><meta data-rh="true" property="og:title" content="Cluster Server Design - High-Availability | Apache Submarine"><meta data-rh="true" name="description" content="&lt;!--"><meta data-rh="true" property="og:description" content="&lt;!--"><link data-rh="true" rel="icon" href="/img/submarine.ico"><link data-rh="true" rel="canonical" href="https://submarine.apache.org//docs/0.7.0/designDocs/wip-designs/submarine-clusterServer"><link data-rh="true" rel="alternate" href="https://submarine.apache.org//docs/0.7.0/designDocs/wip-designs/submarine-clusterServer" hreflang="en"><link data-rh="true" rel="alternate" href="https://submarine.apache.org//zh-cn/docs/0.7.0/designDocs/wip-designs/submarine-clusterServer" hreflang="zh-cn"><link data-rh="true" rel="alternate" href="https://submarine.apache.org//docs/0.7.0/designDocs/wip-designs/submarine-clusterServer" hreflang="x-default"><link rel="stylesheet" href="/assets/css/styles.80258812.css">
<link rel="preload" href="/assets/js/runtime~main.9d177e25.js" as="script">
<link rel="preload" href="/assets/js/main.7cd2eed3.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>
/* Theme bootstrap: look up the value stored under the "theme" key in
   localStorage and copy it onto the root element's data-theme attribute.
   If no value is stored, or reading storage throws, "light" is used. */
(function () {
  var savedTheme = null;
  try {
    savedTheme = localStorage.getItem("theme");
  } catch (storageError) {
    /* Errors from the storage lookup are ignored on purpose; savedTheme
       stays null so the default below applies. */
  }
  var activeTheme = savedTheme !== null ? savedTheme : "light";
  document.documentElement.setAttribute("data-theme", activeTheme);
})();
</script><div id="__docusaurus">
<div role="region"><a href="#" class="skipToContent_ZgBM">Skip to main content</a></div><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--light_TfLj"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--dark_oUvU"></div><b class="navbar__title">Apache Submarine</b></a><a class="navbar__item navbar__link navbar__link--active" href="/docs/0.7.0/gettingStarted/quickstart">Docs</a><a class="navbar__item navbar__link" href="/docs/0.7.0/api/environment">API</a><a class="navbar__item navbar__link" href="/docs/0.7.0/download">Download</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link"><span><svg viewBox="0 0 24 24" width="20" height="20" aria-hidden="true" class="iconLanguage_dNtB"><path fill="currentColor" d="M12.87 15.07l-2.54-2.51.03-.03c1.74-1.94 2.98-4.17 3.71-6.53H17V4h-7V2H8v2H1v1.99h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11.76-2.04zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2l-4.5-12zm-2.62 7l1.62-4.33L19.12 17h-3.24z"></path></svg><span>English</span></span></a><ul class="dropdown__menu"><li><a href="/docs/0.7.0/designDocs/wip-designs/submarine-clusterServer" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active">English</a></li><li><a 
href="/zh-cn/docs/0.7.0/designDocs/wip-designs/submarine-clusterServer" target="_self" rel="noopener noreferrer" class="dropdown__link">中文</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link" aria-haspopup="true" aria-expanded="false" role="button" href="/docs/0.7.0/gettingStarted/quickstart">0.7.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/gettingStarted/quickstart">master 🏃</a></li><li><a class="dropdown__link" href="/docs/gettingStarted/quickstart">0.8.0</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/0.7.0/designDocs/wip-designs/submarine-clusterServer">0.7.0</a></li><li><a class="dropdown__link" href="/docs/0.6.0/designDocs/wip-designs/submarine-clusterServer">0.6.0</a></li><li><a class="dropdown__link" href="/versions">All versions</a></li></ul></div><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener 
noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="toggle_S7eR colorModeToggle_vKtC"><button class="clean-btn toggleButton_rCf9 toggleButtonDisabled_Pu9x" type="button" 
disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_v35p"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_nQuB"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input 
search-bar"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper"><div class="docPage_P2Lg"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_RiI4" type="button"></button><aside class="theme-doc-sidebar-container docSidebarContainer_rKC_"><div class="sidebar_RiAD"><nav class="menu thin-scrollbar menu_izAj"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/0.7.0/gettingStarted/quickstart">Getting Started</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/0.7.0/userDocs/submarine-sdk/experiment-client">User Docs</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/0.7.0/devDocs/">Developer Docs</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/0.7.0/community/">Community</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" 
aria-expanded="true" href="/docs/0.7.0/designDocs/architecture-and-requirements">Design Docs</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.7.0/designDocs/architecture-and-requirements">Architecture and Requirment</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.7.0/designDocs/implementation-notes">Implementation Notes</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.7.0/designDocs/environments-implementation">Environments Implementation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.7.0/designDocs/experiment-implementation">Experiment Implementation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.7.0/designDocs/notebook-implementation">Notebook Implementation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.7.0/designDocs/storage-implementation">Storage Implementation</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/0.7.0/designDocs/submarine-server/architecture">Submarine Server</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link 
menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" tabindex="0" href="/docs/0.7.0/designDocs/wip-designs/submarine-launcher">WIP Design Docs</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.7.0/designDocs/wip-designs/submarine-launcher">Submarine Launcher</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/0.7.0/designDocs/wip-designs/submarine-clusterServer">Cluster Server Design - High-Availability</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/0.7.0/designDocs/wip-designs/security-implementation">Security Implementation</a></li></ul></li></ul></li></ul></nav></div></aside><main class="docMainContainer_TCnq"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_DM6M"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Submarine<!-- --> <b>0.7.0</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/gettingStarted/quickstart">latest version</a></b> (<!-- -->0.8.0<!-- -->).</div></div><div class="docItemContainer_vinB"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Xlws" aria-label="breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a class="breadcrumbs__link" href="/">🏠</a></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item"><span class="breadcrumbs__link" itemprop="item 
name">Design Docs</span><meta itemprop="position" content="1"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item"><span class="breadcrumbs__link" itemprop="item name">WIP Design Docs</span><meta itemprop="position" content="2"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="item name">Cluster Server Design - High-Availability</span><meta itemprop="position" content="3"></li></ul></nav><span class="theme-doc-version-badge badge badge--secondary">Version: 0.7.0</span><div class="tocCollapsible_jdIR theme-doc-toc-mobile tocMobile_TmEX"><button type="button" class="clean-btn tocCollapsibleButton_Fzxq">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Cluster Server Design - High-Availability</h1></header><div class="admonition admonition-warning alert alert--danger"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="12" height="16" viewBox="0 0 12 16"><path fill-rule="evenodd" d="M5.05.31c.81 2.17.41 3.38-.52 4.31C3.55 5.67 1.98 6.45.9 7.98c-1.45 2.05-1.7 6.53 3.53 7.7-2.2-1.16-2.67-4.52-.3-6.61-.61 2.03.53 3.33 1.94 2.86 1.39-.47 2.3.53 2.27 1.67-.02.78-.31 1.44-1.13 1.81 3.42-.59 4.78-3.42 4.78-5.56 0-2.84-2.53-3.22-1.25-5.61-1.52.13-2.03 1.13-1.89 2.75.09 1.08-1.02 1.8-1.86 1.33-.67-.41-.66-1.19-.06-1.78C8.18 5.31 8.68 2.45 5.05.32L5.03.3l.02.01z"></path></svg></span>warning</h5></div><div class="admonition-content"><p>Please note that this design doc is working-in-progress and need more works to complete. 
</p></div></div><h2 class="anchor anchorWithStickyNavbar_mojV" id="below-is-existing-proposal">Below is existing proposal:<a class="hash-link" href="#below-is-existing-proposal" title="Direct link to heading"></a></h2><h2 class="anchor anchorWithStickyNavbar_mojV" id="introduction">Introduction<a class="hash-link" href="#introduction" title="Direct link to heading"></a></h2><p>The Submarine system contains a total of two daemon services, Submarine Server and Workbench Server.</p><p>Submarine Server mainly provides job submission, job scheduling, job status monitoring, and model online service for Submarine.</p><p>Workbench Server is mainly for algorithm users to provide algorithm development, Python/Spark interpreter operation, and other services through Notebook.</p><p>The goal of the Submarine project is to provide high availability and high-reliability services for big data processing,
algorithm development, job scheduling, model online services, model batch, and incremental updates. </p><p>In addition to the high availability of big data and machine learning frameworks,
the high availability of Submarine Server and Workbench Server themselves is a key consideration.</p><h2 class="anchor anchorWithStickyNavbar_mojV" id="requirement">Requirement<a class="hash-link" href="#requirement" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="cluster-metadata-center">Cluster Metadata Center<a class="hash-link" href="#cluster-metadata-center" title="Direct link to heading"></a></h3><p>Multiple Submarine (or Workbench) Server processes create a Submarine Cluster through the RAFT algorithm library. </p><p>The cluster internally maintains a metadata center. All servers can operate on the metadata. </p><p>The RAFT algorithm ensures that when multiple processes modify the same data simultaneously, the modification will not cause problems such as mutual overwriting and dirty data.</p><p>This metadata center stores data by means of key-value pairs. It can store/support a variety of data,
but it should be noted that metadata is only suitable for storing small amounts of data and cannot be used to replace data storage.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="service-discovery">Service discovery<a class="hash-link" href="#service-discovery" title="Direct link to heading"></a></h3><p>By storing the information of the service or process in the metadata center, we can easily find the information of the service or process we need in any place,
for example, the IP address and port of a Python interpreter process. This information is stored in the metadata center,
and other services can easily find process information through process IDs and connect to provide service discovery capabilities.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="cluster-event">Cluster event<a class="hash-link" href="#cluster-event" title="Direct link to heading"></a></h3><p>In the entire Submarine cluster, the servers can communicate with each other and other child processes to send cluster events to each other. </p><p>The service or process processes the corresponding programs according to the cluster events. For example,
the Workbench Server can send a shutdown event to the Python interpreter process, which makes it possible to control the operation of the services and individual subprocesses throughout the cluster.</p><p>Cluster events support both broadcast and separate delivery capabilities.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="independence">Independence<a class="hash-link" href="#independence" title="Direct link to heading"></a></h3><p>We implement Submarine&#x27;s clustering capabilities through the RAFT algorithm library, without relying on any external services (e.g. Zookeeper, Etcd, etc.)</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="disadvantages">Disadvantages<a class="hash-link" href="#disadvantages" title="Direct link to heading"></a></h3><p>Because the RAFT algorithm requires more than half of the servers available to ensure the normality of the RAFT algorithm,
if we need to turn on the clustering capabilities of Submarine (Workbench) Server, when more than half of the servers are unavailable,
some programs may behave abnormally. The system detects this situation and either degrades its service or refuses to provide service.</p><h2 class="anchor anchorWithStickyNavbar_mojV" id="system-design">System design<a class="hash-link" href="#system-design" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="universal-design">Universal design<a class="hash-link" href="#universal-design" title="Direct link to heading"></a></h3><p>Modular design: both Submarine Server and Workbench Server exist in the Submarine system, and these two services need to provide clustering capabilities,
so we abstract the cluster function into a separate module for development so that Submarine (Workbench) Server can reuse the cluster function module.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="clusterconfigure">ClusterConfigure<a class="hash-link" href="#clusterconfigure" title="Direct link to heading"></a></h3><p>Add a <code>submarine.server.addr</code> and <code>workbench.server.addr</code> configuration items in <code>submarine-site.xml</code>, <code>submarine.server.addr=ip1, ip2, ip3</code>,
through the IP list, the RAFT algorithm module in the server process can form a cluster with other server processes.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="clusterserver">ClusterServer<a class="hash-link" href="#clusterserver" title="Direct link to heading"></a></h3><ul><li><p>The ClusterServer module encapsulates the RAFT algorithm module, which can create a service cluster and read and write metadata based on the two configuration items submarine.server.addr or workbench.server.addr.</p></li><li><p>The cluster management service runs in each submarine server;</p></li><li><p>The cluster management service establishes a cluster by using the atomix RaftServer class of the Raft algorithm library, maintains the ClusterStateMachine,
and manages the service state metadata of each submarine server through the PutCommand, GetQuery, and DeleteCommand operation commands.</p></li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="clusterclient">ClusterClient<a class="hash-link" href="#clusterclient" title="Direct link to heading"></a></h3><ul><li><p>The ClusterClient module encapsulates the RAFT algorithm client module, which can communicate with the cluster according to the two configuration items <code>submarine.server.addr</code> or <code>workbench.server.addr</code>,
read and write metadata, and write the IP and port information of the client process into the cluster&#x27;s metadata center.</p></li><li><p>The cluster management client runs in each submarine server and submarine Interpreter process;</p></li><li><p>The cluster management client manages the submarine server and submarine Interpreter process state (metadata information)
in the ClusterStateMachine by using the atomix RaftClient class of the Raft library to connect to the atomix RaftServer. </p></li><li><p>When the submarine server and Submarine Interpreter processes are started, they are added to the ClusterStateMachine, and they are removed from the ClusterStateMachine when the Submarine Server and Submarine Interpreter processes are closed.</p></li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="clustermetadata">ClusterMetadata<a class="hash-link" href="#clustermetadata" title="Direct link to heading"></a></h3><p>Metadata information is stored as key-value (KV) pairs.
ServerMeta:key=&#x27;host:port&#x27;,value= {SERVER_HOST=...,SERVER_PORT=...,...}</p><table><thead><tr><th>Name</th><th>Description</th></tr></thead><tbody><tr><td>SUBMARINE_SERVER_HOST</td><td>Submarine server IP</td></tr><tr><td>SUBMARINE_SERVER_PORT</td><td>Submarine server port</td></tr><tr><td>WORKBENCH_SERVER_HOST</td><td>Submarine workbench server IP</td></tr><tr><td>WORKBENCH_SERVER_PORT</td><td>Submarine workbench server port</td></tr></tbody></table><p>InterpreterMeta:key=InterpreterGroupId,value={INTP_TSERVER_HOST=...,...}</p><table><thead><tr><th>Name</th><th>Description</th></tr></thead><tbody><tr><td>INTP_TSERVER_HOST</td><td>Submarine Interpreter Thrift IP</td></tr><tr><td>INTP_TSERVER_PORT</td><td>Submarine Interpreter Thrift port</td></tr><tr><td>INTP_START_TIME</td><td>Submarine Interpreter start time</td></tr><tr><td>HEARTBEAT</td><td>Submarine Interpreter heartbeat time</td></tr></tbody></table><h3 class="anchor anchorWithStickyNavbar_mojV" id="network-fault-tolerance">Network fault tolerance<a class="hash-link" href="#network-fault-tolerance" title="Direct link to heading"></a></h3><p>In a distributed environment, there may be network anomalies, network delays, or service exceptions. After submitting metadata to the cluster,
check whether the submission succeeded. If the submission fails, save the metadata in a local message queue and let a separate commit thread retry the submission.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="cluster-monitoring">Cluster monitoring<a class="hash-link" href="#cluster-monitoring" title="Direct link to heading"></a></h3><p>The cluster needs to monitor whether the Submarine Server and Submarine-Interpreter processes are working properly.</p><p>The Submarine Server and Submarine Interpreter processes periodically send heartbeats to update their own timestamps in the cluster metadata. </p><p>The Submarine Server with Leader identity periodically checks the timestamps of the Submarine Server and Submarine Interpreter processes to remove services and processes that have timed out.</p><ol><li><p>The cluster monitoring module runs in each Submarine Server and Submarine Interpreter process,
periodically sending heartbeat data of the service or process to the cluster;</p></li><li><p>When the cluster monitoring module runs in Submarine Server, it sends the heartbeat to the cluster&#x27;s ClusterStateMachine.
If the cluster does not receive heartbeat information for a long time, this indicates that the service or process is abnormal and unavailable.</p></li><li><p>Resource usage statistics strategy: in order to avoid the instantaneous high peak and low peak of the server,
the cluster monitoring will collect the average resource usage in the most recent period for reporting, which keeps the use of server resources as reasonable and effective as possible;</p></li><li><p>When the cluster monitoring module runs in the Submarine Server, it checks the heartbeat data of each Submarine Server and Submarine Interpreter process.
If it times out, it considers that the service or process is abnormally unavailable and removes it from the cluster.</p></li></ol><h3 class="anchor anchorWithStickyNavbar_mojV" id="atomix-raft-algorithm-library">Atomix Raft algorithm library<a class="hash-link" href="#atomix-raft-algorithm-library" title="Direct link to heading"></a></h3><p>In order to reduce the deployment complexity of distributed mode, submarine server does not use Zookeeper to build a distributed cluster.
Multiple submarine server groups are built into distributed clusters by using the Raft algorithm in submarine server.
The Raft algorithm is provided by the Atomix library, which has passed Jepsen consistency verification.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="synchronize-workbench-notes">Synchronize workbench notes<a class="hash-link" href="#synchronize-workbench-notes" title="Direct link to heading"></a></h3><p>In cluster mode, a user may create, modify, or delete a note on any of the servers.
All of these changes must be broadcast to every server in the cluster so that the Notebook stays synchronized.
Failure to do so will result in the user not being able to continue while switching to another server.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="listen-for-note-update-events">Listen for note update events<a class="hash-link" href="#listen-for-note-update-events" title="Direct link to heading"></a></h3><p>Listen for the NEW_NOTE, DEL_NOTE, REMOVE_NOTE_TO_TRASH ... event of the notebook in the NotebookServer#onMessage() function.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="broadcast-note-update-event">Broadcast note update event<a class="hash-link" href="#broadcast-note-update-event" title="Direct link to heading"></a></h3><p>The note is refreshed by notifying the event to all Submarine servers in the cluster via messaging Service.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.7.0/designDocs/wip-designs/submarine-clusterServer.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_dcUD" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_foO9"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/0.7.0/designDocs/wip-designs/submarine-launcher"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Submarine Launcher</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/0.7.0/designDocs/wip-designs/security-implementation"><div 
class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Security Implementation</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_cNA8 thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#below-is-existing-proposal" class="table-of-contents__link toc-highlight">Below is existing proposal:</a></li><li><a href="#introduction" class="table-of-contents__link toc-highlight">Introduction</a></li><li><a href="#requirement" class="table-of-contents__link toc-highlight">Requirement</a><ul><li><a href="#cluster-metadata-center" class="table-of-contents__link toc-highlight">Cluster Metadata Center</a></li><li><a href="#service-discovery" class="table-of-contents__link toc-highlight">Service discovery</a></li><li><a href="#cluster-event" class="table-of-contents__link toc-highlight">Cluster event</a></li><li><a href="#independence" class="table-of-contents__link toc-highlight">Independence</a></li><li><a href="#disadvantages" class="table-of-contents__link toc-highlight">Disadvantages</a></li></ul></li><li><a href="#system-design" class="table-of-contents__link toc-highlight">System design</a><ul><li><a href="#universal-design" class="table-of-contents__link toc-highlight">Universal design</a></li><li><a href="#clusterconfigure" class="table-of-contents__link toc-highlight">ClusterConfigure</a></li><li><a href="#clusterserver" class="table-of-contents__link toc-highlight">ClusterServer</a></li><li><a href="#clusterclient" class="table-of-contents__link toc-highlight">ClusterClient</a></li><li><a href="#clustermetadata" class="table-of-contents__link toc-highlight">ClusterMetadata</a></li><li><a href="#network-fault-tolerance" class="table-of-contents__link toc-highlight">Network fault tolerance</a></li><li><a href="#cluster-monitoring" class="table-of-contents__link toc-highlight">Cluster monitoring</a></li><li><a href="#atomix-raft-algorithm-library" 
class="table-of-contents__link toc-highlight">Atomix Raft algorithm library</a></li><li><a href="#synchronize-workbench-notes" class="table-of-contents__link toc-highlight">Synchronize workbench notes</a></li><li><a href="#listen-for-note-update-events" class="table-of-contents__link toc-highlight">Listen for note update events</a></li><li><a href="#broadcast-note-update-event" class="table-of-contents__link toc-highlight">Broadcast note update event</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Docs</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/gettingStarted/quickstart">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/environment">API docs</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">More</div><ul class="footer__items"><li 
class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_gHmE"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--light_TfLj footer__logo"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--dark_oUvU footer__logo"></a></div><div class="footer__copyright">Apache Submarine, Submarine, Apache, the Apache feather logo, and the Apache Submarine project logo are
either registered trademarks or trademarks of the Apache Software Foundation in the United States and other
countries.<br> Copyright © 2023 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.9d177e25.js"></script>
<script src="/assets/js/main.7cd2eed3.js"></script>
</body>
</html>