blob: 97f62caf141545ea70bff08816b5bfcb7f2c5599 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper plugin-docs plugin-id-default docs-version-1.2.0 docs-doc-page docs-doc-id-design/cache_removal" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v3.5.2">
<title data-rh="true">Scheduler cache removal design | Apache YuniKorn</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://yunikorn.apache.org/img/logo/yunikorn-logo-main.png"><meta data-rh="true" name="twitter:image" content="https://yunikorn.apache.org/img/logo/yunikorn-logo-main.png"><meta data-rh="true" property="og:url" content="https://yunikorn.apache.org/docs/1.2.0/design/cache_removal"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="1.2.0"><meta data-rh="true" name="docusaurus_tag" content="docs-default-1.2.0"><meta data-rh="true" name="docsearch:version" content="1.2.0"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-1.2.0"><meta data-rh="true" property="og:title" content="Scheduler cache removal design | Apache YuniKorn"><meta data-rh="true" name="description" content="Proposal to combine the cache and scheduler implementations in the core: the current state of both and the changes planned based on the analysis of the current behaviour."><meta data-rh="true" property="og:description" content="Proposal to combine the cache and scheduler implementations in the core: the current state of both and the changes planned based on the analysis of the current behaviour."><link data-rh="true" rel="icon" href="/img/yunikorn.ico"><link data-rh="true" rel="canonical" href="https://yunikorn.apache.org/docs/1.2.0/design/cache_removal"><link data-rh="true" rel="alternate" href="https://yunikorn.apache.org/docs/1.2.0/design/cache_removal" hreflang="en"><link data-rh="true" rel="alternate" href="https://yunikorn.apache.org/docs/1.2.0/design/cache_removal" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://Q1V951BG2V-dsn.algolia.net" crossorigin="anonymous"><link rel="search" type="application/opensearchdescription+xml" title="Apache YuniKorn" href="/opensearch.xml"><link rel="stylesheet" href="/assets/css/styles.9ba51dea.css">
<script src="/assets/js/runtime~main.4cabfa25.js" defer="defer"></script>
<script src="/assets/js/main.239d0b84.js" defer="defer"></script>
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const a=new URLSearchParams(window.location.search).entries();for(var[t,e]of a)if(t.startsWith("docusaurus-data-")){var n=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(n,e)}}catch(t){}}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_WRxJ" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_iQDU" style="background-color:#fafbfc;color:#091E42" role="banner"><div class="announcementBarPlaceholder_adlA"></div><div class="content_FQ9N announcementBarContent_rCew">1.6.3 has been released, check the <a href="/community/download">DOWNLOADS</a>.</div><button type="button" aria-label="Close" class="clean-btn close closeButton_PH0D announcementBarClose_l50r"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img 
src="/img/logo/yunikorn_blue_logo.png" alt="YuniKorn Site Logo" class="themedComponent_xEWn themedComponent--light_o9Nf"><img src="/img/logo/yunikorn_white_logo.png" alt="YuniKorn Site Logo" class="themedComponent_xEWn themedComponent--dark_DSW7"></div><b class="navbar__title text--truncate">Apache YuniKorn</b></a><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs/">Docs</a><a class="navbar__item navbar__link" href="/community/roadmap">Roadmap</a><a class="navbar__item navbar__link" href="/community/download">Download</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/community/get_involved">Get Involved</a></li><li><a class="dropdown__link" href="/community/how_to_contribute">How to Contribute</a></li><li><a class="dropdown__link" href="/community/coding_guidelines">Coding Guidelines</a></li><li><a class="dropdown__link" href="/community/reporting_issues">Reporting Issues</a></li><li><a class="dropdown__link" href="/community/release_procedure">Release Procedure</a></li><li><a class="dropdown__link" href="/community/events">Events</a></li><li><a class="dropdown__link" href="/community/people">People</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" 
rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsors<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Privacy Policy<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" 
class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a aria-current="page" class="navbar__link active" aria-haspopup="true" aria-expanded="false" role="button" href="/docs/1.2.0/design/cache_removal">1.2.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/design/cache_removal">Next</a></li><li><a class="dropdown__link" href="/docs/design/cache_removal">1.6.3</a></li><li><a class="dropdown__link" href="/docs/1.6.2/design/cache_removal">1.6.2</a></li><li><a class="dropdown__link" href="/docs/1.6.1/design/cache_removal">1.6.1</a></li><li><a class="dropdown__link" href="/docs/1.6.0/design/cache_removal">1.6.0</a></li><li><a class="dropdown__link" href="/docs/1.5.2/design/cache_removal">1.5.2</a></li><li><a class="dropdown__link" href="/docs/1.5.1/design/cache_removal">1.5.1</a></li><li><a class="dropdown__link" href="/docs/1.5.0/design/cache_removal">1.5.0</a></li><li><a class="dropdown__link" href="/docs/1.4.0/design/cache_removal">1.4.0</a></li><li><a class="dropdown__link" href="/docs/1.3.0/design/cache_removal">1.3.0</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/1.2.0/design/cache_removal">1.2.0</a></li><li><a class="dropdown__link" href="/docs/1.1.0/design/cache_removal">1.1.0</a></li><li><a class="dropdown__link" href="/docs/1.0.0/design/cache_removal">1.0.0</a></li></ul></div><a href="https://github.com/apache/yunikorn-core" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"></a><div class="toggle_wrt9 colorModeToggle_C9lx"><button class="clean-btn 
toggleButton_I_1w toggleButtonDisabled_mJvf" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)" aria-live="polite"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_ykP0"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_E3Gk"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbarSearchContainer_sJwN"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" 
height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_NHDD"><div class="docsWrapper_c0tn"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_ZTj1" type="button"></button><div class="docRoot_vtCj"><aside class="theme-doc-sidebar-container docSidebarContainer_Y6GP"><div class="sidebarViewport_wPSD"><div class="sidebar_edep"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_mTkH menuWithAnnouncementBar_eLT9"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/docs/1.2.0/">Get Started</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/docs/1.2.0/user_guide/deployment_modes">User Guide</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist 
menu__link--sublist-caret menu__link--active" role="button" aria-expanded="true" href="/docs/1.2.0/developer_guide/env_setup">Developer Guide</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/developer_guide/env_setup">Dev Environment Setup</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/developer_guide/build">Build and Run</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/developer_guide/dependencies">Go module updates</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/developer_guide/deployment">Deploy to Kubernetes</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/developer_guide/openshift_development">Development in CodeReady Containers</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" role="button" aria-expanded="true" tabindex="0" href="/docs/1.2.0/design/architecture">Designs</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/architecture">Architecture</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" 
href="/docs/1.2.0/design/scheduler_plugin">K8s Scheduler Plugin</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/gang_scheduling">Gang scheduling design</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/user_group">User/Group handling and lookup design</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/user_based_resource_usage_tracking">User Based Resource Usage Tracking</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/interface_message_simplification">Simplifying Interface Messages</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/1.2.0/design/cache_removal">Scheduler cache removal design</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/simple_preemptor">DaemonSet Scheduling using Simple Preemptor</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/generic_resource">Generic Resource Types in Namespace Quota</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/pluggable_app_management">Pluggable App Management</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" 
href="/docs/1.2.0/design/priority_scheduling">Priority Scheduling</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/resilience">Resilience</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/state_aware_scheduling">Batch Workloads Ordering with StateAware Policy</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/scheduler_object_states">Scheduler Object States</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/config_v2">Configuration V2</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/1.2.0/design/scheduler_configuration">Scheduler Configuration</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" tabindex="0" href="/docs/1.2.0/archived_design/k8shim">Archived Designs</a></div></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/docs/1.2.0/performance/evaluate_perf_function_with_kubemark">Performance</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_I6ad"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col 
docItemCol_WqSy"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache YuniKorn<!-- --> <b>1.2.0</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/design/cache_removal">latest version</a></b> (<!-- -->1.6.3<!-- -->).</div></div><div class="docItemContainer_Esva"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_mFzf" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_hTEa"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Developer Guide</span><meta itemprop="position" content="1"></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Designs</span><meta itemprop="position" content="2"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Scheduler cache removal design</span><meta itemprop="position" content="3"></li></ul></nav><span class="theme-doc-version-badge badge badge--secondary">Version: 1.2.0</span><div class="tocCollapsible_GaFJ theme-doc-toc-mobile tocMobile_pwPL"><button type="button" class="clean-btn tocCollapsibleButton_I6fr">On this page</button></div><div class="theme-doc-markdown markdown"><div class="theme-admonition theme-admonition-caution admonition_DRKi alert alert--warning"><div class="admonitionHeading_Wd3u"><span class="admonitionIcon_XJlf"><svg viewBox="0 0 16 16"><path 
fill-rule="evenodd" d="M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"></path></svg></span>caution</div><div class="admonitionContent_vZ3Q"><p>The Interface Message definitions described in this design doc have undergone major refactoring to reduce the complexity. <a href="https://issues.apache.org/jira/browse/YUNIKORN-337" target="_blank" rel="noopener noreferrer">YUNIKORN-337</a> was committed and simplified the message communication between Core and Shim to a great extent.
See <a href="/docs/1.2.0/design/interface_message_simplification">Simplifying Interface Messages and Breaking Shim build dependency on Core</a> for the updated message definitions.</p></div></div>
<header><h1>Proposal to combine Cache and Scheduler&#x27;s implementation in the core</h1></header>
<p>This document describes the current state of the scheduler and cache implementation.
It describes the changes planned based on the analysis that was done of the current behaviour.</p>
<h2 class="anchor anchorWithStickyNavbar_X9mg" id="goals">Goals<a href="#goals" class="hash-link" aria-label="Direct link to Goals" title="Direct link to Goals"></a></h2>
<p>The goal is to provide the same functionality before and after the change.</p>
<ul>
<li>Unit tests before and after the merge must all pass.</li>
<li>Smoke tests defined in the core should all pass without major changes.</li>
<li>End-to-end tests that are part of the shim code must all pass without changes.</li>
</ul>
<div class="theme-admonition theme-admonition-info admonition_DRKi alert alert--info"><div class="admonitionHeading_Wd3u"><span class="admonitionIcon_XJlf"><svg viewBox="0 0 14 16"><path fill-rule="evenodd" d="M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"></path></svg></span>info</div><div class="admonitionContent_vZ3Q"><p>Major changes for smoke tests are defined as changes to the tests that alter use case and thus test flows. Some changes will be needed as checks made could rely on cache objects which have been removed.</p></div></div>
<h2 class="anchor anchorWithStickyNavbar_X9mg" id="background">Background<a href="#background" class="hash-link" aria-label="Direct link to Background" title="Direct link to Background"></a></h2>
<p>The current Scheduler Core is built around two major components to store the data: the cache and scheduler objects.
The cache objects form the base for most data to be tracked.
The Scheduler objects track specific in-flight details and are built on top of a cache object.</p>
<p>The communication between the two layers uses asynchronous events and in some cases direct updates.
An asynchronous update between the scheduler and the cache means that there is a short period during which the scheduler is &quot;out of sync&quot; with the cache.
This short period can have an impact on the scheduling decisions.
One of which is logged as <a href="https://issues.apache.org/jira/browse/YUNIKORN-169" target="_blank" rel="noopener noreferrer">YUNIKORN-169</a>.</p>
<p>A further point is the complexity that the two structures bring to the code.
A distinct set of messages is required to communicate between the scheduler and the cache. A one-on-one mapping between the scheduler and cache objects shows that the distinction is probably more artificial than required.</p>
<h2 class="anchor anchorWithStickyNavbar_X9mg" id="structure-analysis">Structure analysis<a href="#structure-analysis" class="hash-link" aria-label="Direct link to Structure analysis" title="Direct link to Structure analysis"></a></h2>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="objects">Objects<a href="#objects" class="hash-link" aria-label="Direct link to Objects" title="Direct link to Objects"></a></h3>
<p>The existing objects as per the code analysis.
The overlap between the scheduler and the cache objects is shown by placing them on the same line.
N/A means that there is no equivalent object in either the scheduler or cache.</p>
<table><thead><tr><th>Cache Object</th><th>Scheduler Object</th></tr></thead><tbody><tr><td>ClusterInfo</td><td>ClusterSchedulingContext</td></tr><tr><td>PartitionInfo</td><td>partitionSchedulingContext</td></tr><tr><td>AllocationInfo</td><td>schedulingAllocation</td></tr><tr><td>N/A</td><td>schedulingAllocationAsk</td></tr><tr><td>N/A</td><td>reservation</td></tr><tr><td>ApplicationInfo</td><td>SchedulingApplication</td></tr><tr><td>applicationState</td><td>N/A</td></tr><tr><td>NodeInfo</td><td>SchedulingNode</td></tr><tr><td>QueueInfo</td><td>SchedulingQueue</td></tr><tr><td>SchedulingObjectState</td><td>N/A</td></tr></tbody></table>
<p>The <code>initializer</code> code that is part of the cache does not define a specific object.
It contains a mixture of code defined at the package level and code that is part of the <code>ClusterInfo</code> object.</p>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="events">Events<a href="#events" class="hash-link" aria-label="Direct link to Events" title="Direct link to Events"></a></h3>
<p>Events defined in the core have multiple origins and destinations.
Some events are only internal for the core between the cache and scheduler.
These events will be removed.</p>
<table><thead><tr><th>Event</th><th>Flow</th><th>Proposal</th></tr></thead><tbody><tr><td>AllocationProposalBundleEvent</td><td>Scheduler -&gt; Cache</td><td>Remove</td></tr><tr><td>RejectedNewApplicationEvent</td><td>Scheduler -&gt; Cache</td><td>Remove</td></tr><tr><td>ReleaseAllocationsEvent</td><td>Scheduler -&gt; Cache</td><td>Remove</td></tr><tr><td>RemoveRMPartitionsEvent</td><td>Scheduler -&gt; Cache</td><td>Remove</td></tr><tr><td>RemovedApplicationEvent</td><td>Scheduler -&gt; Cache</td><td>Remove</td></tr><tr><td>SchedulerNodeEvent</td><td>Cache -&gt; Scheduler</td><td>Remove</td></tr><tr><td>SchedulerAllocationUpdatesEvent</td><td>Cache -&gt; Scheduler</td><td>Remove</td></tr><tr><td>SchedulerApplicationsUpdateEvent</td><td>Cache -&gt; Scheduler</td><td>Remove</td></tr><tr><td>SchedulerUpdatePartitionsConfigEvent</td><td>Cache -&gt; Scheduler</td><td>Remove</td></tr><tr><td>SchedulerDeletePartitionsConfigEvent</td><td>Cache -&gt; Scheduler</td><td>Remove</td></tr><tr><td>RMApplicationUpdateEvent (add/remove app)</td><td>Cache/Scheduler -&gt; RM</td><td>Modify</td></tr><tr><td>RMRejectedAllocationAskEvent</td><td>Cache/Scheduler -&gt; RM</td><td>Modify</td></tr><tr><td>RemoveRMPartitionsEvent</td><td>RM -&gt; Scheduler</td><td></td></tr><tr><td>RMUpdateRequestEvent</td><td>RM -&gt; Cache</td><td>Modify</td></tr><tr><td>RegisterRMEvent</td><td>RM -&gt; Cache</td><td>Modify</td></tr><tr><td>ConfigUpdateRMEvent</td><td>RM -&gt; Cache</td><td>Modify</td></tr><tr><td>RMNewAllocationsEvent</td><td>Cache -&gt; RM</td><td>Modify</td></tr><tr><td>RMReleaseAllocationEvent</td><td>Cache -&gt; RM</td><td>Modify</td></tr><tr><td>RMNodeUpdateEvent</td><td>Cache -&gt; RM</td><td>Modify</td></tr><tr><td></td><td></td><td></td></tr></tbody></table>
<p>Events that are handled by the cache will need to be handled by the core code after the removal of the cache.
Two events are handled by the cache and the scheduler.</p>
<h2 class="anchor anchorWithStickyNavbar_X9mg" id="detailed-flow-analysis">Detailed flow analysis<a href="#detailed-flow-analysis" class="hash-link" aria-label="Direct link to Detailed flow analysis" title="Direct link to Detailed flow analysis"></a></h2>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="object-existing-in-both-cache-and-scheduler">Object existing in both cache and scheduler<a href="#object-existing-in-both-cache-and-scheduler" class="hash-link" aria-label="Direct link to Object existing in both cache and scheduler" title="Direct link to Object existing in both cache and scheduler"></a></h3>
<p>The current design is based on the fact that the cache object is the basis for all data storage.
Each cache object must have a corresponding scheduler object.
The contract in the core around the cache and scheduler objects was simple.
If the object exists in both scheduler and cache, the object will be added to the cache, triggering the creation of the corresponding scheduler object.
Removing the object is always handled in reverse: first from the scheduler which will trigger the removal from the cache.
An example would be the creation of an application: triggered by the <code>RMUpdateRequestEvent</code>, it would be processed by the cache,
which then creates a <code>SchedulerApplicationsUpdateEvent</code> to create the corresponding application in the scheduler.</p>
<p>When the application and object state were added they were added into the cache objects.
The cache objects were considered the data store and thus also contain the state.
There were no corresponding state objects in the scheduler.
Maintaining two states for the same object is not possible.</p>
<p>The other exceptions to that rule are two objects that were considered volatile and scheduler only.
The <code>schedulingAllocationAsk</code> tracks outstanding requests for an application in the scheduler.
The <code>reservation</code> tracks a temporary reservation of a node for an application and ask combination.</p>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="operations-to-addremove-app">Operations to add/remove app<a href="#operations-to-addremove-app" class="hash-link" aria-label="Direct link to Operations to add/remove app" title="Direct link to Operations to add/remove app"></a></h3>
<p>The RM (shim) sends a complex <code>UpdateRequest</code> as defined in the scheduler interface.
This message is wrapped by the RM proxy and forwarded to the cache for processing.
The RM can request an application to be added or removed.</p>
<p><strong>application add or delete</strong></p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. RMProxy sends cacheevent.RMUpdateRequestEvent to cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. cluster_info.processApplicationUpdateFromRMUpdate</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.1: Add new apps to the partition.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.2: Send removed apps to scheduler (but not remove anything from cache)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3. scheduler.processApplicationUpdateEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.1: Add new apps to scheduler </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> (when fails, send RejectedNewApplicationEvent to cache)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> No matter if failed or not, send RMApplicationUpdateEvent to RM.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.2: Remove app from scheduler</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> Send RemovedApplicationEvent to cache</span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 
0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="operations-to-remove-allocations-and-add-or-remove-asks">Operations to remove allocations and add or remove asks<a href="#operations-to-remove-allocations-and-add-or-remove-asks" class="hash-link" aria-label="Direct link to Operations to remove allocations and add or remove asks" title="Direct link to Operations to remove allocations and add or remove asks"></a></h3>
<p>The RM (shim) sends a complex <code>UpdateRequest</code> as defined in the scheduler interface.
This message is wrapped by the RM proxy and forwarded to the cache for processing.
The RM can request an allocation to be removed.
The RM can request an ask to be added or removed.</p>
<p><strong>allocation delete</strong>
This describes the allocation delete initiated by the RM only.</p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. RMProxy sends cacheevent.RMUpdateRequestEvent to cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. cluster_info.processNewAndReleaseAllocationRequests</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.1: (by-pass): Send to scheduler via event SchedulerAllocationUpdatesEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3. scheduler.processAllocationUpdateEvent </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.1: Update ReconcilePlugin</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.2: Send confirmation of the releases back to Cache via event ReleaseAllocationsEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">4. cluster_info.processAllocationReleases to process the confirmed release</span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p><strong>ask add</strong>
If the ask already exists this add is automatically converted into an update.</p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. RMProxy sends cacheevent.RMUpdateRequestEvent to cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. cluster_info.processNewAndReleaseAllocationRequests</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.1: Ask sanity check (such as existence of partition/app), rejections are sent back to the RM via RMRejectedAllocationAskEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.2: pass checked asks to scheduler via SchedulerAllocationUpdatesEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3. scheduler.processAllocationUpdateEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.1: Update scheduling application with the new or updated ask. 
</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.2: rejections are sent back to the RM via RMRejectedAllocationAskEvent </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.3: accepted asks are not confirmed to RM or cache</span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p><strong>ask delete</strong></p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. RMProxy sends cacheevent.RMUpdateRequestEvent to cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. cluster_info.processNewAndReleaseAllocationRequests</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.1: (by-pass): Send to scheduler via event SchedulerAllocationUpdatesEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3. scheduler.processAllocationReleaseByAllocationKey</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.1: Update scheduling application and remove the ask. </span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="operations-to-add-update-or-remove-nodes">Operations to add, update or remove nodes<a href="#operations-to-add-update-or-remove-nodes" class="hash-link" aria-label="Direct link to Operations to add, update or remove nodes" title="Direct link to Operations to add, update or remove nodes"></a></h3>
<p>The RM (shim) sends a complex <code>UpdateRequest</code> as defined in the scheduler interface.
This message is wrapped by the RM proxy and forwarded to the cache for processing.
The RM can request a node to be added, updated or removed.</p>
<p><strong>node add</strong></p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. RMProxy sends cacheevent.RMUpdateRequestEvent to cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. cluster_info.processNewSchedulableNodes</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.1: node sanity check (such as existence of partition/node)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.2: Add new nodes to the partition.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.3: notify scheduler of new node via SchedulerNodeEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3. notify RM of node additions and rejections via RMNodeUpdateEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.1: notify the scheduler of allocations to recover via SchedulerAllocationUpdatesEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">4. 
scheduler.processAllocationUpdateEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 4.1: scheduler creates a new ask based on the Allocation to recover </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 4.2: recover the allocation on the new node using a special process</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 4.3: confirm the allocation in the scheduler, on failure update the cache with a ReleaseAllocationsEvent</span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p><strong>node update and removal</strong></p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. RMProxy sends cacheevent.RMUpdateRequestEvent to cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. cluster_info.processNodeActions</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.1: node sanity check (such as existence of partition/node)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.2: Node info update (resource change)</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.2.1: update node in cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.2.2: notify scheduler of the node update via SchedulerNodeEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.3: Node status update (not removal), update node status in cache only</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.4: Node removal</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.4.1: update node status and remove node from the cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.4.2: remove allocations and inform RM via RMReleaseAllocationEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.4.3: notify scheduler of the node removal via SchedulerNodeEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3. 
scheduler.processNodeEvent add/remove/update the node </span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="operations-to-add-update-or-remove-partitions">Operations to add, update or remove partitions<a href="#operations-to-add-update-or-remove-partitions" class="hash-link" aria-label="Direct link to Operations to add, update or remove partitions" title="Direct link to Operations to add, update or remove partitions"></a></h3>
<p><strong>Add RM</strong></p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. RMProxy sends commonevents.RemoveRMPartitionsEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> if RM is already registered</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 1.1: scheduler.removePartitionsBelongToRM</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 1.1.1: scheduler cleans up</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 1.1.2: scheduler sends commonevents.RemoveRMPartitionsEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 1.2: cluster_info.processRemoveRMPartitionsEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 1.2.1: cache cleans up</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. RMProxy sends commonevents.RegisterRMEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3. cluster_info.processRMRegistrationEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.1: cache updates internal partitions/queues accordingly.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.2: cache sends to scheduler SchedulerUpdatePartitionsConfigEvent.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">4. 
scheduler.processUpdatePartitionConfigsEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 4.1: Scheduler updates partition/queue info accordingly.</span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p><strong>Update and Remove partition</strong>
Triggered by a configuration file update.</p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. RMProxy sends commonevents.ConfigUpdateRMEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. cluster_info.processRMConfigUpdateEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.1: cache update internal partitions/queues accordingly.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.2: cache sends to scheduler SchedulerUpdatePartitionsConfigEvent.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.3: cache marks partitions for deletion (not removed yet).</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.4: cache sends to scheduler SchedulerDeletePartitionsConfigEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3. scheduler.processUpdatePartitionConfigsEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.1: scheduler updates internal partitions/queues accordingly.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">4. 
scheduler.processDeletePartitionConfigsEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 4.1: Scheduler set partitionManager.stop = true.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 4.2: PartitionManager removes queues, applications, nodes async.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> This is the REAL CLEANUP including the cache</span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="allocations">Allocations<a href="#allocations" class="hash-link" aria-label="Direct link to Allocations" title="Direct link to Allocations"></a></h3>
<p>Allocations are initiated by the scheduling process.
The scheduler creates a SchedulingAllocation on the scheduler side which then gets wrapped in an AllocationProposal.
The scheduler has checked resources etc already and marked the allocation as inflight.
This description picks up at the point the allocation will be confirmed and finalised.</p>
<p><strong>New allocation</strong></p>
<div class="codeBlockContainer_EajB theme-code-block" style="--prism-color:#bfc7d5;--prism-background-color:#292d3e"><div class="codeBlockContent_yYRp"><pre tabindex="0" class="prism-code language-text codeBlock_z86c thin-scrollbar" style="color:#bfc7d5;background-color:#292d3e"><code class="codeBlockLines_zIeA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1. Scheduler wraps a SchedulingAllocation in an AllocationProposalBundleEvent </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2. cluster_info.processAllocationProposalEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> preemption case: release preempted allocations</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.1: release the allocation in the cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.2: inform the scheduler the allocation is released via SchedulerNodeEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.3: inform the RM the allocation is released via RMReleaseAllocationEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> all cases: add the new allocation</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.4: add the new allocation to the cache</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.5: rejections are sent back to the scheduler via SchedulerAllocationUpdatesEvent </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.6: inform the scheduler the allocation is added via SchedulerAllocationUpdatesEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 2.7: inform the RM the allocation is added via RMNewAllocationsEvent</span><br></span><span 
class="token-line" style="color:#bfc7d5"><span class="token plain">3. scheduler.processAllocationUpdateEvent</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.1: confirmations are added to the scheduler and change from inflight to confirmed.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> On failure of processing, a ReleaseAllocationsEvent is sent to the cache *again* to clean up.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> This is part of the issue in [YUNIKORN-169]</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> cluster_info.processAllocationReleases</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> 3.2: rejections remove the inflight allocation from the scheduler. </span><br></span></code></pre><div class="buttonGroup_mp8g"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_HhJ2" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_giQl"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_Nh_7"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<h2 class="anchor anchorWithStickyNavbar_X9mg" id="current-locking">Current locking<a href="#current-locking" class="hash-link" aria-label="Direct link to Current locking" title="Direct link to Current locking"></a></h2>
<p><strong>Cluster Lock:</strong><br>
<!-- -->A cluster contains one or more Partition objects. A partition is a sub object of Cluster.<br>
<!-- -->Adding or Removing ANY Partition requires a write-lock of the cluster.
Retrieving any object within the cluster will require iterating over the Partition list and thus a read-lock of the cluster.</p>
<p><strong>Partition Lock:</strong><br>
<!-- -->The partition object contains all links to Queue, Application or Node objects.
Adding or Removing ANY Queue, Application or Node needs a write-lock of the partition.
Retrieving any object within the partition will require a read-lock of the partition to prevent data races.</p>
<p>Examples of operations needing a write-lock:</p>
<ul>
<li>Allocation processing after scheduling, will change application, queue and node objects.
Partition lock is required due to possible updates to reservations.</li>
<li>Update of Node Resource
It not only affects the node&#x27;s available resource, it also affects the Partition&#x27;s total allocatable Resource</li>
</ul>
<p>Example of operations that need a read-lock:</p>
<ul>
<li>Retrieving any Queue, Application or Node needs a read-lock
The object itself is not locked as part of the retrieval</li>
<li>Confirming an allocation after processing in the cache
The partition is only locked for reading to allow retrieval of the objects that will be changed.
The changes are made on the underlying objects.</li>
</ul>
<p>Example of operations that do not need any lock:</p>
<ul>
<li>Scheduling<br>
<!-- -->Locks are taken on the specific objects when needed, no direct updates to the partition until the allocation is confirmed.</li>
</ul>
<p><strong>Queue lock:</strong><br>
<!-- -->A queue can track either applications (leaf type) or other queues (parent type).
Resources are tracked for both types in the same way.</p>
<p>Adding or removing an Application (leaf type), or a direct child queue (parent type) requires a write-lock of the queue.<br>
<!-- -->Updating tracked resources requires a write-lock.
Changes are made recursively never locking more than 1 queue at a time.<br>
<!-- -->Updating any configuration property on the queue requires a write-lock.
Retrieving any configuration value, or tracked resource, application or queue requires a read-lock.</p>
<p>Examples of operations needing a write-lock:</p>
<ul>
<li>Adding an application to a leaf queue</li>
<li>Updating the reservations</li>
</ul>
<p>Examples of operations needing a read-lock:</p>
<ul>
<li>Retrieving an application from a leaf type queue</li>
<li>Retrieving the pending resources</li>
</ul>
<p><strong>Application lock:</strong><br>
<!-- -->An application tracks resources of different types, the allocations and outstanding requests.<br>
<!-- -->Updating any tracked resources, allocations or requests requires a write-lock.
Retrieving any of those values requires a read-lock.</p>
<p>Scheduling also requires a write-lock of the application.
During scheduling the write-lock is held for the application.
Locks will be taken on the node or queue that need to be accessed or updated.<br>
<!-- -->Examples of the locks taken on other objects are:</p>
<ul>
<li>a read lock to access queue tracked resources</li>
<li>a write-lock to update the in progress allocations on the node</li>
</ul>
<p>Examples of operations needing a write-lock:</p>
<ul>
<li>Adding a new ask</li>
<li>Trying to schedule a pending request</li>
</ul>
<p>Examples of operations needing a read-lock:</p>
<ul>
<li>Retrieving the allocated resources</li>
<li>Retrieving the pending requests</li>
</ul>
<p><strong>Node lock:</strong><br>
<!-- -->A node tracks resources of different types and allocations.
Updating any tracked resources or allocations requires a write-lock.
Retrieving any of those values requires a read-lock.</p>
<p>Checks run during the allocation phases take locks as required.
Read-locks when checking, write-locks when updating.
A node is not locked for the whole allocation cycle.</p>
<p>Examples of operations needing a write-lock:</p>
<ul>
<li>Adding a new allocation</li>
<li>updating the node resources</li>
</ul>
<p>Examples of operations needing a read-lock:</p>
<ul>
<li>Retrieving the allocated resources</li>
<li>Retrieving the reservation status</li>
</ul>
<h2 class="anchor anchorWithStickyNavbar_X9mg" id="how-to-merge-cache-and-scheduler-objects">How to merge Cache and scheduler objects<a href="#how-to-merge-cache-and-scheduler-objects" class="hash-link" aria-label="Direct link to How to merge Cache and scheduler objects" title="Direct link to How to merge Cache and scheduler objects"></a></h2>
<p>Since there is no longer the requirement to distinguish the objects in the cache and scheduler the <code>scheduling</code> and <code>info</code> parts of the name will be dropped.</p>
<p>Overview of the main moves and merges:</p>
<ol>
<li><code>application_info</code> &amp; <code>scheduling_application</code>: <strong>merge</strong> to <code>scheduler.object.application</code></li>
<li><code>allocation_info</code> &amp; <code>scheduling_allocation</code>: <strong>merge</strong> to <code>scheduler.object.allocation</code></li>
<li><code>node_info</code> &amp; <code>scheduling_node</code>: <strong>merge</strong> to <code>scheduler.object.node</code></li>
<li><code>queue_info</code> &amp; <code>scheduling_queue</code>: <strong>merge</strong> to <code>scheduler.object.queue</code></li>
<li><code>partition_info</code> &amp; <code>scheduling_partition</code>: <strong>merge</strong> to <code>scheduler.PartitionContext</code></li>
<li><code>cluster_info</code> &amp; <code>scheduling_context</code>: <strong>merge</strong> to <code>scheduler.ClusterContext</code></li>
<li><code>application_state</code>: <strong>move</strong> to <code>scheduler.object.applicationState</code></li>
<li><code>object_state</code>: <strong>move</strong> to <code>scheduler.object.objectState</code></li>
<li><code>initializer</code>: <strong>merge</strong> into <code>scheduler.ClusterContext</code></li>
</ol>
<p>This move and merge of code includes a refactor of the objects into their own package.
That thus affects the two scheduler only objects, reservations and schedulingAsk, that are already defined.
Both will be moved into the objects package.</p>
<p>The top level scheduler package remains for the contexts and scheduling code.</p>
<h2 class="anchor anchorWithStickyNavbar_X9mg" id="code-merges">Code merges<a href="#code-merges" class="hash-link" aria-label="Direct link to Code merges" title="Direct link to Code merges"></a></h2>
<p>The first change is the event processing.
All RM events will now directly be handled in the scheduler.
Event handling will undergo a major change, far more than a simple merge.
Only the RM generated events will be left after the merge.
As described in the analysis above the scheduler is, in almost all cases, notified of changes from RM events.</p>
<p>Broadly speaking there are only three types of changes triggered by the event removal:</p>
<ul>
<li>configuration changes: new scheduler code required as the cache handling is not transferable to the scheduler</li>
<li>node, ask and application changes: merge of the cache code into the scheduler</li>
<li>allocation changes: removal of confirmation cycle and simplification of the scheduler code</li>
</ul>
<p>Part of the event handling is the processing of the configuration changes.
All configuration changes will now update the scheduler objects directly.
The way the scheduler works is slightly different from the cache which means the code is not transferable.</p>
<p>Nodes and applications are really split between the cache and scheduler.
Anything that is tracked in the cache object that does not have an equivalent value in the scheduler object will be moved into the scheduler object.
All references to scheduler objects will be removed.
With the code merges existing scheduler code that calls out directly into the cache objects will return the newly tracked value in the scheduler object.
These calls will thus become locked calls in the scheduler.</p>
<p>The concept of an in flight allocation will be removed.
Allocation will be made in the same scheduling iteration without events or creation of a proposal.
This removes the need for tracking of allocating resources on the scheduler objects.
In flight resource tracking was required to make sure that an allocation while not confirmed by the cache would be taken into account while making scheduling decisions.</p>
<p>The application and object state will be an integrated part of the scheduler object.
A state change is thus immediate and this should prevent an issue like <a href="https://issues.apache.org/jira/browse/YUNIKORN-169" target="_blank" rel="noopener noreferrer">YUNIKORN-169</a> from occurring.</p>
<h2 class="anchor anchorWithStickyNavbar_X9mg" id="locking-after-merge">Locking after merge<a href="#locking-after-merge" class="hash-link" aria-label="Direct link to Locking after merge" title="Direct link to Locking after merge"></a></h2>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="direction-of-lock">Direction of lock<a href="#direction-of-lock" class="hash-link" aria-label="Direct link to Direction of lock" title="Direct link to Direction of lock"></a></h3>
<p>It is possible to acquire another lock while holding a lock, but we need to make sure that we do not allow:</p>
<ul>
<li>Holding A.lock and acquire B&#x27;s lock.</li>
<li>Holding B.lock and acquire A&#x27;s lock.</li>
</ul>
<p>The current code in the scheduler takes a lock as late as possible and only for the time period needed.
Some actions are not locked on the scheduler side just on the cache side as each object has its own lock.
This means that a read of a value from the cache would not lock the scheduling object.</p>
<p>With the integration of the cache into the scheduler the number of locks will decrease as the number of objects decreases.
Each equivalent object, cache and scheduler, which used to have their own lock will now have just one.
After the merge of the code is performed one lock per object will be left.
Locking will occur more frequently as the number of fields in the scheduler objects has increased.</p>
<p>Calls that did not lock the scheduler object before the merge will become locked.
Lock contention could lead to performance degradation.
The reduced overhead in objects and event handling can hopefully compensate for this.
One point to keep track of is the change in locking behaviour.
New behaviour could lead to new deadlock situations when code is simply merged without looking at the order.</p>
<h3 class="anchor anchorWithStickyNavbar_X9mg" id="mitigations-for-deadlocks">Mitigations for deadlocks<a href="#mitigations-for-deadlocks" class="hash-link" aria-label="Direct link to Mitigations for deadlocks" title="Direct link to Mitigations for deadlocks"></a></h3>
<p>The locking inside the scheduler will be left as is.
This means that the main scheduling logic will be taking and releasing locks as required on the objects.
There are no long held read-locks or write-locks until the application is locked to schedule it.</p>
<p>A major point of attention will need to be that no iterations of objects should be performed while holding on to a lock.
For instance during scheduling while iterating over a queue&#x27;s applications we should not lock the queue.</p>
<p>Another example would be that event processing in the partition should not lock the partition unnecessarily.
The partition should be locked while retrieving, for instance, the node that needs updating, and the lock should be released before it tries to lock the node itself.</p>
<p>This approach fits in with the current locking approach and will keep the locking changes to a minimum.
Testing, specifically end-to-end testing, should catch these deadlocks.
There are no known tools that could be used to detect or describe lock order.</p></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/1.2.0/design/interface_message_simplification"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Simplifying Interface Messages</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/1.2.0/design/simple_preemptor"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">DaemonSet Scheduling using Simple Preemptor</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_ivnz thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#goals" class="table-of-contents__link toc-highlight">Goals</a></li><li><a href="#background" class="table-of-contents__link toc-highlight">Background</a></li><li><a href="#structure-analysis" class="table-of-contents__link toc-highlight">Structure analysis</a><ul><li><a href="#objects" class="table-of-contents__link toc-highlight">Objects</a></li><li><a href="#events" class="table-of-contents__link toc-highlight">Events</a></li></ul></li><li><a href="#detailed-flow-analysis" class="table-of-contents__link toc-highlight">Detailed flow analysis</a><ul><li><a href="#object-existing-in-both-cache-and-scheduler" class="table-of-contents__link toc-highlight">Object existing in both cache and scheduler</a></li><li><a href="#operations-to-addremove-app" class="table-of-contents__link toc-highlight">Operations to add/remove app</a></li><li><a href="#operations-to-remove-allocations-and-add-or-remove-asks" class="table-of-contents__link toc-highlight">Operations to remove allocations and add or remove asks</a></li><li><a href="#operations-to-add-update-or-remove-nodes" class="table-of-contents__link toc-highlight">Operations to add, update or remove 
nodes</a></li><li><a href="#operations-to-add-update-or-remove-partitions" class="table-of-contents__link toc-highlight">Operations to add, update or remove partitions</a></li><li><a href="#allocations" class="table-of-contents__link toc-highlight">Allocations</a></li></ul></li><li><a href="#current-locking" class="table-of-contents__link toc-highlight">Current locking</a></li><li><a href="#how-to-merge-cache-and-scheduler-objects" class="table-of-contents__link toc-highlight">How to merge Cache and scheduler objects</a></li><li><a href="#code-merges" class="table-of-contents__link toc-highlight">Code merges</a></li><li><a href="#locking-after-merge" class="table-of-contents__link toc-highlight">Locking after merge</a><ul><li><a href="#direction-of-lock" class="table-of-contents__link toc-highlight">Direction of lock</a></li><li><a href="#mitigations-for-deadlocks" class="table-of-contents__link toc-highlight">Mitigations for deadlocks</a></li></ul></li></ul></div></div></div></div></main></div></div></div><footer class="footer"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Blog</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://blog.cloudera.com/yunikorn-a-universal-resources-scheduler/" target="_blank" rel="noopener noreferrer" class="footer__link-item">What&#x27;s YuniKorn?<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://blog.cloudera.com/spark-on-kubernetes-gang-scheduling-with-yunikorn/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Spark on Kubernetes – Gang Scheduling with YuniKorn<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" 
class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">Code Repositories</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://github.com/apache/yunikorn-core/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Core scheduler<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://github.com/apache/yunikorn-k8shim" target="_blank" rel="noopener noreferrer" class="footer__link-item">Kubernetes shim<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://github.com/apache/yunikorn-scheduler-interface" target="_blank" rel="noopener noreferrer" class="footer__link-item">Scheduler Interface<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://github.com/apache/yunikorn-web" target="_blank" rel="noopener noreferrer" class="footer__link-item">WEB application<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 
4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://github.com/apache/yunikorn-site" target="_blank" rel="noopener noreferrer" class="footer__link-item">Website<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/community/get_involved">Get Involved</a></li><li class="footer__item"><a class="footer__link-item" href="/community/people">People</a></li><li class="footer__item"><a href="https://issues.apache.org/jira/projects/YUNIKORN/issues" target="_blank" rel="noopener noreferrer" class="footer__link-item">Issues<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_KRyU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">
<div style="font-size: 70%">
Copyright © 2020-2025 <a href="https://www.apache.org/">The Apache Software Foundation</a>. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. <br>
<div style="padding:20px; margin: 10px; color: #4d4d4d;">
<p>The Apache Software Foundation Apache YuniKorn, YuniKorn, Apache, the Apache feather, and the Apache YuniKorn project logo are either registered trademarks or trademarks of the Apache Software Foundation.</p>
</div>
</div></div></div></div></footer></div>
</body>
</html>