blob: 3767f3bf13011b516871be041894259a1e94b449 [file] [log] [blame]
<!doctype html>
<html class="docs-version-current" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="ahrefs-site-verification" content="c2f7370ecf46173f4fb25f114e74c97e0a2976d4f02f61c9b00a9d7d34e34698">
<meta name="generator" content="Docusaurus v2.0.0-beta.6">
<link rel="search" type="application/opensearchdescription+xml" title="Apache APISIX® -- Cloud-Native API Gateway and AI Gateway" href="/opensearch.xml">
<script type="application/ld+json">{"@context":"https://schema.org","@type":"WebSite","name":"Apache APISIX","url":"https://apisix.apache.org"}</script>
<script src="https://widget.kapa.ai/kapa-widget.bundle.js" data-website-id="24b59d9a-682e-4c3d-9e83-bf2ee85cdc19" data-project-name="APISIX" data-project-color="#E8442E" data-project-logo="https://static.apiseven.com/202202/apache-apisix.png" data-modal-disclaimer="This is a custom LLM for APISIX with access to all developer documentation, GitHub issues and discussions." data-modal-example-questions="How to set up canary release in APISIX?,How to develop a custom APISIX plugin?,How to use custom NGINX configuration in APISIX?,How to configure mTLS between clients and APISIX?,How to only allow a specific APISIX consumer to access special services or routes?" async></script><title data-react-helmet="true">ai-rate-limiting | Apache APISIX® -- Cloud-Native API Gateway and AI Gateway</title><meta data-react-helmet="true" property="og:image" content="https://static.apiseven.com/202202/apache-apisix.png"><meta data-react-helmet="true" name="twitter:image" content="https://static.apiseven.com/202202/apache-apisix.png"><meta data-react-helmet="true" property="og:url" content="https://apisix.apache.org/docs/apisix/next/plugins/ai-rate-limiting/"><meta data-react-helmet="true" name="docsearch:language" content="en"><meta data-react-helmet="true" name="docsearch:version" content="current"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-docs-apisix-current"><meta data-react-helmet="true" name="robots" content="index,follow"><meta data-react-helmet="true" name="twitter:card" content="summary"><meta data-react-helmet="true" property="og:title" content="ai-rate-limiting | Apache APISIX® -- Cloud-Native API Gateway and AI Gateway"><meta data-react-helmet="true" name="description" content="The ai-rate-limiting Plugin enforces token-based rate limiting for LLM service requests, preventing overuse, optimizing API consumption, and ensuring efficient resource allocation."><meta data-react-helmet="true" property="og:description" content="The ai-rate-limiting Plugin enforces token-based rate limiting for LLM service requests, preventing overuse, optimizing API consumption, and ensuring efficient resource allocation."><meta data-react-helmet="true" name="keywords" content="Apache APISIX,API Gateway,Plugin,ai-rate-limiting,AI,LLM"><link data-react-helmet="true" rel="shortcut icon" href="https://static.apiseven.com/202202/favicon.png"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/docs/apisix/next/plugins/ai-rate-limiting/" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/zh/docs/apisix/next/plugins/ai-rate-limiting/" hreflang="zh"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/docs/apisix/next/plugins/ai-rate-limiting/" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://38VC84A2WJ-dsn.algolia.net" crossorigin="anonymous"><link data-react-helmet="true" rel="canonical" href="https://docs.api7.ai/hub/ai-rate-limiting"><link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Medium.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Bold.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Light.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Demi.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-ExtraBold.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://apisix-website-static.apiseven.com/assets/js/runtime~main.6f5566cc.js" as="script">
<link rel="preload" href="https://apisix-website-static.apiseven.com/assets/js/main.ab3b624f.js" as="script">
<link rel="stylesheet" href="https://apisix-website-static.apiseven.com/assets/css/styles.8de0825e.css">
<script>var _paq=window._paq=window._paq||[];_paq.push(["disableCookies"]),_paq.push(["trackPageView"]),_paq.push(["enableLinkTracking"]),function(){var a="https://analytics.apache.org/";_paq.push(["setTrackerUrl",a+"matomo.php"]),_paq.push(["setSiteId","17"]);var e=document,p=e.createElement("script"),t=e.getElementsByTagName("script")[0];p.async=!0,p.src=a+"matomo.js",t.parentNode.insertBefore(p,t)}()</script>
</head>
<body>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" style="background-color:#e8433e;color:white" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">🤔 Introducing APISIX AI Gateway – Built for LLMs and AI workloads. <a target="_blank" rel="noopener noreferrer" href="/ai-gateway/"> Learn More</a></div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 24 24" width="14" height="14" fill="currentColor"><path d="M24 20.188l-8.315-8.209 8.2-8.282-3.697-3.697-8.212 8.318-8.31-8.203-3.666 3.666 8.321 8.24-8.206 8.313 3.666 3.666 8.237-8.318 8.285 8.203z"></path></svg></button></div><nav class="navbar navbar--fixed-top navbarHideable_RReh"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a target="_parent" class="navbar__brand" href="/"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--light_4Vu1 navbar__logo"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--dark_uzRr navbar__logo"><b class="navbar__title">Apache APISIX®</b></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a aria-current="page" class="navbar__link" target="_parent" href="/docs/">Docs</a><ul class="dropdown__menu"><li><a class="dropdown__link" target="_parent" href="/docs/apisix/getting-started/">Apache APISIX®️</a></li><li><a class="dropdown__link" target="_parent" href="/docs/apisix/next/dashboard/">Apache APISIX®️ Dashboard</a></li><li><a class="dropdown__link" target="_parent" href="/docs/ingress-controller/overview/">Apache APISIX®️ Ingress Controller</a></li><li><a class="dropdown__link" target="_parent" href="/docs/helm-chart/apisix/">Apache APISIX®️ Helm Charts</a></li><li><a class="dropdown__link" target="_parent" href="/docs/docker/build/">Apache APISIX®️ Docker</a></li><li><a class="dropdown__link" target="_parent" href="/docs/java-plugin-runner/development/">Apache APISIX®️ Java Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/go-plugin-runner/getting-started/">Apache APISIX®️ Go Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/python-plugin-runner/getting-started/">Apache APISIX®️ Python Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/join/">General</a></li></ul></div><a class="navbar__item navbar__link" target="_parent" href="/blog/">Blog</a><a class="navbar__item navbar__link" target="_parent" href="/blog/tags/case-studies/">Case Studies</a><a class="navbar__item navbar__link" target="_parent" href="/downloads/">Downloads</a><a class="navbar__item navbar__link" target="_parent" href="/help/">Help</a><a class="navbar__item navbar__link" target="_parent" href="/team/">Team</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link">Resources</a><ul class="dropdown__menu"><li><a class="dropdown__link" target="_parent" href="/showcase/">Showcase</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/code-samples/">Code Samples</a></li><li><a class="dropdown__link" target="_parent" href="/plugins/">PluginHub</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/join/">Community</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/events/">Events</a></li><li><a href="https://github.com/apache/apisix/milestones" target="_parent" rel="noopener noreferrer" class="dropdown__link">Roadmap</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>English</span></span></a><ul class="dropdown__menu"><li><a href="/docs/apisix/next/plugins/ai-rate-limiting/" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active" style="text-transform:capitalize">English</a></li><li><a href="/zh/docs/apisix/next/plugins/ai-rate-limiting/" target="_self" rel="noopener noreferrer" class="dropdown__link" style="text-transform:capitalize">简体中文</a></li></ul></div><div class="react-toggle toggle_2i4l react-toggle--disabled"><div class="react-toggle-track" role="button" tabindex="-1"><div class="react-toggle-track-check"><span class="toggle_iYfV">🌜</span></div><div class="react-toggle-track-x"><span class="toggle_iYfV">🌞</span></div><div class="react-toggle-thumb"></div></div><input type="checkbox" class="react-toggle-screenreader-only" aria-label="Switch between dark and light mode"></div><div class="searchBox_fBfG"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button class="clean-btn backToTopButton_i9tI" type="button"><svg viewBox="0 0 24 24" width="28"><path d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z" fill="currentColor"></path></svg></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_LIo8 sidebarWithHideableNavbar_CMI-"><a target="_parent" tabindex="-1" class="sidebarLogo_P87M" href="/"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--dark_uzRr"><b>Apache APISIX®</b></a><div class="sidebarVersionSwitch_0QIZ">Version:<div class="navbar__item dropdown dropdown--hoverable"><a class="navbar__link" href="/docs/apisix/next/getting-started/README/">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/apisix/next/plugins/ai-rate-limiting/"><div>Next</div></a></li><li><a class="dropdown__link" href="/docs/apisix/plugins/ai-rate-limiting/"><div>3.14<div class="badge_6FVu Latest_oyqS">Latest</div></div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.13/plugins/ai-rate-limiting/"><div>3.13</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.12/plugins/ai-rate-limiting/"><div>3.12</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.11/getting-started/README/"><div>3.11</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.10/getting-started/README/"><div>3.10</div></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.9/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.9<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.8/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.8<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.7/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.7<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.6/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.6<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.5/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.5<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.4/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.4<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.3/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.3<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.2/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.2<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.1/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.1<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.0/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.0<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.15/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.15<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.14/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.14<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.13/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.13<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.12/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.12<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.11/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.11<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.10/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.10<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.9/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.9<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.8/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.8<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.7/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.7<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.6/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.6<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.5/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.5<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.4/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.4<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div></div><nav class="menu thin-scrollbar menu_oAhv menuWithAnnouncementBar_IVfW"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Getting Started</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/installation-guide/">Installation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/architecture-design/apisix/">Architecture</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Tutorials</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Terminology</a></li><li class="theme-doc-sidebar-item-category menu__list-item"><a class="menu__link menu__link--sublist menu__link--active" href="#">Plugins</a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-category menu__list-item"><a class="menu__link menu__link--sublist menu__link--active" href="#" tabindex="0">AI</a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-proxy/">ai-proxy</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-proxy-multi/">ai-proxy-multi</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/apisix/next/plugins/ai-rate-limiting/">ai-rate-limiting</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-guard/">ai-prompt-guard</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-aws-content-moderation/">ai-aws-content-moderation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-aliyun-content-moderation/">ai-aliyun-content-moderation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-decorator/">ai-prompt-decorator</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-template/">ai-prompt-template</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-rag/">ai-rag</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-request-rewrite/">ai-request-rewrite</a></li></ul></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">General</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Transformation</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Authentication</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Security</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Traffic</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Observability</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Serverless</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Other protocols</a></li></ul></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">API</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/dashboard/">Apache APISIX Dashboard</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Development</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/deployment-modes/">Deployment modes</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/FAQ/">FAQ</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Others</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a href="https://github.com/apache/apisix/blob/master/CHANGELOG.md" target="_blank" rel="noopener noreferrer" class="menu__link"><span>CHANGELOG<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/upgrade-guide-from-2.15.x-to-3.0.0/">Upgrade Guide</a></li></ul></nav><button type="button" title="Collapse sidebar" aria-label="Collapse sidebar" class="button button--secondary button--outline collapseSidebarButton_EBxv"><svg width="20" height="20" aria-hidden="true" class="collapseSidebarButtonIcon_AF9Q"><g fill="#7a7a7a"><path d="M9.992 10.023c0 .2-.062.399-.172.547l-4.996 7.492a.982.982 0 01-.828.454H1c-.55 0-1-.453-1-1 0-.2.059-.403.168-.551l4.629-6.942L.168 3.078A.939.939 0 010 2.528c0-.548.45-.997 1-.997h2.996c.352 0 .649.18.828.45L9.82 9.472c.11.148.172.347.172.55zm0 0"></path><path d="M19.98 10.023c0 .2-.058.399-.168.547l-4.996 7.492a.987.987 0 01-.828.454h-3c-.547 0-.996-.453-.996-1 0-.2.059-.403.168-.551l4.625-6.942-4.625-6.945a.939.939 0 01-.168-.55 1 1 0 01.996-.997h3c.348 0 .649.18.828.45l4.996 7.492c.11.148.168.347.168.55zm0 0"></path></g></svg></button></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is unreleased documentation for Apache APISIX® -- Cloud-Native API Gateway and AI Gateway <b>Next</b> version.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/apisix/plugins/ai-rate-limiting/">latest version</a></b> (3.14).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: Next</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>ai-rate-limiting</h1></header><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="description"></a>Description<a class="hash-link" href="#description" title="Direct link to heading">#</a></h2><p>The <code>ai-rate-limiting</code> Plugin enforces token-based rate limiting for requests sent to LLM services. It helps manage API usage by controlling the number of tokens consumed within a specified time frame, ensuring fair resource allocation and preventing excessive load on the service. It is often used with <a href="/docs/apisix/next/plugins/ai-proxy/"><code>ai-proxy</code></a> or <a href="/docs/apisix/next/plugins/ai-proxy-multi/"><code>ai-proxy-multi</code></a> plugin.</p><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="attributes"></a>Attributes<a class="hash-link" href="#attributes" title="Direct link to heading">#</a></h2><table><thead><tr><th>Name</th><th>Type</th><th>Required</th><th>Default</th><th>Valid values</th><th>Description</th></tr></thead><tbody><tr><td>limit</td><td>integer</td><td>False</td><td></td><td>&gt;0</td><td>The maximum number of tokens allowed within a given time interval. At least one of <code>limit</code> and <code>instances.limit</code> should be configured.</td></tr><tr><td>time_window</td><td>integer</td><td>False</td><td></td><td>&gt;0</td><td>The time interval corresponding to the rate limiting <code>limit</code> in seconds. At least one of <code>time_window</code> and <code>instances.time_window</code> should be configured.</td></tr><tr><td>show_limit_quota_header</td><td>boolean</td><td>False</td><td>true</td><td></td><td>If true, includes <code>X-AI-RateLimit-Limit-*</code>, <code>X-AI-RateLimit-Remaining-*</code>, and <code>X-AI-RateLimit-Reset-*</code> headers in the response, where <code>*</code> is the instance name.</td></tr><tr><td>limit_strategy</td><td>string</td><td>False</td><td>total_tokens</td><td>[total_tokens, prompt_tokens, completion_tokens]</td><td>Type of token to apply rate limiting. <code>total_tokens</code> is the sum of <code>prompt_tokens</code> and <code>completion_tokens</code>.</td></tr><tr><td>instances</td><td>array[object]</td><td>False</td><td></td><td></td><td>LLM instance rate limiting configurations.</td></tr><tr><td>instances.name</td><td>string</td><td>True</td><td></td><td></td><td>Name of the LLM service instance.</td></tr><tr><td>instances.limit</td><td>integer</td><td>True</td><td></td><td>&gt;0</td><td>The maximum number of tokens allowed within a given time interval for an instance.</td></tr><tr><td>instances.time_window</td><td>integer</td><td>True</td><td></td><td>&gt;0</td><td>The time interval corresponding to the rate limiting <code>limit</code> in seconds for an instance.</td></tr><tr><td>rejected_code</td><td>integer</td><td>False</td><td>503</td><td>[200, 599]</td><td>The HTTP status code returned when a request exceeding the quota is rejected.</td></tr><tr><td>rejected_msg</td><td>string</td><td>False</td><td></td><td></td><td>The response body returned when a request exceeding the quota is rejected.</td></tr></tbody></table><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="examples"></a>Examples<a class="hash-link" href="#examples" title="Direct link to heading">#</a></h2><p>The examples below demonstrate how you can configure <code>ai-rate-limiting</code> for different scenarios.</p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>You can fetch the <code>admin_key</code> from <code>config.yaml</code> and save to an environment variable with the following command:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 bash"><pre tabindex="0" class="prism-code language-bash codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token assign-left variable" style="color:#36acaa">admin_key</span><span class="token operator" style="color:#393A34">=</span><span class="token variable" style="color:#36acaa">$(</span><span class="token variable" style="color:#36acaa">yq </span><span class="token variable string" style="color:#e3116c">&#x27;.deployment.admin.admin_key[0].key&#x27;</span><span class="token variable" style="color:#36acaa"> conf/config.yaml </span><span class="token variable operator" style="color:#393A34">|</span><span class="token variable" style="color:#36acaa"> </span><span class="token variable function" style="color:#d73a49">sed</span><span class="token variable" style="color:#36acaa"> </span><span class="token variable string" style="color:#e3116c">&#x27;s/&quot;//g&#x27;</span><span class="token variable" style="color:#36acaa">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="apply-rate-limiting-with-ai-proxy"></a>Apply Rate Limiting with <code>ai-proxy</code><a class="hash-link" href="#apply-rate-limiting-with-ai-proxy" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can use <code>ai-proxy</code> to proxy LLM traffic and use <code>ai-rate-limiting</code> to configure token-based rate limiting on the instance.</p><p>Create a Route as such and update with your LLM providers, models, API keys, and endpoints, if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-rate-limiting-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-35-turbo-instruct&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;max_tokens&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">512</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;temperature&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">1.0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;ai-rate-limiting&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;limit&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">300</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;time_window&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">30</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;limit_strategy&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;prompt_tokens&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a system prompt and a sample user question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1 + 1 equals 2. This is a fundamental arithmetic operation where adding one unit to another results in a total of two units.&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>If the rate limiting quota of 300 prompt tokens has been consumed in a 30-second window, all additional requests will be rejected.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="rate-limit-one-instance-among-multiple"></a>Rate Limit One Instance Among Multiple<a class="hash-link" href="#rate-limit-one-instance-among-multiple" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can use <code>ai-proxy-multi</code> to configure two models for load balancing, forwarding 80% of the traffic to one instance and 20% to the other. Additionally, use <code>ai-rate-limiting</code> to configure token-based rate limiting on the instance that receives 80% of the traffic, such that when the configured quota is fully consumed, the additional traffic will be forwarded to the other instance.</p><p>Create a Route which applies rate limiting quota of 100 total tokens in a 30-second window on the <code>deepseek-instance-1</code> instance, and update with your LLM providers, models, API keys, and endpoints, if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-rate-limiting-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-rate-limiting&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;deepseek-instance-1&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;deepseek&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 8,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$DEEPSEEK_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;name&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-instance-2&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;provider&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;weight&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;auth&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;header&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Authorization&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Bearer &#x27;&quot;</span><span class="token variable" style="color:#36acaa">$DEEPSEEK_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;ai-rate-limiting&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;instances&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;name&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-instance-1&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;limit_strategy&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;total_tokens&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;limit&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">100</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;time_window&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">30</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a system prompt and a sample user question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1 + 1 equals 2. This is a fundamental arithmetic operation where adding one unit to another results in a total of two units.&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>If <code>deepseek-instance-1</code> instance rate limiting quota of 100 tokens has been consumed in a 30-second window, the additional requests will all be forwarded to <code>deepseek-instance-2</code>, which is not rate limited.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="apply-the-same-quota-to-all-instances"></a>Apply the Same Quota to All Instances<a class="hash-link" href="#apply-the-same-quota-to-all-instances" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can apply the same rate limiting quota to all LLM upstream instances in <code>ai-rate-limiting</code>.</p><p>For demonstration and easier differentiation, you will be configuring one OpenAI instance and one DeepSeek instance as the upstream LLM services.</p><p>Create a Route which applies a rate limiting quota of 100 total tokens for all instances within a 60-second window, and update with your LLM providers, models, API keys, and endpoints, if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-rate-limiting-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-rate-limiting&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;name&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-instance&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;provider&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;weight&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;auth&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;header&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Authorization&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Bearer &#x27;&quot;</span><span class="token variable" style="color:#36acaa">$DEEPSEEK_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;ai-rate-limiting&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;limit&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">100</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;time_window&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">60</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;rejected_code&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">429</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;limit_strategy&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;total_tokens&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a system prompt and a sample user question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> -i </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newtons laws&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response from either LLM instance, similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Sure! Sir Isaac Newton formulated three laws of motion that describe the motion of objects. These laws are widely used in physics and engineering for studying and understanding how things move. Here they are:\n\n1. Newton&#x27;s First Law - Law of Inertia: An object at rest tends to stay at rest and an object in motion tends to stay in motion with the same speed and in the same direction unless acted upon by an unbalanced force. This is also known as the principle of inertia.\n\n2. Newton&#x27;s Second Law of Motion - Force and Acceleration: The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. This is usually formulated as F=ma where F is the force applied, m is the mass of the object and a is the acceleration produced.\n\n3. Newton&#x27;s Third Law - Action and Reaction: For every action, there is an equal and opposite reaction. This means that any force exerted on a body will create a force of equal magnitude but in the opposite direction on the object that exerted the first force.\n\nIn simple terms: \n1. If you slide a book on a table and let go, it will stop because of the friction (or force) between it and the table.\n2.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;length&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">23</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">256</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">279</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;reasoning_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;accepted_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;rejected_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;service_tier&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;default&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Since the <code>total_tokens</code> value exceeds the configured quota of <code>100</code>, the next request within the 60-second window is expected to be forwarded to the other instance.</p><p>Within the same 60-second window, send another POST request to the Route:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> -i </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newtons laws&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response from the other LLM instance, similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Sure! Newton&#x27;s laws of motion are three fundamental principles that describe the relationship between the motion of an object and the forces acting on it. They were formulated by Sir Isaac Newton in the late 17th century and are foundational to classical mechanics. Here&#x27;s an explanation of each law:\n\n---\n\n### **1. Newton&#x27;s First Law (Law of Inertia)**\n- **Statement**: An object will remain at rest or in uniform motion in a straight line unless acted upon by an external force.\n- **What it means**: This law introduces the concept of **inertia**, which is the tendency of an object to resist changes in its state of motion. If no net force acts on an object, its velocity (speed and direction) will not change.\n- **Example**: A book lying on a table will stay at rest unless you push it. Similarly, a hockey puck sliding on ice will keep moving at a constant speed unless friction or another force slows it down.\n\n---\n\n### **2. Newton&#x27;s Second Law (Law of Acceleration)**\n- **Statement**: The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. Mathematically, this is expressed as:\n \\[\n F = ma\n \\]\n&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;length&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">13</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">256</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">269</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_cache_hit_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_cache_miss_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">13</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;fp_3a5770e1b4_prod0225&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Since the <code>total_tokens</code> value exceeds the configured quota of <code>100</code>, the next request within the 60-second window is expected to be rejected.</p><p>Within the same 60-second window, send a third POST request to the Route:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> -i </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newtons laws&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive an <code>HTTP 429 Too Many Requests</code> response and observe the following headers:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 text"><pre tabindex="0" class="prism-code language-text codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token plain">X-AI-RateLimit-Limit-openai-instance: 100</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">X-AI-RateLimit-Remaining-openai-instance: 0</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">X-AI-RateLimit-Reset-openai-instance: 0</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">X-AI-RateLimit-Limit-deepseek-instance: 100</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">X-AI-RateLimit-Remaining-deepseek-instance: 0</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">X-AI-RateLimit-Reset-deepseek-instance: 0</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="configure-instance-priority-and-rate-limiting"></a>Configure Instance Priority and Rate Limiting<a class="hash-link" href="#configure-instance-priority-and-rate-limiting" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure two models with different priorities and apply rate limiting on the instance with a higher priority. In the case where <code>fallback_strategy</code> is set to <code>[&quot;rate_limiting&quot;]</code>, the Plugin should continue to forward requests to the low priority instance once the high priority instance&#x27;s rate limiting quota is fully consumed.</p><p>Create a Route as such to set rate limiting and a higher priority on <code>openai-instance</code> instance and set the <code>fallback_strategy</code> to <code>[&quot;rate_limiting&quot;]</code>. Update with your LLM providers, models, API keys, and endpoints, if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-rate-limiting-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy-multi&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;fallback_strategy: [&quot;rate_limiting&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;priority&quot;: 1,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token string variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain">&#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">options</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">model</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">gpt-4</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">name</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek-instance</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">provider</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">priority</span><span class="token string" style="color:#e3116c">&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">weight</span><span class="token string" style="color:#e3116c">&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">auth</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">header</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">Authorization</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">Bearer </span><span class="token string" style="color:#e3116c">&#x27;&quot;$DEEPSEEK_API_KEY&quot;&#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">options</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">model</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek-chat</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">ai-rate-limiting</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">instances</span><span class="token string" style="color:#e3116c">&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">name</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">openai-instance</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">limit</span><span class="token string" style="color:#e3116c">&quot;: 10,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">time_window</span><span class="token string" style="color:#e3116c">&quot;: 60</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">limit_strategy</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">total_tokens&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a system prompt and a sample user question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1+1 equals 2.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">23</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">8</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">31</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;reasoning_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;accepted_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;rejected_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;service_tier&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;default&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Since the <code>total_tokens</code> value exceeds the configured quota of <code>10</code>, the next request within the 60-second window is expected to be forwarded to the other instance.</p><p>Within the same 60-second window, send another POST request to the Route:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newton law&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should see a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Certainly! Newton&#x27;s laws of motion are three fundamental principles that describe the relationship between the motion of an object and the forces acting on it. They were formulated by Sir Isaac Newton in the late 17th century and are foundational to classical mechanics.\n\n---\n\n### **1. Newton&#x27;s First Law (Law of Inertia):**\n- **Statement:** An object at rest will remain at rest, and an object in motion will continue moving at a constant velocity (in a straight line at a constant speed), unless acted upon by an external force.\n- **Key Idea:** This law introduces the concept of **inertia**, which is the tendency of an object to resist changes in its state of motion.\n- **Example:** If you slide a book across a table, it eventually stops because of the force of friction acting on it. Without friction, the book would keep moving indefinitely.\n\n---\n\n### **2. Newton&#x27;s Second Law (Law of Acceleration):**\n- **Statement:** The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. Mathematically, this is expressed as:\n \\[\n F = ma\n \\]\n where:\n - \\( F \\) = net force applied (in Newtons),\n -&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="load-balance-and-rate-limit-by-consumers"></a>Load Balance and Rate Limit by Consumers<a class="hash-link" href="#load-balance-and-rate-limit-by-consumers" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure two models for load balancing and apply rate limiting by Consumer.</p><p>Create a Consumer <code>johndoe</code> and a rate limiting quota of 10 tokens in a 60-second window on <code>openai-instance</code> instance:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/consumers&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;username&quot;: &quot;johndoe&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-rate-limiting&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;limit&quot;: 10,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;time_window&quot;: 60</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;rejected_code&quot;: 429,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;limit_strategy&quot;: &quot;total_tokens&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Configure <code>key-auth</code> credential for <code>johndoe</code>:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/consumers/johndoe/credentials&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;cred-john-key-auth&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key-auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key&quot;: &quot;john-key&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Create another Consumer <code>janedoe</code> and a rate limiting quota of 10 tokens in a 60-second window on <code>deepseek-instance</code> instance:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/consumers&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;username&quot;: &quot;johndoe&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-rate-limiting&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;deepseek-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;limit&quot;: 10,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;time_window&quot;: 60</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;rejected_code&quot;: 429,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;limit_strategy&quot;: &quot;total_tokens&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Configure <code>key-auth</code> credential for <code>janedoe</code>:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/consumers/janedoe/credentials&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;cred-jane-key-auth&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key-auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key&quot;: &quot;jane-key&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Create a Route as such and update with your LLM providers, models, API keys, and endpoints, if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-rate-limiting-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key-auth&quot;: {},</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy-multi&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;fallback_strategy: [&quot;rate_limiting&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token string variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain">&#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">options</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">model</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">gpt-4</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">name</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek-instance</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">provider</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">weight</span><span class="token string" style="color:#e3116c">&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">auth</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">header</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">Authorization</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">Bearer </span><span class="token string" style="color:#e3116c">&#x27;&quot;$DEEPSEEK_API_KEY&quot;&#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">options</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">model</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek-chat&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route without any Consumer key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> -i </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive an <code>HTTP/1.1 401 Unauthorized</code> response.</p><p>Send a POST request to the Route with <code>johndoe</code>&#x27;s key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&#x27;apikey: john-key&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1+1 equals 2.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">23</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">8</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">31</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;reasoning_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;accepted_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;rejected_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;service_tier&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;default&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Since the <code>total_tokens</code> value exceeds the configured quota of the <code>openai</code> instance for <code>johndoe</code>, the next request within the 60-second window from <code>johndoe</code> is expected to be forwarded to the <code>deepseek</code> instance.</p><p>Within the same 60-second window, send another POST request to the Route with <code>johndoe</code>&#x27;s key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&#x27;apikey: john-key&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newtons laws to me&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should see a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Certainly! Newton&#x27;s laws of motion are three fundamental principles that describe the relationship between the motion of an object and the forces acting on it. They were formulated by Sir Isaac Newton in the late 17th century and are foundational to classical mechanics.\n\n---\n\n### **1. Newton&#x27;s First Law (Law of Inertia):**\n- **Statement:** An object at rest will remain at rest, and an object in motion will continue moving at a constant velocity (in a straight line at a constant speed), unless acted upon by an external force.\n- **Key Idea:** This law introduces the concept of **inertia**, which is the tendency of an object to resist changes in its state of motion.\n- **Example:** If you slide a book across a table, it eventually stops because of the force of friction acting on it. Without friction, the book would keep moving indefinitely.\n\n---\n\n### **2. Newton&#x27;s Second Law (Law of Acceleration):**\n- **Statement:** The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. Mathematically, this is expressed as:\n \\[\n F = ma\n \\]\n where:\n - \\( F \\) = net force applied (in Newtons),\n -&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with <code>janedoe</code>&#x27;s key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&#x27;apikey: jane-key&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;The sum of 1 and 1 is 2. This is a basic arithmetic operation where you combine two units to get a total of two units.&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">14</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">31</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">45</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_cache_hit_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_cache_miss_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">14</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;fp_3a5770e1b4_prod0225&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Since the <code>total_tokens</code> value exceeds the configured quota of the <code>deepseek</code> instance for <code>janedoe</code>, the next request within the 60-second window from <code>janedoe</code> is expected to be forwarded to the <code>openai</code> instance.</p><p>Within the same 60-second window, send another POST request to the Route with <code>janedoe</code>&#x27;s key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&#x27;apikey: jane-key&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newtons laws to me&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should see a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Sure, here are Newton&#x27;s three laws of motion:\n\n1) Newton&#x27;s First Law, also known as the Law of Inertia, states that an object at rest will stay at rest, and an object in motion will stay in motion, unless acted on by an external force. In simple words, this law suggests that an object will keep doing whatever it is doing until something causes it to do otherwise. \n\n2) Newton&#x27;s Second Law states that the force acting on an object is equal to the mass of that object times its acceleration (F=ma). This means that force is directly proportional to mass and acceleration. The heavier the object and the faster it accelerates, the greater the force.\n\n3) Newton&#x27;s Third Law, also known as the law of action and reaction, states that for every action, there is an equal and opposite reaction. Essentially, any force exerted onto a body will create a force of equal magnitude but in the opposite direction on the object that exerted the first force.\n\nRemember, these laws become less accurate when considering speeds near the speed of light (where Einstein&#x27;s theory of relativity becomes more appropriate) or objects very small or very large. However, for everyday situations, they provide a good model of how things move.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>This shows <code>ai-proxy-multi</code> load balance the traffic with respect to the rate limiting rules in <code>ai-rate-limiting</code> by Consumers.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="/edit#https://github.com/apache/apisix/edit/master/docs/en/latest/plugins/ai-rate-limiting.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/apisix/next/plugins/ai-proxy-multi/"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">« ai-proxy-multi</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/apisix/next/plugins/ai-prompt-guard/"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">ai-prompt-guard »</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#description" class="table-of-contents__link">Description</a></li><li><a href="#attributes" class="table-of-contents__link">Attributes</a></li><li><a href="#examples" class="table-of-contents__link">Examples</a><ul><li><a href="#apply-rate-limiting-with-ai-proxy" class="table-of-contents__link">Apply Rate Limiting with <code>ai-proxy</code></a></li><li><a href="#rate-limit-one-instance-among-multiple" class="table-of-contents__link">Rate Limit One Instance Among Multiple</a></li><li><a href="#apply-the-same-quota-to-all-instances" class="table-of-contents__link">Apply the Same Quota to All Instances</a></li><li><a href="#configure-instance-priority-and-rate-limiting" class="table-of-contents__link">Configure Instance Priority and Rate Limiting</a></li><li><a href="#load-balance-and-rate-limit-by-consumers" class="table-of-contents__link">Load Balance and Rate Limit by Consumers</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="container_MP5Z"><div class="linksRow_iwpv"><div class="linksCol_a1ec"><div>ASF</div><ul><li class="footer__item"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer"><span></span><span>Foundation</span></a></li><li class="footer__item"><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer"><span></span><span>License</span></a></li><li class="footer__item"><a href="https://www.apache.org/events/" target="_blank" rel="noopener noreferrer"><span></span><span>Events</span></a></li><li class="footer__item"><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer"><span></span><span>Security</span></a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer"><span></span><span>Sponsorship</span></a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer"><span></span><span>Thanks</span></a></li></ul></div><div class="linksCol_a1ec"><div>Community</div><ul><li class="footer__item"><a href="https://github.com/apache/apisix/issues" target="_blank" rel="noopener noreferrer"><span></span><span>GitHub</span></a></li><li class="footer__item"><a href="/docs/general/join/"><span></span><span>Slack</span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheAPISIX" target="_blank" rel="noopener noreferrer"><span></span><span>Twitter</span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCgPD18cMhOg5rmPVnQhAC8g" target="_blank" rel="noopener noreferrer"><span></span><span>YouTube</span></a></li></ul></div><div class="linksCol_a1ec"><div>More</div><ul><li class="footer__item"><a target="_parent" href="/blog/"><span></span><span>Blog</span></a></li><li class="footer__item"><a target="_parent" href="/showcase/"><span></span><span>Showcase</span></a></li><li class="footer__item"><a target="_parent" href="/plugins/"><span></span><span>Plugin Hub</span></a></li><li class="footer__item"><a href="https://github.com/apache/apisix/milestones" target="_parent" rel="noopener noreferrer"><span></span><span>Roadmap</span></a></li></ul></div></div><div class="copyright_ZfFh"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer"><span style="display:inline-block;width:231.25px;height:40px"></span></a><div>Copyright © 2019-2025 The Apache Software Foundation. Apache APISIX, APISIX®, Apache, the Apache feather logo, and the Apache APISIX project logo are either registered trademarks or trademarks of the Apache Software Foundation.</div></div></footer></div>
<script src="https://apisix-website-static.apiseven.com/assets/js/runtime~main.6f5566cc.js"></script>
<script src="https://apisix-website-static.apiseven.com/assets/js/main.ab3b624f.js"></script>
</body>
</html>