blob: e8597674b47d0fdcff038b15132d265f5e91c77e [file] [log] [blame]
<!doctype html>
<html class="docs-version-current" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="ahrefs-site-verification" content="c2f7370ecf46173f4fb25f114e74c97e0a2976d4f02f61c9b00a9d7d34e34698">
<meta name="generator" content="Docusaurus v2.0.0-beta.6">
<link rel="search" type="application/opensearchdescription+xml" title="Apache APISIX® -- Cloud-Native API Gateway and AI Gateway" href="/opensearch.xml">
<script type="application/ld+json">{"@context":"https://schema.org","@type":"WebSite","name":"Apache APISIX","url":"https://apisix.apache.org"}</script>
<script src="https://widget.kapa.ai/kapa-widget.bundle.js" data-website-id="24b59d9a-682e-4c3d-9e83-bf2ee85cdc19" data-project-name="APISIX" data-project-color="#E8442E" data-project-logo="https://static.apiseven.com/202202/apache-apisix.png" data-modal-disclaimer="This is a custom LLM for APISIX with access to all developer documentation, GitHub issues and discussions." data-modal-example-questions="How to set up canary release in APISIX?,How to develop a custom APISIX plugin?,How to use custom NGINX configuration in APISIX?,How to configure mTLS between clients and APISIX?,How to only allow a specific APISIX consumer to access special services or routes?" async></script><title data-react-helmet="true">ai-proxy-multi | Apache APISIX® -- Cloud-Native API Gateway and AI Gateway</title><meta data-react-helmet="true" property="og:image" content="https://static.apiseven.com/202202/apache-apisix.png"><meta data-react-helmet="true" name="twitter:image" content="https://static.apiseven.com/202202/apache-apisix.png"><meta data-react-helmet="true" property="og:url" content="https://apisix.apache.org/docs/apisix/next/plugins/ai-proxy-multi/"><meta data-react-helmet="true" name="docsearch:language" content="en"><meta data-react-helmet="true" name="docsearch:version" content="current"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-docs-apisix-current"><meta data-react-helmet="true" name="robots" content="index,follow"><meta data-react-helmet="true" name="twitter:card" content="summary"><meta data-react-helmet="true" property="og:title" content="ai-proxy-multi | Apache APISIX® -- Cloud-Native API Gateway and AI Gateway"><meta data-react-helmet="true" name="description" content="The ai-proxy-multi Plugin extends the capabilities of ai-proxy with load balancing, retries, fallbacks, and health chekcs, simplifying the integration with OpenAI, DeepSeek, Azure, AIMLAPI, and other OpenAI-compatible APIs."><meta data-react-helmet="true" property="og:description" content="The ai-proxy-multi Plugin extends the capabilities of ai-proxy with load balancing, retries, fallbacks, and health chekcs, simplifying the integration with OpenAI, DeepSeek, Azure, AIMLAPI, and other OpenAI-compatible APIs."><meta data-react-helmet="true" name="keywords" content="Apache APISIX,API Gateway,Plugin,ai-proxy-multi,AI,LLM"><link data-react-helmet="true" rel="shortcut icon" href="https://static.apiseven.com/202202/favicon.png"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/docs/apisix/next/plugins/ai-proxy-multi/" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/zh/docs/apisix/next/plugins/ai-proxy-multi/" hreflang="zh"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/docs/apisix/next/plugins/ai-proxy-multi/" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://38VC84A2WJ-dsn.algolia.net" crossorigin="anonymous"><link data-react-helmet="true" rel="canonical" href="https://docs.api7.ai/hub/ai-proxy-multi"><link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Medium.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Bold.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Light.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Demi.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-ExtraBold.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://apisix-website-static.apiseven.com/assets/js/runtime~main.e29fe45b.js" as="script">
<link rel="preload" href="https://apisix-website-static.apiseven.com/assets/js/main.121d6aaf.js" as="script">
<link rel="stylesheet" href="https://apisix-website-static.apiseven.com/assets/css/styles.8de0825e.css">
<script>var _paq=window._paq=window._paq||[];_paq.push(["disableCookies"]),_paq.push(["trackPageView"]),_paq.push(["enableLinkTracking"]),function(){var a="https://analytics.apache.org/";_paq.push(["setTrackerUrl",a+"matomo.php"]),_paq.push(["setSiteId","17"]);var e=document,p=e.createElement("script"),t=e.getElementsByTagName("script")[0];p.async=!0,p.src=a+"matomo.js",t.parentNode.insertBefore(p,t)}()</script>
</head>
<body>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" style="background-color:#e8433e;color:white" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">🤔 Introducing APISIX AI Gateway – Built for LLMs and AI workloads. <a target="_blank" rel="noopener noreferrer" href="/ai-gateway/"> Learn More</a></div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 24 24" width="14" height="14" fill="currentColor"><path d="M24 20.188l-8.315-8.209 8.2-8.282-3.697-3.697-8.212 8.318-8.31-8.203-3.666 3.666 8.321 8.24-8.206 8.313 3.666 3.666 8.237-8.318 8.285 8.203z"></path></svg></button></div><nav class="navbar navbar--fixed-top navbarHideable_RReh"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a target="_parent" class="navbar__brand" href="/"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--light_4Vu1 navbar__logo"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--dark_uzRr navbar__logo"><b class="navbar__title">Apache APISIX®</b></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a aria-current="page" class="navbar__link" target="_parent" href="/docs/">Docs</a><ul class="dropdown__menu"><li><a class="dropdown__link" target="_parent" href="/docs/apisix/getting-started/">Apache APISIX®️</a></li><li><a class="dropdown__link" target="_parent" href="/docs/apisix/next/dashboard/">Apache APISIX®️ Dashboard</a></li><li><a class="dropdown__link" target="_parent" href="/docs/ingress-controller/overview/">Apache APISIX®️ Ingress Controller</a></li><li><a class="dropdown__link" target="_parent" href="/docs/helm-chart/apisix/">Apache APISIX®️ Helm Charts</a></li><li><a class="dropdown__link" target="_parent" href="/docs/docker/build/">Apache APISIX®️ Docker</a></li><li><a class="dropdown__link" target="_parent" href="/docs/java-plugin-runner/development/">Apache APISIX®️ Java Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/go-plugin-runner/getting-started/">Apache APISIX®️ Go Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/python-plugin-runner/getting-started/">Apache APISIX®️ Python Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/join/">General</a></li></ul></div><a class="navbar__item navbar__link" target="_parent" href="/blog/">Blog</a><a class="navbar__item navbar__link" target="_parent" href="/blog/tags/case-studies/">Case Studies</a><a class="navbar__item navbar__link" target="_parent" href="/downloads/">Downloads</a><a class="navbar__item navbar__link" target="_parent" href="/help/">Help</a><a class="navbar__item navbar__link" target="_parent" href="/team/">Team</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link">Resources</a><ul class="dropdown__menu"><li><a class="dropdown__link" target="_parent" href="/showcase/">Showcase</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/code-samples/">Code Samples</a></li><li><a class="dropdown__link" target="_parent" href="/plugins/">PluginHub</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/join/">Community</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/events/">Events</a></li><li><a href="https://github.com/apache/apisix/milestones" target="_parent" rel="noopener noreferrer" class="dropdown__link">Roadmap</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>English</span></span></a><ul class="dropdown__menu"><li><a href="/docs/apisix/next/plugins/ai-proxy-multi/" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active" style="text-transform:capitalize">English</a></li><li><a href="/zh/docs/apisix/next/plugins/ai-proxy-multi/" target="_self" rel="noopener noreferrer" class="dropdown__link" style="text-transform:capitalize">简体中文</a></li></ul></div><div class="react-toggle toggle_2i4l react-toggle--disabled"><div class="react-toggle-track" role="button" tabindex="-1"><div class="react-toggle-track-check"><span class="toggle_iYfV">🌜</span></div><div class="react-toggle-track-x"><span class="toggle_iYfV">🌞</span></div><div class="react-toggle-thumb"></div></div><input type="checkbox" class="react-toggle-screenreader-only" aria-label="Switch between dark and light mode"></div><div class="searchBox_fBfG"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button class="clean-btn backToTopButton_i9tI" type="button"><svg viewBox="0 0 24 24" width="28"><path d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z" fill="currentColor"></path></svg></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_LIo8 sidebarWithHideableNavbar_CMI-"><a target="_parent" tabindex="-1" class="sidebarLogo_P87M" href="/"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--dark_uzRr"><b>Apache APISIX®</b></a><div class="sidebarVersionSwitch_0QIZ">Version:<div class="navbar__item dropdown dropdown--hoverable"><a class="navbar__link" href="/docs/apisix/next/getting-started/README/">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/apisix/next/plugins/ai-proxy-multi/"><div>Next</div></a></li><li><a class="dropdown__link" href="/docs/apisix/plugins/ai-proxy-multi/"><div>3.14<div class="badge_6FVu Latest_oyqS">Latest</div></div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.13/plugins/ai-proxy-multi/"><div>3.13</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.12/plugins/ai-proxy-multi/"><div>3.12</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.11/getting-started/README/"><div>3.11</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.10/getting-started/README/"><div>3.10</div></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.9/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.9<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.8/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.8<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.7/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.7<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.6/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.6<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.5/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.5<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.4/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.4<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.3/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.3<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.2/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.2<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.1/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.1<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.0/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.0<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.15/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.15<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.14/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.14<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.13/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.13<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.12/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.12<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.11/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.11<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.10/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.10<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.9/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.9<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.8/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.8<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.7/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.7<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.6/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.6<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.5/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.5<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.4/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.4<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div></div><nav class="menu thin-scrollbar menu_oAhv menuWithAnnouncementBar_IVfW"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Getting Started</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/installation-guide/">Installation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/architecture-design/apisix/">Architecture</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Tutorials</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Terminology</a></li><li class="theme-doc-sidebar-item-category menu__list-item"><a class="menu__link menu__link--sublist menu__link--active" href="#">Plugins</a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-category menu__list-item"><a class="menu__link menu__link--sublist menu__link--active" href="#" tabindex="0">AI</a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-proxy/">ai-proxy</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/apisix/next/plugins/ai-proxy-multi/">ai-proxy-multi</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-rate-limiting/">ai-rate-limiting</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-guard/">ai-prompt-guard</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-aws-content-moderation/">ai-aws-content-moderation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-aliyun-content-moderation/">ai-aliyun-content-moderation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-decorator/">ai-prompt-decorator</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-template/">ai-prompt-template</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-rag/">ai-rag</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-request-rewrite/">ai-request-rewrite</a></li></ul></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">General</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Transformation</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Authentication</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Security</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Traffic</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Observability</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Serverless</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Other protocols</a></li></ul></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">API</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/dashboard/">Apache APISIX Dashboard</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Development</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/deployment-modes/">Deployment modes</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/FAQ/">FAQ</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Others</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a href="https://github.com/apache/apisix/blob/master/CHANGELOG.md" target="_blank" rel="noopener noreferrer" class="menu__link"><span>CHANGELOG<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/upgrade-guide-from-2.15.x-to-3.0.0/">Upgrade Guide</a></li></ul></nav><button type="button" title="Collapse sidebar" aria-label="Collapse sidebar" class="button button--secondary button--outline collapseSidebarButton_EBxv"><svg width="20" height="20" aria-hidden="true" class="collapseSidebarButtonIcon_AF9Q"><g fill="#7a7a7a"><path d="M9.992 10.023c0 .2-.062.399-.172.547l-4.996 7.492a.982.982 0 01-.828.454H1c-.55 0-1-.453-1-1 0-.2.059-.403.168-.551l4.629-6.942L.168 3.078A.939.939 0 010 2.528c0-.548.45-.997 1-.997h2.996c.352 0 .649.18.828.45L9.82 9.472c.11.148.172.347.172.55zm0 0"></path><path d="M19.98 10.023c0 .2-.058.399-.168.547l-4.996 7.492a.987.987 0 01-.828.454h-3c-.547 0-.996-.453-.996-1 0-.2.059-.403.168-.551l4.625-6.942-4.625-6.945a.939.939 0 01-.168-.55 1 1 0 01.996-.997h3c.348 0 .649.18.828.45l4.996 7.492c.11.148.168.347.168.55zm0 0"></path></g></svg></button></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is unreleased documentation for Apache APISIX® -- Cloud-Native API Gateway and AI Gateway <b>Next</b> version.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/apisix/plugins/ai-proxy-multi/">latest version</a></b> (3.14).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: Next</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>ai-proxy-multi</h1></header><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="description"></a>Description<a class="hash-link" href="#description" title="Direct link to heading">#</a></h2><p>The <code>ai-proxy-multi</code> Plugin simplifies access to LLM and embedding models by transforming Plugin configurations into the designated request format for OpenAI, DeepSeek, Azure, AIMLAPI, and other OpenAI-compatible APIs. It extends the capabilities of <a href="/docs/apisix/next/plugins/ai-proxy/"><code>ai-proxy</code></a> with load balancing, retries, fallbacks, and health checks.</p><p>In addition, the Plugin also supports logging LLM request information in the access log, such as token usage, model, time to the first response, and more.</p><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="request-format"></a>Request Format<a class="hash-link" href="#request-format" title="Direct link to heading">#</a></h2><table><thead><tr><th>Name</th><th>Type</th><th>Required</th><th>Description</th></tr></thead><tbody><tr><td><code>messages</code></td><td>Array</td><td>True</td><td>An array of message objects.</td></tr><tr><td><code>messages.role</code></td><td>String</td><td>True</td><td>Role of the message (<code>system</code>, <code>user</code>, <code>assistant</code>).</td></tr><tr><td><code>messages.content</code></td><td>String</td><td>True</td><td>Content of the message.</td></tr></tbody></table><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="attributes"></a>Attributes<a class="hash-link" href="#attributes" title="Direct link to heading">#</a></h2><table><thead><tr><th>Name</th><th>Type</th><th>Required</th><th>Default</th><th>Valid Values</th><th>Description</th></tr></thead><tbody><tr><td>fallback_strategy</td><td>string or array</td><td>False</td><td></td><td>string: &quot;instance_health_and_rate_limiting&quot;, &quot;http_429&quot;, &quot;http_5xx&quot;<br>array: [&quot;rate_limiting&quot;, &quot;http_429&quot;, &quot;http_5xx&quot;]</td><td>Fallback strategy. When set, the Plugin will check whether the specified instance’s token has been exhausted when a request is forwarded. If so, forward the request to the next instance regardless of the instance priority. When not set, the Plugin will not forward the request to low priority instances when token of the high priority instance is exhausted.</td></tr><tr><td>balancer</td><td>object</td><td>False</td><td></td><td></td><td>Load balancing configurations.</td></tr><tr><td>balancer.algorithm</td><td>string</td><td>False</td><td>roundrobin</td><td>[roundrobin, chash]</td><td>Load balancing algorithm. When set to <code>roundrobin</code>, weighted round robin algorithm is used. When set to <code>chash</code>, consistent hashing algorithm is used.</td></tr><tr><td>balancer.hash_on</td><td>string</td><td>False</td><td></td><td>[vars, headers, cookie, consumer, vars_combinations]</td><td>Used when <code>type</code> is <code>chash</code>. Support hashing on <a href="https://nginx.org/en/docs/varindex.html" target="_blank" rel="noopener noreferrer">NGINX variables</a>, headers, cookie, consumer, or a combination of <a href="https://nginx.org/en/docs/varindex.html" target="_blank" rel="noopener noreferrer">NGINX variables</a>.</td></tr><tr><td>balancer.key</td><td>string</td><td>False</td><td></td><td></td><td>Used when <code>type</code> is <code>chash</code>. When <code>hash_on</code> is set to <code>header</code> or <code>cookie</code>, <code>key</code> is required. When <code>hash_on</code> is set to <code>consumer</code>, <code>key</code> is not required as the consumer name will be used as the key automatically.</td></tr><tr><td>instances</td><td>array[object]</td><td>True</td><td></td><td></td><td>LLM instance configurations.</td></tr><tr><td>instances.name</td><td>string</td><td>True</td><td></td><td></td><td>Name of the LLM service instance.</td></tr><tr><td>instances.provider</td><td>string</td><td>True</td><td></td><td>[openai, deepseek, azure-openai, aimlapi, openai-compatible]</td><td>LLM service provider. When set to <code>openai</code>, the Plugin will proxy the request to <code>api.openai.com</code>. When set to <code>deepseek</code>, the Plugin will proxy the request to <code>api.deepseek.com</code>. When set to <code>aimlapi</code>, the Plugin uses the OpenAI-compatible driver and proxies the request to <code>api.aimlapi.com</code> by default. When set to <code>openai-compatible</code>, the Plugin will proxy the request to the custom endpoint configured in <code>override</code>.</td></tr><tr><td>instances.priority</td><td>integer</td><td>False</td><td>0</td><td></td><td>Priority of the LLM instance in load balancing. <code>priority</code> takes precedence over <code>weight</code>.</td></tr><tr><td>instances.weight</td><td>string</td><td>True</td><td>0</td><td>greater or equal to 0</td><td>Weight of the LLM instance in load balancing.</td></tr><tr><td>instances.auth</td><td>object</td><td>True</td><td></td><td></td><td>Authentication configurations.</td></tr><tr><td>instances.auth.header</td><td>object</td><td>False</td><td></td><td></td><td>Authentication headers. At least one of the <code>header</code> and <code>query</code> should be configured.</td></tr><tr><td>instances.auth.query</td><td>object</td><td>False</td><td></td><td></td><td>Authentication query parameters. At least one of the <code>header</code> and <code>query</code> should be configured.</td></tr><tr><td>instances.options</td><td>object</td><td>False</td><td></td><td></td><td>Model configurations. In addition to <code>model</code>, you can configure additional parameters and they will be forwarded to the upstream LLM service in the request body. For instance, if you are working with OpenAI, DeepSeek, or AIMLAPI, you can configure additional parameters such as <code>max_tokens</code>, <code>temperature</code>, <code>top_p</code>, and <code>stream</code>. See your LLM provider&#x27;s API documentation for more available options.</td></tr><tr><td>instances.options.model</td><td>string</td><td>False</td><td></td><td></td><td>Name of the LLM model, such as <code>gpt-4</code> or <code>gpt-3.5</code>. See your LLM provider&#x27;s API documentation for more available models.</td></tr><tr><td>logging</td><td>object</td><td>False</td><td></td><td></td><td>Logging configurations.</td></tr><tr><td>logging.summaries</td><td>boolean</td><td>False</td><td>false</td><td></td><td>If true, log request LLM model, duration, request, and response tokens.</td></tr><tr><td>logging.payloads</td><td>boolean</td><td>False</td><td>false</td><td></td><td>If true, log request and response payload.</td></tr><tr><td>logging.override</td><td>object</td><td>False</td><td></td><td></td><td>Override setting.</td></tr><tr><td>logging.override.endpoint</td><td>string</td><td>False</td><td></td><td></td><td>LLM provider endpoint to replace the default endpoint with. If not configured, the Plugin uses the default OpenAI endpoint <code>https://api.openai.com/v1/chat/completions</code>.</td></tr><tr><td>checks</td><td>object</td><td>False</td><td></td><td></td><td>Health check configurations. Note that at the moment, OpenAI, DeepSeek, and AIMLAPI do not provide an official health check endpoint. Other LLM services that you can configure under <code>openai-compatible</code> provider may have available health check endpoints.</td></tr><tr><td>checks.active</td><td>object</td><td>True</td><td></td><td></td><td>Active health check configurations.</td></tr><tr><td>checks.active.type</td><td>string</td><td>False</td><td>http</td><td>[http, https, tcp]</td><td>Type of health check connection.</td></tr><tr><td>checks.active.timeout</td><td>number</td><td>False</td><td>1</td><td></td><td>Health check timeout in seconds.</td></tr><tr><td>checks.active.concurrency</td><td>integer</td><td>False</td><td>10</td><td></td><td>Number of upstream nodes to be checked at the same time.</td></tr><tr><td>checks.active.host</td><td>string</td><td>False</td><td></td><td></td><td>HTTP host.</td></tr><tr><td>checks.active.port</td><td>integer</td><td>False</td><td></td><td>between 1 and 65535 inclusive</td><td>HTTP port.</td></tr><tr><td>checks.active.http_path</td><td>string</td><td>False</td><td>/</td><td></td><td>Path for HTTP probing requests.</td></tr><tr><td>checks.active.https_verify_certificate</td><td>boolean</td><td>False</td><td>true</td><td></td><td>If true, verify the node&#x27;s TLS certificate.</td></tr><tr><td>timeout</td><td>integer</td><td>False</td><td>30000</td><td>greater than or equal to 1</td><td>Request timeout in milliseconds when requesting the LLM service.</td></tr><tr><td>keepalive</td><td>boolean</td><td>False</td><td>true</td><td></td><td>If true, keep the connection alive when requesting the LLM service.</td></tr><tr><td>keepalive_timeout</td><td>integer</td><td>False</td><td>60000</td><td>greater than or equal to 1000</td><td>Request timeout in milliseconds when requesting the LLM service.</td></tr><tr><td>keepalive_pool</td><td>integer</td><td>False</td><td>30</td><td></td><td>Keepalive pool size for when connecting with the LLM service.</td></tr><tr><td>ssl_verify</td><td>boolean</td><td>False</td><td>true</td><td></td><td>If true, verify the LLM service&#x27;s certificate.</td></tr></tbody></table><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="examples"></a>Examples<a class="hash-link" href="#examples" title="Direct link to heading">#</a></h2><p>The examples below demonstrate how you can configure <code>ai-proxy-multi</code> for different scenarios.</p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>You can fetch the <code>admin_key</code> from <code>config.yaml</code> and save to an environment variable with the following command:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 bash"><pre tabindex="0" class="prism-code language-bash codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token assign-left variable" style="color:#36acaa">admin_key</span><span class="token operator" style="color:#393A34">=</span><span class="token variable" style="color:#36acaa">$(</span><span class="token variable" style="color:#36acaa">yq </span><span class="token variable string" style="color:#e3116c">&#x27;.deployment.admin.admin_key[0].key&#x27;</span><span class="token variable" style="color:#36acaa"> conf/config.yaml </span><span class="token variable operator" style="color:#393A34">|</span><span class="token variable" style="color:#36acaa"> </span><span class="token variable function" style="color:#d73a49">sed</span><span class="token variable" style="color:#36acaa"> </span><span class="token variable string" style="color:#e3116c">&#x27;s/&quot;//g&#x27;</span><span class="token variable" style="color:#36acaa">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="load-balance-between-instances"></a>Load Balance between Instances<a class="hash-link" href="#load-balance-between-instances" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure two models for load balancing, forwarding 80% of the traffic to one instance and 20% to the other.</p><p>For demonstration and easier differentiation, you will be configuring one OpenAI instance and one DeepSeek instance as the upstream LLM services.</p><p>Create a Route as such and update with your LLM providers, models, API keys, and endpoints if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-multi-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy-multi&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 8,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;name&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-instance&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;provider&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;weight&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;auth&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;header&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Authorization&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Bearer &#x27;&quot;</span><span class="token variable" style="color:#36acaa">$DEEPSEEK_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send 10 POST requests to the Route with a system prompt and a sample user question in the request body, to see the number of requests forwarded to OpenAI and DeepSeek:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token assign-left variable" style="color:#36acaa">openai_count</span><span class="token operator" style="color:#393A34">=</span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token assign-left variable" style="color:#36acaa">deepseek_count</span><span class="token operator" style="color:#393A34">=</span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">for</span><span class="token plain"> </span><span class="token for-or-select variable" style="color:#36acaa">i</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">in</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token number" style="color:#36acaa">1</span><span class="token punctuation" style="color:#393A34">..</span><span class="token number" style="color:#36acaa">10</span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">do</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token assign-left variable" style="color:#36acaa">model</span><span class="token operator" style="color:#393A34">=</span><span class="token variable" style="color:#36acaa">$(</span><span class="token variable function" style="color:#d73a49">curl</span><span class="token variable" style="color:#36acaa"> -s </span><span class="token variable string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token variable" style="color:#36acaa"> -X POST </span><span class="token variable punctuation" style="color:#393A34">\</span><span class="token variable" style="color:#36acaa"></span><br></span><span class="token-line" style="color:#393A34"><span class="token variable" style="color:#36acaa"> -H </span><span class="token variable string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token variable" style="color:#36acaa"> </span><span class="token variable punctuation" style="color:#393A34">\</span><span class="token variable" style="color:#36acaa"></span><br></span><span class="token-line" style="color:#393A34"><span class="token variable" style="color:#36acaa"> -d </span><span class="token variable string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token variable string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token variable string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token variable string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token variable string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token variable string" style="color:#e3116c"> }&#x27;</span><span class="token variable" style="color:#36acaa"> </span><span class="token variable operator" style="color:#393A34">|</span><span class="token variable" style="color:#36acaa"> jq -r </span><span class="token variable string" style="color:#e3116c">&#x27;.model&#x27;</span><span class="token variable" style="color:#36acaa">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">if</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;</span><span class="token string variable" style="color:#36acaa">$model</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">==</span><span class="token plain"> *</span><span class="token string" style="color:#e3116c">&quot;gpt-4&quot;</span><span class="token plain">* </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">then</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token variable punctuation" style="color:#393A34">((</span><span class="token variable" style="color:#36acaa">openai_count</span><span class="token variable operator" style="color:#393A34">++</span><span class="token variable punctuation" style="color:#393A34">))</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">elif</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;</span><span class="token string variable" style="color:#36acaa">$model</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">==</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">then</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token variable punctuation" style="color:#393A34">((</span><span class="token variable" style="color:#36acaa">deepseek_count</span><span class="token variable operator" style="color:#393A34">++</span><span class="token variable punctuation" style="color:#393A34">))</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">fi</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">done</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token builtin class-name">echo</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;OpenAI responses: </span><span class="token string variable" style="color:#36acaa">$openai_count</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token builtin class-name">echo</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;DeepSeek responses: </span><span class="token string variable" style="color:#36acaa">$deepseek_count</span><span class="token string" style="color:#e3116c">&quot;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should see a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 text"><pre tabindex="0" class="prism-code language-text codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token plain">OpenAI responses: 8</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">DeepSeek responses: 2</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="configure-instance-priority-and-rate-limiting"></a>Configure Instance Priority and Rate Limiting<a class="hash-link" href="#configure-instance-priority-and-rate-limiting" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure two models with different priorities and apply rate limiting on the instance with a higher priority. In the case where <code>fallback_strategy</code> is set to <code>[&quot;rate_limiting&quot;]</code>, the Plugin should continue to forward requests to the low priority instance once the high priority instance&#x27;s rate limiting quota is fully consumed.</p><p>Create a Route as such and update with your LLM providers, models, API keys, and endpoints if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-multi-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy-multi&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;fallback_strategy: [&quot;rate_limiting&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;priority&quot;: 1,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token string variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain">&#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">options</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">model</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">gpt-4</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">name</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek-instance</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">provider</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">priority</span><span class="token string" style="color:#e3116c">&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">weight</span><span class="token string" style="color:#e3116c">&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">auth</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">header</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">Authorization</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">Bearer </span><span class="token string" style="color:#e3116c">&#x27;&quot;$DEEPSEEK_API_KEY&quot;&#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">options</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">model</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek-chat</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">ai-rate-limiting</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">instances</span><span class="token string" style="color:#e3116c">&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">name</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">openai-instance</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">limit</span><span class="token string" style="color:#e3116c">&quot;: 10,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">time_window</span><span class="token string" style="color:#e3116c">&quot;: 60</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">limit_strategy</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">total_tokens&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a system prompt and a sample user question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1+1 equals 2.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">23</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">8</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">31</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;reasoning_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;accepted_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;rejected_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;service_tier&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;default&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Since the <code>total_tokens</code> value exceeds the configured quota of <code>10</code>, the next request within the 60-second window is expected to be forwarded to the other instance.</p><p>Within the same 60-second window, send another POST request to the route:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newton law&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should see a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Certainly! Newton&#x27;s laws of motion are three fundamental principles that describe the relationship between the motion of an object and the forces acting on it. They were formulated by Sir Isaac Newton in the late 17th century and are foundational to classical mechanics.\n\n---\n\n### **1. Newton&#x27;s First Law (Law of Inertia):**\n- **Statement:** An object at rest will remain at rest, and an object in motion will continue moving at a constant velocity (in a straight line at a constant speed), unless acted upon by an external force.\n- **Key Idea:** This law introduces the concept of **inertia**, which is the tendency of an object to resist changes in its state of motion.\n- **Example:** If you slide a book across a table, it eventually stops because of the force of friction acting on it. Without friction, the book would keep moving indefinitely.\n\n---\n\n### **2. Newton&#x27;s Second Law (Law of Acceleration):**\n- **Statement:** The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. Mathematically, this is expressed as:\n \\[\n F = ma\n \\]\n where:\n - \\( F \\) = net force applied (in Newtons),\n -&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="load-balance-and-rate-limit-by-consumers"></a>Load Balance and Rate Limit by Consumers<a class="hash-link" href="#load-balance-and-rate-limit-by-consumers" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure two models for load balancing and apply rate limiting by consumer.</p><p>Create a Consumer <code>johndoe</code> and a rate limiting quota of 10 tokens in a 60-second window on <code>openai-instance</code> instance:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/consumers&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;username&quot;: &quot;johndoe&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-rate-limiting&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;limit&quot;: 10,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;time_window&quot;: 60</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;rejected_code&quot;: 429,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;limit_strategy&quot;: &quot;total_tokens&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Configure <code>key-auth</code> credential for <code>johndoe</code>:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/consumers/johndoe/credentials&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;cred-john-key-auth&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key-auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key&quot;: &quot;john-key&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Create another Consumer <code>janedoe</code> and a rate limiting quota of 10 tokens in a 60-second window on <code>deepseek-instance</code> instance:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/consumers&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;username&quot;: &quot;johndoe&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-rate-limiting&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;deepseek-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;limit&quot;: 10,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;time_window&quot;: 60</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;rejected_code&quot;: 429,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;limit_strategy&quot;: &quot;total_tokens&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Configure <code>key-auth</code> credential for <code>janedoe</code>:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/consumers/janedoe/credentials&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;cred-jane-key-auth&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key-auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key&quot;: &quot;jane-key&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Create a Route as such and update with your LLM providers, models, API keys, and endpoints if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-multi-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;key-auth&quot;: {},</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy-multi&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;fallback_strategy: [&quot;rate_limiting&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token string variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain">&#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">options</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">model</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">gpt-4</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">name</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek-instance</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">provider</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek</span><span class="token string" style="color:#e3116c">&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">weight</span><span class="token string" style="color:#e3116c">&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">auth</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">header</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">Authorization</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">Bearer </span><span class="token string" style="color:#e3116c">&#x27;&quot;$DEEPSEEK_API_KEY&quot;&#x27;</span><span class="token string" style="color:#e3116c">&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">options</span><span class="token string" style="color:#e3116c">&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;</span><span class="token plain">model</span><span class="token string" style="color:#e3116c">&quot;: &quot;</span><span class="token plain">deepseek-chat&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route without any consumer key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> -i </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive an <code>HTTP/1.1 401 Unauthorized</code> response.</p><p>Send a POST request to the Route with <code>johndoe</code>&#x27;s key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&#x27;apikey: john-key&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1+1 equals 2.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">23</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">8</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">31</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;reasoning_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;accepted_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;rejected_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;service_tier&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;default&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Since the <code>total_tokens</code> value exceeds the configured quota of the <code>openai</code> instance for <code>johndoe</code>, the next request within the 60-second window from <code>johndoe</code> is expected to be forwarded to the <code>deepseek</code> instance.</p><p>Within the same 60-second window, send another POST request to the Route with <code>johndoe</code>&#x27;s key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&#x27;apikey: john-key&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newtons laws to me&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should see a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Certainly! Newton&#x27;s laws of motion are three fundamental principles that describe the relationship between the motion of an object and the forces acting on it. They were formulated by Sir Isaac Newton in the late 17th century and are foundational to classical mechanics.\n\n---\n\n### **1. Newton&#x27;s First Law (Law of Inertia):**\n- **Statement:** An object at rest will remain at rest, and an object in motion will continue moving at a constant velocity (in a straight line at a constant speed), unless acted upon by an external force.\n- **Key Idea:** This law introduces the concept of **inertia**, which is the tendency of an object to resist changes in its state of motion.\n- **Example:** If you slide a book across a table, it eventually stops because of the force of friction acting on it. Without friction, the book would keep moving indefinitely.\n\n---\n\n### **2. Newton&#x27;s Second Law (Law of Acceleration):**\n- **Statement:** The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. Mathematically, this is expressed as:\n \\[\n F = ma\n \\]\n where:\n - \\( F \\) = net force applied (in Newtons),\n -&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with <code>janedoe</code>&#x27;s key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&#x27;apikey: jane-key&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;The sum of 1 and 1 is 2. This is a basic arithmetic operation where you combine two units to get a total of two units.&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">14</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">31</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">45</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_cache_hit_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_cache_miss_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">14</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;fp_3a5770e1b4_prod0225&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Since the <code>total_tokens</code> value exceeds the configured quota of the <code>deepseek</code> instance for <code>janedoe</code>, the next request within the 60-second window from <code>janedoe</code> is expected to be forwarded to the <code>openai</code> instance.</p><p>Within the same 60-second window, send another POST request to the Route with <code>janedoe</code>&#x27;s key:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&#x27;apikey: jane-key&#x27;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newtons laws to me&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should see a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Sure, here are Newton&#x27;s three laws of motion:\n\n1) Newton&#x27;s First Law, also known as the Law of Inertia, states that an object at rest will stay at rest, and an object in motion will stay in motion, unless acted on by an external force. In simple words, this law suggests that an object will keep doing whatever it is doing until something causes it to do otherwise. \n\n2) Newton&#x27;s Second Law states that the force acting on an object is equal to the mass of that object times its acceleration (F=ma). This means that force is directly proportional to mass and acceleration. The heavier the object and the faster it accelerates, the greater the force.\n\n3) Newton&#x27;s Third Law, also known as the law of action and reaction, states that for every action, there is an equal and opposite reaction. Essentially, any force exerted onto a body will create a force of equal magnitude but in the opposite direction on the object that exerted the first force.\n\nRemember, these laws become less accurate when considering speeds near the speed of light (where Einstein&#x27;s theory of relativity becomes more appropriate) or objects very small or very large. However, for everyday situations, they provide a good model of how things move.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>This shows <code>ai-proxy-multi</code> load balance the traffic with respect to the rate limiting rules in <code>ai-rate-limiting</code> by consumers.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="restrict-maximum-number-of-completion-tokens"></a>Restrict Maximum Number of Completion Tokens<a class="hash-link" href="#restrict-maximum-number-of-completion-tokens" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can restrict the number of <code>completion_tokens</code> used when generating the chat completion.</p><p>For demonstration and easier differentiation, you will be configuring one OpenAI instance and one DeepSeek instance as the upstream LLM services.</p><p>Create a Route as such and update with your LLM providers, models, API keys, and endpoints if applicable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-multi-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy-multi&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;max_tokens&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">50</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;name&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-instance&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;provider&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;weight&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;auth&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;header&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Authorization&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Bearer &#x27;&quot;</span><span class="token variable" style="color:#36acaa">$DEEPSEEK_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;max_tokens&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">100</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a system prompt and a sample user question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Explain Newtons law&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>If the request is proxied to OpenAI, you should see a response similar to the following, where the content is truncated per 50 <code>max_tokens</code> threshold:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Newton&#x27;s Laws of Motion are three physical laws that form the bedrock for classical mechanics. They describe the relationship between a body and the forces acting upon it, and the body&#x27;s motion in response to those forces. \n\n1. Newton&#x27;s First Law&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;length&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">20</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">50</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">70</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;reasoning_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;accepted_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;rejected_prediction_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;service_tier&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;default&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>If the request is proxied to DeepSeek, you should see a response similar to the following, where the content is truncated per 100 <code>max_tokens</code> threshold:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Newton&#x27;s Laws of Motion are three fundamental principles that form the foundation of classical mechanics. They describe the relationship between a body and the forces acting upon it, and the body&#x27;s motion in response to those forces. Here&#x27;s a brief explanation of each law:\n\n1. **Newton&#x27;s First Law (Law of Inertia):**\n - **Statement:** An object will remain at rest or in uniform motion in a straight line unless acted upon by an external force.\n - **Explanation:** This law&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;length&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">10</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">100</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">110</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_cache_hit_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_cache_miss_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">10</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;fp_3a5770e1b4_prod0225&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="proxy-to-embedding-models"></a>Proxy to Embedding Models<a class="hash-link" href="#proxy-to-embedding-models" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure the <code>ai-proxy-multi</code> Plugin to proxy requests and load balance between embedding models.</p><p>Create a Route as such and update with your LLM providers, embedding models, API keys, and endpoints:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-multi-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy-multi&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;openai-instance&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;text-embedding-3-small&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;override&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;endpoint&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;https://api.openai.com/v1/embeddings&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;name&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;az-openai-instance&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;provider&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;openai-compatible&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;weight&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;auth&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;header&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Authorization&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Bearer &#x27;&quot;</span><span class="token variable" style="color:#36acaa">$AZ_OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;text-embedding-3-small&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;override&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;endpoint&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;https://ai-plugin-developer.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2023-05-15&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with an input string:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/embeddings&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;input&quot;: &quot;hello world&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;object&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;list&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;data&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;object&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;embedding&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;embedding&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">-0.0067144386</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">-0.039197803</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.034177095</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.028763203</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">-0.024785956</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">-0.04201061</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;text-embedding-3-small&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="enable-active-health-checks"></a>Enable Active Health Checks<a class="hash-link" href="#enable-active-health-checks" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure the <code>ai-proxy-multi</code> Plugin to proxy requests and load balance between models, and enable active health check to improve service availability. You can enable health check on one or multiple instances.</p><p>Create a Route as such and update the LLM providers, embedding models, API keys, and health check related configurations:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-multi-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy-multi&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;instances&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;name&quot;: &quot;llm-instance-1&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai-compatible&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;weight&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$YOUR_LLM_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token variable" style="color:#36acaa">$YOUR_LLM_MODEL</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;name&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;llm-instance-2&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;provider&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;openai-compatible&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;weight&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;auth&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;header&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Authorization&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Bearer &#x27;&quot;</span><span class="token variable" style="color:#36acaa">$YOUR_LLM_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token variable" style="color:#36acaa">$YOUR_LLM_MODEL</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;checks&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;active&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;type&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;https&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;host&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;yourhost.com&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http_path&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;/your/probe/path&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;healthy&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;interval&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;successes&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">1</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;unhealthy&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;interval&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">1</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http_failures&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">3</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>For verification, the behaviours should be consistent with the verification in <a href="/docs/apisix/next/tutorials/health-check/">active health checks</a>.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="include-llm-information-in-access-log"></a>Include LLM Information in Access Log<a class="hash-link" href="#include-llm-information-in-access-log" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can log LLM request related information in the gateway&#x27;s access log to improve analytics and audit. The following variables are available:</p><ul><li><code>request_llm_model</code>: LLM model name specified in the request.</li><li><code>apisix_upstream_response_time</code>: Time taken for APISIX to send the request to the upstream service and receive the full response</li><li><code>request_type</code>: Type of request, where the value could be <code>traditional_http</code>, <code>ai_chat</code>, or <code>ai_stream</code>.</li><li><code>llm_time_to_first_token</code>: Duration from request sending to the first token received from the LLM service, in milliseconds.</li><li><code>llm_model</code>: LLM model.</li><li><code>llm_prompt_tokens</code>: Number of tokens in the prompt.</li><li><code>llm_completion_tokens</code>: Number of chat completion tokens in the prompt.</li></ul><p>Update the access log format in your configuration file to include additional LLM related variables:</p><div class="codeBlockContainer_EiTO"><div style="color:#393A34;background-color:#f6f8fa" class="codeBlockTitle_PQMO">conf/config.yaml</div><div class="codeBlockContent_X2I6 yaml"><pre tabindex="0" class="prism-code language-yaml codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token key atrule" style="color:#00a4db">nginx_config</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">http</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">access_log_format</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;$remote_addr - $remote_user [$time_local] $http_host \&quot;$request_line\&quot; $status $body_bytes_sent $request_time \&quot;$http_referer\&quot; \&quot;$http_user_agent\&quot; $upstream_addr $upstream_status $apisix_upstream_response_time \&quot;$upstream_scheme://$upstream_host$upstream_uri\&quot; \&quot;$apisix_request_id\&quot; \&quot;$request_type\&quot; \&quot;$llm_time_to_first_token\&quot; \&quot;$llm_model\&quot; \&quot;$request_llm_model\&quot; \&quot;$llm_prompt_tokens\&quot; \&quot;$llm_completion_tokens\&quot;&quot;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Reload APISIX for configuration changes to take effect.</p><p>Next, create a Route with the <code>ai-proxy-multi</code> Plugin and send a request. For instance, if the request is forwarded to OpenAI and you receive the following response:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1+1 equals 2.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;annotations&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">23</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">8</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">31</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;service_tier&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;default&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>In the gateway&#x27;s access log, you should see a log entry similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 text"><pre tabindex="0" class="prism-code language-text codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token plain">192.168.215.1 - - [21/Mar/2025:04:28:03 +0000] api.openai.com &quot;POST /anything HTTP/1.1&quot; 200 804 2.858 &quot;-&quot; &quot;curl/8.6.0&quot; - - - 5765 &quot;http://api.openai.com&quot; &quot;5c5e0b95f8d303cb81e4dc456a4b12d9&quot; &quot;ai_chat&quot; &quot;2858&quot; &quot;gpt-4&quot; &quot;gpt-4&quot; &quot;23&quot; &quot;8&quot;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>The access log entry shows the request type is <code>ai_chat</code>, Apisix upstream response time is <code>5765</code> milliseconds, time to first token is <code>2858</code> milliseconds, Requested LLM model is <code>gpt-4</code>. LLM model is <code>gpt-4</code>, prompt token usage is <code>23</code>, and completion token usage is <code>8</code>.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="/edit#https://github.com/apache/apisix/edit/master/docs/en/latest/plugins/ai-proxy-multi.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/apisix/next/plugins/ai-proxy/"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">« ai-proxy</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/apisix/next/plugins/ai-rate-limiting/"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">ai-rate-limiting »</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#description" class="table-of-contents__link">Description</a></li><li><a href="#request-format" class="table-of-contents__link">Request Format</a></li><li><a href="#attributes" class="table-of-contents__link">Attributes</a></li><li><a href="#examples" class="table-of-contents__link">Examples</a><ul><li><a href="#load-balance-between-instances" class="table-of-contents__link">Load Balance between Instances</a></li><li><a href="#configure-instance-priority-and-rate-limiting" class="table-of-contents__link">Configure Instance Priority and Rate Limiting</a></li><li><a href="#load-balance-and-rate-limit-by-consumers" class="table-of-contents__link">Load Balance and Rate Limit by Consumers</a></li><li><a href="#restrict-maximum-number-of-completion-tokens" class="table-of-contents__link">Restrict Maximum Number of Completion Tokens</a></li><li><a href="#proxy-to-embedding-models" class="table-of-contents__link">Proxy to Embedding Models</a></li><li><a href="#enable-active-health-checks" class="table-of-contents__link">Enable Active Health Checks</a></li><li><a href="#include-llm-information-in-access-log" class="table-of-contents__link">Include LLM Information in Access Log</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="container_MP5Z"><div class="linksRow_iwpv"><div class="linksCol_a1ec"><div>ASF</div><ul><li class="footer__item"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer"><span></span><span>Foundation</span></a></li><li class="footer__item"><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer"><span></span><span>License</span></a></li><li class="footer__item"><a href="https://www.apache.org/events/" target="_blank" rel="noopener noreferrer"><span></span><span>Events</span></a></li><li class="footer__item"><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer"><span></span><span>Security</span></a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer"><span></span><span>Sponsorship</span></a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer"><span></span><span>Thanks</span></a></li></ul></div><div class="linksCol_a1ec"><div>Community</div><ul><li class="footer__item"><a href="https://github.com/apache/apisix/issues" target="_blank" rel="noopener noreferrer"><span></span><span>GitHub</span></a></li><li class="footer__item"><a href="/docs/general/join/"><span></span><span>Slack</span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheAPISIX" target="_blank" rel="noopener noreferrer"><span></span><span>Twitter</span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCgPD18cMhOg5rmPVnQhAC8g" target="_blank" rel="noopener noreferrer"><span></span><span>YouTube</span></a></li></ul></div><div class="linksCol_a1ec"><div>More</div><ul><li class="footer__item"><a target="_parent" href="/blog/"><span></span><span>Blog</span></a></li><li class="footer__item"><a target="_parent" href="/showcase/"><span></span><span>Showcase</span></a></li><li class="footer__item"><a target="_parent" href="/plugins/"><span></span><span>Plugin Hub</span></a></li><li class="footer__item"><a href="https://github.com/apache/apisix/milestones" target="_parent" rel="noopener noreferrer"><span></span><span>Roadmap</span></a></li></ul></div></div><div class="copyright_ZfFh"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer"><span style="display:inline-block;width:231.25px;height:40px"></span></a><div>Copyright © 2019-2025 The Apache Software Foundation. Apache APISIX, APISIX®, Apache, the Apache feather logo, and the Apache APISIX project logo are either registered trademarks or trademarks of the Apache Software Foundation.</div></div></footer></div>
<script src="https://apisix-website-static.apiseven.com/assets/js/runtime~main.e29fe45b.js"></script>
<script src="https://apisix-website-static.apiseven.com/assets/js/main.121d6aaf.js"></script>
</body>
</html>