blob: 932acfd93c6d6e84b0bdc15ff556b706504c185c [file] [log] [blame]
<!doctype html>
<html class="docs-version-current" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="ahrefs-site-verification" content="c2f7370ecf46173f4fb25f114e74c97e0a2976d4f02f61c9b00a9d7d34e34698">
<meta name="generator" content="Docusaurus v2.0.0-beta.6">
<link rel="search" type="application/opensearchdescription+xml" title="Apache APISIX® -- Cloud-Native API Gateway and AI Gateway" href="/opensearch.xml">
<script type="application/ld+json">{"@context":"https://schema.org","@type":"WebSite","name":"Apache APISIX","url":"https://apisix.apache.org"}</script>
<script src="https://widget.kapa.ai/kapa-widget.bundle.js" data-website-id="24b59d9a-682e-4c3d-9e83-bf2ee85cdc19" data-project-name="APISIX" data-project-color="#E8442E" data-project-logo="https://static.apiseven.com/202202/apache-apisix.png" data-modal-disclaimer="This is a custom LLM for APISIX with access to all developer documentation, GitHub issues and discussions." data-modal-example-questions="How to set up canary release in APISIX?,How to develop a custom APISIX plugin?,How to use custom NGINX configuration in APISIX?,How to configure mTLS between clients and APISIX?,How to only allow a specific APISIX consumer to access special services or routes?" async></script><title data-react-helmet="true">ai-proxy | Apache APISIX® -- Cloud-Native API Gateway and AI Gateway</title><meta data-react-helmet="true" property="og:image" content="https://static.apiseven.com/202202/apache-apisix.png"><meta data-react-helmet="true" name="twitter:image" content="https://static.apiseven.com/202202/apache-apisix.png"><meta data-react-helmet="true" property="og:url" content="https://apisix.apache.org/docs/apisix/next/plugins/ai-proxy/"><meta data-react-helmet="true" name="docsearch:language" content="en"><meta data-react-helmet="true" name="docsearch:version" content="current"><meta data-react-helmet="true" name="docsearch:docusaurus_tag" content="docs-docs-apisix-current"><meta data-react-helmet="true" name="robots" content="index,follow"><meta data-react-helmet="true" name="twitter:card" content="summary"><meta data-react-helmet="true" property="og:title" content="ai-proxy | Apache APISIX® -- Cloud-Native API Gateway and AI Gateway"><meta data-react-helmet="true" name="description" content="The ai-proxy Plugin simplifies access to LLM and embedding models providers by converting Plugin configurations into the required request format for OpenAI, DeepSeek, Azure, AIMLAPI, and other OpenAI-compatible APIs."><meta data-react-helmet="true" property="og:description" content="The ai-proxy Plugin simplifies access to LLM and embedding models providers by converting Plugin configurations into the required request format for OpenAI, DeepSeek, Azure, AIMLAPI, and other OpenAI-compatible APIs."><meta data-react-helmet="true" name="keywords" content="Apache APISIX,API Gateway,Plugin,ai-proxy,AI,LLM"><link data-react-helmet="true" rel="shortcut icon" href="https://static.apiseven.com/202202/favicon.png"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/docs/apisix/next/plugins/ai-proxy/" hreflang="en"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/zh/docs/apisix/next/plugins/ai-proxy/" hreflang="zh"><link data-react-helmet="true" rel="alternate" href="https://apisix.apache.org/docs/apisix/next/plugins/ai-proxy/" hreflang="x-default"><link data-react-helmet="true" rel="preconnect" href="https://38VC84A2WJ-dsn.algolia.net" crossorigin="anonymous"><link data-react-helmet="true" rel="canonical" href="https://docs.api7.ai/hub/ai-proxy"><link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Medium.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Bold.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Light.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-Demi.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://static.apiseven.com/202202/MaisonNeue-ExtraBold.otf" as="font" type="font/otf" crossorigin>
<link rel="preload" href="https://apisix-website-static.apiseven.com/assets/js/runtime~main.e29fe45b.js" as="script">
<link rel="preload" href="https://apisix-website-static.apiseven.com/assets/js/main.121d6aaf.js" as="script">
<link rel="stylesheet" href="https://apisix-website-static.apiseven.com/assets/css/styles.8de0825e.css">
<script>var _paq=window._paq=window._paq||[];_paq.push(["disableCookies"]),_paq.push(["trackPageView"]),_paq.push(["enableLinkTracking"]),function(){var a="https://analytics.apache.org/";_paq.push(["setTrackerUrl",a+"matomo.php"]),_paq.push(["setSiteId","17"]);var e=document,p=e.createElement("script"),t=e.getElementsByTagName("script")[0];p.async=!0,p.src=a+"matomo.js",t.parentNode.insertBefore(p,t)}()</script>
</head>
<body>
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus">
<div><a href="#" class="skipToContent_OuoZ">Skip to main content</a></div><div class="announcementBar_axC9" style="background-color:#e8433e;color:white" role="banner"><div class="announcementBarPlaceholder_xYHE"></div><div class="announcementBarContent_6uhP">🤔 Introducing APISIX AI Gateway – Built for LLMs and AI workloads. <a target="_blank" rel="noopener noreferrer" href="/ai-gateway/"> Learn More</a></div><button type="button" class="clean-btn close announcementBarClose_A3A1" aria-label="Close"><svg viewBox="0 0 24 24" width="14" height="14" fill="currentColor"><path d="M24 20.188l-8.315-8.209 8.2-8.282-3.697-3.697-8.212 8.318-8.31-8.203-3.666 3.666 8.321 8.24-8.206 8.313 3.666 3.666 8.237-8.318 8.285 8.203z"></path></svg></button></div><nav class="navbar navbar--fixed-top navbarHideable_RReh"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a target="_parent" class="navbar__brand" href="/"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--light_4Vu1 navbar__logo"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--dark_uzRr navbar__logo"><b class="navbar__title">Apache APISIX®</b></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a aria-current="page" class="navbar__link" target="_parent" href="/docs/">Docs</a><ul class="dropdown__menu"><li><a class="dropdown__link" target="_parent" href="/docs/apisix/getting-started/">Apache APISIX®️</a></li><li><a class="dropdown__link" target="_parent" href="/docs/apisix/next/dashboard/">Apache APISIX®️ Dashboard</a></li><li><a class="dropdown__link" target="_parent" href="/docs/ingress-controller/overview/">Apache APISIX®️ Ingress Controller</a></li><li><a class="dropdown__link" target="_parent" href="/docs/helm-chart/apisix/">Apache APISIX®️ Helm Charts</a></li><li><a class="dropdown__link" target="_parent" href="/docs/docker/build/">Apache APISIX®️ Docker</a></li><li><a class="dropdown__link" target="_parent" href="/docs/java-plugin-runner/development/">Apache APISIX®️ Java Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/go-plugin-runner/getting-started/">Apache APISIX®️ Go Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/python-plugin-runner/getting-started/">Apache APISIX®️ Python Plugin Runner</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/join/">General</a></li></ul></div><a class="navbar__item navbar__link" target="_parent" href="/blog/">Blog</a><a class="navbar__item navbar__link" target="_parent" href="/blog/tags/case-studies/">Case Studies</a><a class="navbar__item navbar__link" target="_parent" href="/downloads/">Downloads</a><a class="navbar__item navbar__link" target="_parent" href="/help/">Help</a><a class="navbar__item navbar__link" target="_parent" href="/team/">Team</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link">Resources</a><ul class="dropdown__menu"><li><a class="dropdown__link" target="_parent" href="/showcase/">Showcase</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/code-samples/">Code Samples</a></li><li><a class="dropdown__link" target="_parent" href="/plugins/">PluginHub</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/join/">Community</a></li><li><a class="dropdown__link" target="_parent" href="/docs/general/events/">Events</a></li><li><a href="https://github.com/apache/apisix/milestones" target="_parent" rel="noopener noreferrer" class="dropdown__link">Roadmap</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" class="navbar__link"><span><svg viewBox="0 0 20 20" width="20" height="20" aria-hidden="true" class="iconLanguage_zID8"><path fill="currentColor" d="M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"></path></svg><span>English</span></span></a><ul class="dropdown__menu"><li><a href="/docs/apisix/next/plugins/ai-proxy/" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active" style="text-transform:capitalize">English</a></li><li><a href="/zh/docs/apisix/next/plugins/ai-proxy/" target="_self" rel="noopener noreferrer" class="dropdown__link" style="text-transform:capitalize">简体中文</a></li></ul></div><div class="react-toggle toggle_2i4l react-toggle--disabled"><div class="react-toggle-track" role="button" tabindex="-1"><div class="react-toggle-track-check"><span class="toggle_iYfV">🌜</span></div><div class="react-toggle-track-x"><span class="toggle_iYfV">🌞</span></div><div class="react-toggle-thumb"></div></div><input type="checkbox" class="react-toggle-screenreader-only" aria-label="Switch between dark and light mode"></div><div class="searchBox_fBfG"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper docs-wrapper docs-doc-page"><div class="docPage_GMj9"><button class="clean-btn backToTopButton_i9tI" type="button"><svg viewBox="0 0 24 24" width="28"><path d="M7.41 15.41L12 10.83l4.59 4.58L18 14l-6-6-6 6z" fill="currentColor"></path></svg></button><aside class="docSidebarContainer_k0Pq"><div class="sidebar_LIo8 sidebarWithHideableNavbar_CMI-"><a target="_parent" tabindex="-1" class="sidebarLogo_P87M" href="/"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--light_4Vu1"><img src="/img/logo2.svg" alt="Apache APISIX®" class="themedImage_TMUO themedImage--dark_uzRr"><b>Apache APISIX®</b></a><div class="sidebarVersionSwitch_0QIZ">Version:<div class="navbar__item dropdown dropdown--hoverable"><a class="navbar__link" href="/docs/apisix/next/getting-started/README/">Next</a><ul class="dropdown__menu"><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/apisix/next/plugins/ai-proxy/"><div>Next</div></a></li><li><a class="dropdown__link" href="/docs/apisix/plugins/ai-proxy/"><div>3.14<div class="badge_6FVu Latest_oyqS">Latest</div></div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.13/plugins/ai-proxy/"><div>3.13</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.12/plugins/ai-proxy/"><div>3.12</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.11/plugins/ai-proxy/"><div>3.11</div></a></li><li><a class="dropdown__link" href="/docs/apisix/3.10/getting-started/README/"><div>3.10</div></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.9/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.9<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.8/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.8<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.7/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.7<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.6/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.6<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.5/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.5<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.4/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.4<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.3/getting-started/readme/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.3<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.2/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.2<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.1/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.1<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/3.0/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>3.0<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.15/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.15<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.14/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.14<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.13/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.13<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.12/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.12<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.11/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.11<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.10/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.10<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.9/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.9<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.8/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.8<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.7/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.7<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.6/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.6<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.5/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.5<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li><a href="https://apache-apisix.netlify.app/docs/apisix/2.4/getting-started/" target="_blank" rel="noopener noreferrer" class="dropdown__link"><span>2.4<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li></ul></div></div><nav class="menu thin-scrollbar menu_oAhv menuWithAnnouncementBar_IVfW"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Getting Started</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/installation-guide/">Installation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/architecture-design/apisix/">Architecture</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Tutorials</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Terminology</a></li><li class="theme-doc-sidebar-item-category menu__list-item"><a class="menu__link menu__link--sublist menu__link--active" href="#">Plugins</a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-category menu__list-item"><a class="menu__link menu__link--sublist menu__link--active" href="#" tabindex="0">AI</a><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/apisix/next/plugins/ai-proxy/">ai-proxy</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-proxy-multi/">ai-proxy-multi</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-rate-limiting/">ai-rate-limiting</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-guard/">ai-prompt-guard</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-aws-content-moderation/">ai-aws-content-moderation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-aliyun-content-moderation/">ai-aliyun-content-moderation</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-decorator/">ai-prompt-decorator</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-prompt-template/">ai-prompt-template</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-rag/">ai-rag</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" tabindex="0" href="/docs/apisix/next/plugins/ai-request-rewrite/">ai-request-rewrite</a></li></ul></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">General</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Transformation</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Authentication</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Security</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Traffic</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Observability</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Serverless</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#" tabindex="0">Other protocols</a></li></ul></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">API</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/dashboard/">Apache APISIX Dashboard</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Development</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/deployment-modes/">Deployment modes</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/FAQ/">FAQ</a></li><li class="theme-doc-sidebar-item-category menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#">Others</a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a href="https://github.com/apache/apisix/blob/master/CHANGELOG.md" target="_blank" rel="noopener noreferrer" class="menu__link"><span>CHANGELOG<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_wgqa"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></span></a></li><li class="theme-doc-sidebar-item-link menu__list-item"><a class="menu__link" href="/docs/apisix/next/upgrade-guide-from-2.15.x-to-3.0.0/">Upgrade Guide</a></li></ul></nav><button type="button" title="Collapse sidebar" aria-label="Collapse sidebar" class="button button--secondary button--outline collapseSidebarButton_EBxv"><svg width="20" height="20" aria-hidden="true" class="collapseSidebarButtonIcon_AF9Q"><g fill="#7a7a7a"><path d="M9.992 10.023c0 .2-.062.399-.172.547l-4.996 7.492a.982.982 0 01-.828.454H1c-.55 0-1-.453-1-1 0-.2.059-.403.168-.551l4.629-6.942L.168 3.078A.939.939 0 010 2.528c0-.548.45-.997 1-.997h2.996c.352 0 .649.18.828.45L9.82 9.472c.11.148.172.347.172.55zm0 0"></path><path d="M19.98 10.023c0 .2-.058.399-.168.547l-4.996 7.492a.987.987 0 01-.828.454h-3c-.547 0-.996-.453-.996-1 0-.2.059-.403.168-.551l4.625-6.942-4.625-6.945a.939.939 0 01-.168-.55 1 1 0 01.996-.997h3c.348 0 .649.18.828.45l4.996 7.492c.11.148.168.347.168.55zm0 0"></path></g></svg></button></div></aside><main class="docMainContainer_Q970"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_zHA2"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is unreleased documentation for Apache APISIX® -- Cloud-Native API Gateway and AI Gateway <b>Next</b> version.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/apisix/plugins/ai-proxy/">latest version</a></b> (3.14).</div></div><div class="docItemContainer_oiyr"><article><span class="theme-doc-version-badge badge badge--secondary">Version: Next</span><div class="tocCollapsible_aw-L theme-doc-toc-mobile tocMobile_Tx6Y"><button type="button" class="clean-btn tocCollapsibleButton_zr6a">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>ai-proxy</h1></header><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="description"></a>Description<a class="hash-link" href="#description" title="Direct link to heading">#</a></h2><p>The <code>ai-proxy</code> Plugin simplifies access to LLM and embedding models by transforming Plugin configurations into the designated request format. It supports the integration with OpenAI, DeepSeek, Azure, AIMLAPI, and other OpenAI-compatible APIs.</p><p>In addition, the Plugin also supports logging LLM request information in the access log, such as token usage, model, time to the first response, and more.</p><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="request-format"></a>Request Format<a class="hash-link" href="#request-format" title="Direct link to heading">#</a></h2><table><thead><tr><th>Name</th><th>Type</th><th>Required</th><th>Description</th></tr></thead><tbody><tr><td><code>messages</code></td><td>Array</td><td>True</td><td>An array of message objects.</td></tr><tr><td><code>messages.role</code></td><td>String</td><td>True</td><td>Role of the message (<code>system</code>, <code>user</code>, <code>assistant</code>).</td></tr><tr><td><code>messages.content</code></td><td>String</td><td>True</td><td>Content of the message.</td></tr></tbody></table><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="attributes"></a>Attributes<a class="hash-link" href="#attributes" title="Direct link to heading">#</a></h2><table><thead><tr><th>Name</th><th>Type</th><th>Required</th><th>Default</th><th>Valid values</th><th>Description</th></tr></thead><tbody><tr><td>provider</td><td>string</td><td>True</td><td></td><td>[openai, deepseek, azure-openai, aimlapi, openai-compatible]</td><td>LLM service provider. When set to <code>openai</code>, the Plugin will proxy the request to <code>https://api.openai.com/chat/completions</code>. When set to <code>deepseek</code>, the Plugin will proxy the request to <code>https://api.deepseek.com/chat/completions</code>. When set to <code>aimlapi</code>, the Plugin uses the OpenAI-compatible driver and proxies the request to <code>https://api.aimlapi.com/v1/chat/completions</code> by default. When set to <code>openai-compatible</code>, the Plugin will proxy the request to the custom endpoint configured in <code>override</code>.</td></tr><tr><td>auth</td><td>object</td><td>True</td><td></td><td></td><td>Authentication configurations.</td></tr><tr><td>auth.header</td><td>object</td><td>False</td><td></td><td></td><td>Authentication headers. At least one of <code>header</code> or <code>query</code> must be configured.</td></tr><tr><td>auth.query</td><td>object</td><td>False</td><td></td><td></td><td>Authentication query parameters. At least one of <code>header</code> or <code>query</code> must be configured.</td></tr><tr><td>options</td><td>object</td><td>False</td><td></td><td></td><td>Model configurations. In addition to <code>model</code>, you can configure additional parameters and they will be forwarded to the upstream LLM service in the request body. For instance, if you are working with OpenAI, you can configure additional parameters such as <code>temperature</code>, <code>top_p</code>, and <code>stream</code>. See your LLM provider&#x27;s API documentation for more available options.</td></tr><tr><td>options.model</td><td>string</td><td>False</td><td></td><td></td><td>Name of the LLM model, such as <code>gpt-4</code> or <code>gpt-3.5</code>. Refer to the LLM provider&#x27;s API documentation for available models.</td></tr><tr><td>override</td><td>object</td><td>False</td><td></td><td></td><td>Override setting.</td></tr><tr><td>override.endpoint</td><td>string</td><td>False</td><td></td><td></td><td>Custom LLM provider endpoint, required when <code>provider</code> is <code>openai-compatible</code>.</td></tr><tr><td>logging</td><td>object</td><td>False</td><td></td><td></td><td>Logging configurations.</td></tr><tr><td>logging.summaries</td><td>boolean</td><td>False</td><td>false</td><td></td><td>If true, logs request LLM model, duration, request, and response tokens.</td></tr><tr><td>logging.payloads</td><td>boolean</td><td>False</td><td>false</td><td></td><td>If true, logs request and response payload.</td></tr><tr><td>timeout</td><td>integer</td><td>False</td><td>30000</td><td>≥ 1</td><td>Request timeout in milliseconds when requesting the LLM service.</td></tr><tr><td>keepalive</td><td>boolean</td><td>False</td><td>true</td><td></td><td>If true, keeps the connection alive when requesting the LLM service.</td></tr><tr><td>keepalive_timeout</td><td>integer</td><td>False</td><td>60000</td><td>≥ 1000</td><td>Keepalive timeout in milliseconds when connecting to the LLM service.</td></tr><tr><td>keepalive_pool</td><td>integer</td><td>False</td><td>30</td><td></td><td>Keepalive pool size for the LLM service connection.</td></tr><tr><td>ssl_verify</td><td>boolean</td><td>False</td><td>true</td><td></td><td>If true, verifies the LLM service&#x27;s certificate.</td></tr></tbody></table><h2><a aria-hidden="true" tabindex="-1" class="anchor anchor__h2 anchorWithHideOnScrollNavbar_3ly5" id="examples"></a>Examples<a class="hash-link" href="#examples" title="Direct link to heading">#</a></h2><p>The examples below demonstrate how you can configure <code>ai-proxy</code> for different scenarios.</p><div class="admonition admonition-note alert alert--secondary"><div class="admonition-heading"><h5><span class="admonition-icon"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="16" viewBox="0 0 14 16"><path fill-rule="evenodd" d="M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"></path></svg></span>note</h5></div><div class="admonition-content"><p>You can fetch the <code>admin_key</code> from <code>config.yaml</code> and save to an environment variable with the following command:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 bash"><pre tabindex="0" class="prism-code language-bash codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token assign-left variable" style="color:#36acaa">admin_key</span><span class="token operator" style="color:#393A34">=</span><span class="token variable" style="color:#36acaa">$(</span><span class="token variable" style="color:#36acaa">yq </span><span class="token variable string" style="color:#e3116c">&#x27;.deployment.admin.admin_key[0].key&#x27;</span><span class="token variable" style="color:#36acaa"> conf/config.yaml </span><span class="token variable operator" style="color:#393A34">|</span><span class="token variable" style="color:#36acaa"> </span><span class="token variable function" style="color:#d73a49">sed</span><span class="token variable" style="color:#36acaa"> </span><span class="token variable string" style="color:#e3116c">&#x27;s/&quot;//g&#x27;</span><span class="token variable" style="color:#36acaa">)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="proxy-to-openai"></a>Proxy to OpenAI<a class="hash-link" href="#proxy-to-openai" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure the API key, model, and other parameters in the <code>ai-proxy</code> Plugin and configure the Plugin on a Route to proxy user prompts to OpenAI.</p><p>Obtain the OpenAI <a href="https://openai.com/blog/openai-api" target="_blank" rel="noopener noreferrer">API key</a> and save it to an environment variable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token builtin class-name">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:#36acaa">OPENAI_API_KEY</span><span class="token operator" style="color:#393A34">=</span><span class="token operator" style="color:#393A34">&lt;</span><span class="token plain">your-api-key</span><span class="token operator" style="color:#393A34">&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Create a Route and configure the <code>ai-proxy</code> Plugin as such:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token plain">:</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a system prompt and a sample user question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Host: api.openai.com&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;system&quot;, &quot;content&quot;: &quot;You are a mathematician&quot; },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> { &quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;What is 1+1?&quot; }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1+1 equals 2.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="proxy-to-deepseek"></a>Proxy to DeepSeek<a class="hash-link" href="#proxy-to-deepseek" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure the <code>ai-proxy</code> Plugin to proxy requests to DeekSeek.</p><p>Obtain the DeekSeek API key and save it to an environment variable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token builtin class-name">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:#36acaa">DEEPSEEK_API_KEY</span><span class="token operator" style="color:#393A34">=</span><span class="token operator" style="color:#393A34">&lt;</span><span class="token plain">your-api-key</span><span class="token operator" style="color:#393A34">&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Create a Route and configure the <code>ai-proxy</code> Plugin as such:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;deepseek&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$DEEPSEEK_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;deepseek-chat&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a sample question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;role&quot;: &quot;system&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;content&quot;: &quot;You are an AI assistant that helps people find information.&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;role&quot;: &quot;user&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;content&quot;: &quot;Write me a 50-word introduction for Apache APISIX.&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ]</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Apache APISIX is a dynamic, real-time, high-performance API gateway and cloud-native platform. It provides rich traffic management features like load balancing, dynamic upstream, canary release, circuit breaking, authentication, observability, and more. Designed for microservices and serverless architectures, APISIX ensures scalability, security, and seamless integration with modern DevOps workflows.&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="proxy-to-azure-openai"></a>Proxy to Azure OpenAI<a class="hash-link" href="#proxy-to-azure-openai" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure the <code>ai-proxy</code> Plugin to proxy requests to other LLM services, such as Azure OpenAI.</p><p>Obtain the Azure OpenAI API key and save it to an environment variable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token builtin class-name">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:#36acaa">AZ_OPENAI_API_KEY</span><span class="token operator" style="color:#393A34">=</span><span class="token operator" style="color:#393A34">&lt;</span><span class="token plain">your-api-key</span><span class="token operator" style="color:#393A34">&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Create a Route and configure the <code>ai-proxy</code> Plugin as such:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/anything&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai-compatible&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;api-key&quot;: &quot;&#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$AZ_OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token plain">:</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;override&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;endpoint&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;https://api7-auzre-openai.openai.azure.com/openai/deployments/gpt-4/chat/completions?api-version=2024-02-15-preview&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with a sample question in the request body:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/anything&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;messages&quot;: [</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;role&quot;: &quot;system&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;content&quot;: &quot;You are an AI assistant that helps people find information.&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> },</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;role&quot;: &quot;user&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;content&quot;: &quot;Write me a 50-word introduction for Apache APISIX.&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> ],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;max_tokens&quot;: 800,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;temperature&quot;: 0.7,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;frequency_penalty&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;presence_penalty&quot;: 0,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;top_p&quot;: 0.95,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;stop&quot;: null</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;Apache APISIX is a modern, cloud-native API gateway built to handle high-performance and low-latency use cases. It offers a wide range of features, including load balancing, rate limiting, authentication, and dynamic routing, making it an ideal choice for microservices and cloud-native architectures.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="proxy-to-embedding-models"></a>Proxy to Embedding Models<a class="hash-link" href="#proxy-to-embedding-models" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can configure the <code>ai-proxy</code> Plugin to proxy requests to embedding models. This example will use the OpenAI embedding model endpoint.</p><p>Obtain the OpenAI <a href="https://openai.com/blog/openai-api" target="_blank" rel="noopener noreferrer">API key</a> and save it to an environment variable:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token builtin class-name">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:#36acaa">OPENAI_API_KEY</span><span class="token operator" style="color:#393A34">=</span><span class="token operator" style="color:#393A34">&lt;</span><span class="token plain">your-api-key</span><span class="token operator" style="color:#393A34">&gt;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Create a Route and configure the <code>ai-proxy</code> Plugin as such:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9180/apisix/admin/routes&quot;</span><span class="token plain"> -X PUT </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;X-API-KEY: </span><span class="token string variable" style="color:#36acaa">${admin_key}</span><span class="token string" style="color:#e3116c">&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;id&quot;: &quot;ai-proxy-route&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;uri&quot;: &quot;/embeddings&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;methods&quot;: [&quot;POST&quot;],</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;plugins&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;ai-proxy&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;provider&quot;: &quot;openai&quot;,</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;auth&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;header&quot;: {</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;Authorization&quot;: &quot;Bearer &#x27;</span><span class="token plain">&quot;</span><span class="token variable" style="color:#36acaa">$OPENAI_API_KEY</span><span class="token string" style="color:#e3116c">&quot;&#x27;&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;options&quot;</span><span class="token plain">:</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;model&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;text-embedding-3-small&quot;</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;encoding_format&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;float&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">,</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;override&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;endpoint&quot;</span><span class="token builtin class-name">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;https://api.openai.com/v1/embeddings&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Send a POST request to the Route with an input string:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 shell"><pre tabindex="0" class="prism-code language-shell codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">curl</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;http://127.0.0.1:9080/embeddings&quot;</span><span class="token plain"> -X POST </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -H </span><span class="token string" style="color:#e3116c">&quot;Content-Type: application/json&quot;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> -d </span><span class="token string" style="color:#e3116c">&#x27;{</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> &quot;input&quot;: &quot;hello world&quot;</span><br></span><span class="token-line" style="color:#393A34"><span class="token string" style="color:#e3116c"> }&#x27;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>You should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;object&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;list&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;data&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;object&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;embedding&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;embedding&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">-0.0067144386</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">-0.039197803</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.034177095</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.028763203</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">-0.024785956</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">-0.04201061</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;text-embedding-3-small&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor anchor__h3 anchorWithHideOnScrollNavbar_3ly5" id="include-llm-information-in-access-log"></a>Include LLM Information in Access Log<a class="hash-link" href="#include-llm-information-in-access-log" title="Direct link to heading">#</a></h3><p>The following example demonstrates how you can log LLM request related information in the gateway&#x27;s access log to improve analytics and audit. The following variables are available:</p><ul><li><code>request_llm_model</code>: LLM model name specified in the request.</li><li><code>apisix_upstream_response_time</code>: Time taken for APISIX to send the request to the upstream service and receive the full response</li><li><code>request_type</code>: Type of request, where the value could be <code>traditional_http</code>, <code>ai_chat</code>, or <code>ai_stream</code>.</li><li><code>llm_time_to_first_token</code>: Duration from request sending to the first token received from the LLM service, in milliseconds.</li><li><code>llm_model</code>: LLM model.</li><li><code>llm_prompt_tokens</code>: Number of tokens in the prompt.</li><li><code>llm_completion_tokens</code>: Number of chat completion tokens in the prompt.</li></ul><p>Update the access log format in your configuration file to include additional LLM related variables:</p><div class="codeBlockContainer_EiTO"><div style="color:#393A34;background-color:#f6f8fa" class="codeBlockTitle_PQMO">conf/config.yaml</div><div class="codeBlockContent_X2I6 yaml"><pre tabindex="0" class="prism-code language-yaml codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token key atrule" style="color:#00a4db">nginx_config</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">http</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">access_log_format</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;$remote_addr - $remote_user [$time_local] $http_host \&quot;$request_line\&quot; $status $body_bytes_sent $request_time \&quot;$http_referer\&quot; \&quot;$http_user_agent\&quot; $upstream_addr $upstream_status $apisix_upstream_response_time \&quot;$upstream_scheme://$upstream_host$upstream_uri\&quot; \&quot;$apisix_request_id\&quot; \&quot;$request_type\&quot; \&quot;$llm_time_to_first_token\&quot; \&quot;$llm_model\&quot; \&quot;$request_llm_model\&quot; \&quot;$llm_prompt_tokens\&quot; \&quot;$llm_completion_tokens\&quot;&quot;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>Reload APISIX for configuration changes to take effect.</p><p>Now if you create a Route and send a request following the <a href="#proxy-to-openai">Proxy to OpenAI example</a>, you should receive a response similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 json"><pre tabindex="0" class="prism-code language-json codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;model&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;gpt-4-0613&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;choices&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;index&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;message&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;role&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;assistant&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;content&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;1+1 equals 2.&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;refusal&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;annotations&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">[</span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;logprobs&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;finish_reason&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;stop&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;usage&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">23</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;completion_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">8</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;total_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">31</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;prompt_tokens_details&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;cached_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;audio_tokens&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;service_tier&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;default&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token property" style="color:#36acaa">&quot;system_fingerprint&quot;</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token null keyword" style="color:#00009f">null</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>In the gateway&#x27;s access log, you should see a log entry similar to the following:</p><div class="codeBlockContainer_EiTO"><div class="codeBlockContent_X2I6 text"><pre tabindex="0" class="prism-code language-text codeBlock_UxnK thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_W6UD"><span class="token-line" style="color:#393A34"><span class="token plain">192.168.215.1 - - [21/Mar/2025:04:28:03 +0000] api.openai.com &quot;POST /anything HTTP/1.1&quot; 200 804 2.858 &quot;-&quot; &quot;curl/8.6.0&quot; - - - 5765 &quot;http://api.openai.com&quot; &quot;5c5e0b95f8d303cb81e4dc456a4b12d9&quot; &quot;ai_chat&quot; &quot;2858&quot; &quot;gpt-4&quot; &quot;gpt-4&quot; &quot;23&quot; &quot;8&quot;</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" class="copyButton_V-PD clean-btn">Copy</button></div></div><p>The access log entry shows the request type is <code>ai_chat</code>, Apisix upstream response time is <code>5765</code> milliseconds, time to first token is <code>2858</code> milliseconds, Requested LLM model is <code>gpt-4</code>. LLM model is <code>gpt-4</code>, prompt token usage is <code>23</code>, and completion token usage is <code>8</code>.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="/edit#https://github.com/apache/apisix/edit/master/docs/en/latest/plugins/ai-proxy.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_mS5F" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_mt2f"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/apisix/next/terminology/secret/"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">« Secret</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/apisix/next/plugins/ai-proxy-multi/"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">ai-proxy-multi »</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_vrFS thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#description" class="table-of-contents__link">Description</a></li><li><a href="#request-format" class="table-of-contents__link">Request Format</a></li><li><a href="#attributes" class="table-of-contents__link">Attributes</a></li><li><a href="#examples" class="table-of-contents__link">Examples</a><ul><li><a href="#proxy-to-openai" class="table-of-contents__link">Proxy to OpenAI</a></li><li><a href="#proxy-to-deepseek" class="table-of-contents__link">Proxy to DeepSeek</a></li><li><a href="#proxy-to-azure-openai" class="table-of-contents__link">Proxy to Azure OpenAI</a></li><li><a href="#proxy-to-embedding-models" class="table-of-contents__link">Proxy to Embedding Models</a></li><li><a href="#include-llm-information-in-access-log" class="table-of-contents__link">Include LLM Information in Access Log</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="container_MP5Z"><div class="linksRow_iwpv"><div class="linksCol_a1ec"><div>ASF</div><ul><li class="footer__item"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer"><span></span><span>Foundation</span></a></li><li class="footer__item"><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer"><span></span><span>License</span></a></li><li class="footer__item"><a href="https://www.apache.org/events/" target="_blank" rel="noopener noreferrer"><span></span><span>Events</span></a></li><li class="footer__item"><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer"><span></span><span>Security</span></a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer"><span></span><span>Sponsorship</span></a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer"><span></span><span>Thanks</span></a></li></ul></div><div class="linksCol_a1ec"><div>Community</div><ul><li class="footer__item"><a href="https://github.com/apache/apisix/issues" target="_blank" rel="noopener noreferrer"><span></span><span>GitHub</span></a></li><li class="footer__item"><a href="/docs/general/join/"><span></span><span>Slack</span></a></li><li class="footer__item"><a href="https://twitter.com/ApacheAPISIX" target="_blank" rel="noopener noreferrer"><span></span><span>Twitter</span></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UCgPD18cMhOg5rmPVnQhAC8g" target="_blank" rel="noopener noreferrer"><span></span><span>YouTube</span></a></li></ul></div><div class="linksCol_a1ec"><div>More</div><ul><li class="footer__item"><a target="_parent" href="/blog/"><span></span><span>Blog</span></a></li><li class="footer__item"><a target="_parent" href="/showcase/"><span></span><span>Showcase</span></a></li><li class="footer__item"><a target="_parent" href="/plugins/"><span></span><span>Plugin Hub</span></a></li><li class="footer__item"><a href="https://github.com/apache/apisix/milestones" target="_parent" rel="noopener noreferrer"><span></span><span>Roadmap</span></a></li></ul></div></div><div class="copyright_ZfFh"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer"><span style="display:inline-block;width:231.25px;height:40px"></span></a><div>Copyright © 2019-2025 The Apache Software Foundation. Apache APISIX, APISIX®, Apache, the Apache feather logo, and the Apache APISIX project logo are either registered trademarks or trademarks of the Apache Software Foundation.</div></div></footer></div>
<script src="https://apisix-website-static.apiseven.com/assets/js/runtime~main.e29fe45b.js"></script>
<script src="https://apisix-website-static.apiseven.com/assets/js/main.121d6aaf.js"></script>
</body>
</html>