| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
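| # These tests accumulate Prometheus counters across blocks, so they run in |
| # HUP (reload) mode to keep a single nginx master process alive; the |
| # check-leak mode cannot be combined with HUP tests and is skipped. |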
| BEGIN { |
| if ($ENV{TEST_NGINX_CHECK_LEAK}) { |
| $SkipReason = "unavailable for the hup tests"; |
| |
| } else { |
| $ENV{TEST_NGINX_USE_HUP} = 1; |
| undef $ENV{TEST_NGINX_USE_STAP}; |
| } |
| } |
| |
| use t::APISIX 'no_plan'; |
| |
| repeat_each(1); |
| no_long_string(); |
| no_shuffle(); |
| no_root_location(); |
| |
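| # Give every block the same extra_yaml_config: only ai-proxy-multi, |
| # prometheus and public-api are enabled, and prometheus refresh_interval |
| # is lowered so metric values are fresh between requests. Unless a block |
| # supplies its own http_config, a mock OpenAI-compatible upstream is |
| # started on port 6724. |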
| add_block_preprocessor(sub { |
| my ($block) = @_; |
| |
| if (!defined $block->request) { |
| $block->set_value("request", "GET /t"); |
| } |
| my $user_yaml_config = <<_EOC_; |
| plugin_attr: |
| prometheus: |
| refresh_interval: 0.1 |
| plugins: |
| - ai-proxy-multi |
| - prometheus |
| - public-api |
| _EOC_ |
| $block->set_value("extra_yaml_config", $user_yaml_config); |
| my $http_config = $block->http_config // <<_EOC_; |
| server { |
| listen 6724; |
| |
| default_type 'application/json'; |
| |
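| # Fast mock endpoint: replies immediately with the canned completion |
| # defined in \@chat below. |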
| location /v1/chat/completions { |
| content_by_lua_block { |
| ngx.exec("\@chat") |
| } |
| } |
| |
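| # Slow variant: sleeps 2s before replying, used by TEST 9/10 to observe |
| # in-flight requests via the active-connections gauge. |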
| location /delay/v1/chat/completions { |
| content_by_lua_block { |
| ngx.sleep(2) |
| ngx.exec("\@chat") |
| } |
| } |
| |
| location \@chat { |
| content_by_lua_block { |
| ngx.status = 200 |
| ngx.say([[ |
| { |
| "choices": [ |
| { |
| "message": { |
| "content": "1 + 1 = 2.", |
| "role": "assistant" |
| } |
| } |
| ], |
| "usage": { |
| "completion_tokens": 5, |
| "prompt_tokens": 8, |
| "total_tokens": 13 |
| } |
| } |
| ]]) |
| } |
| } |
| } |
| _EOC_ |
| |
| $block->set_value("http_config", $http_config); |
| }); |
| |
| run_tests; |
| |
| __DATA__ |
| |
| === TEST 1: create a route with prometheus and ai-proxy-multi plugin |
| --- config |
| location /t { |
| content_by_lua_block { |
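| -- Two admin PUTs: /chat routed through ai-proxy-multi (a single gpt-4 |
| -- instance pointing at the mock upstream) plus prometheus, and a |
| -- public-api route exposing /apisix/prometheus/metrics. |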
| local data = { |
| { |
| url = "/apisix/admin/routes/1", |
| data = [[{ |
| "plugins": { |
| "prometheus": {}, |
| "ai-proxy-multi": { |
| "instances": [ |
| { |
| "name": "openai-gpt4", |
| "provider": "openai", |
| "weight": 1, |
| "auth": { |
| "header": { |
| "Authorization": "Bearer token" |
| } |
| }, |
| "options": { |
| "model": "gpt-4" |
| }, |
| "override": { |
| "endpoint": "http://localhost:6724" |
| } |
| } |
| ] |
| } |
| }, |
| "uri": "/chat" |
| }]], |
| }, |
| { |
| url = "/apisix/admin/routes/metrics", |
| data = [[{ |
| "plugins": { |
| "public-api": {} |
| }, |
| "uri": "/apisix/prometheus/metrics" |
| }]] |
| }, |
| } |
| |
| local t = require("lib.test_admin").test |
| |
| for _, data in ipairs(data) do |
| local _, body = t(data.url, ngx.HTTP_PUT, data.data) |
| ngx.say(body) |
| end |
| } |
| } |
| --- response_body eval |
| "passed\n" x 2 |
| |
| |
| |
| === TEST 2: send a chat request |
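| The body asks for model "gpt-3" while the instance is configured with |
| "gpt-4", so the assertions below can distinguish request_llm_model="gpt-3" |
| from the served llm_model="gpt-4". |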
| --- request |
| POST /chat |
| {"messages":[{"role":"user","content":"What is 1+1?"}], "model": "gpt-3"} |
| --- error_code: 200 |
| |
| |
| |
| === TEST 3: assert llm_latency_bucket metric |
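| The latency histogram exposes _bucket/_count/_sum series; TEST 3-5 check |
| one of them each for the single request sent in TEST 2. |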
| --- request |
| GET /apisix/prometheus/metrics |
| --- response_body eval |
| qr/apisix_llm_latency_bucket\{.*route_id="1",.*,node="openai-gpt4".*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4",le="\d+"\} 1/ |
| |
| |
| |
| === TEST 4: assert llm_latency_count metric |
| --- request |
| GET /apisix/prometheus/metrics |
| --- response_body eval |
| qr/apisix_llm_latency_count\{.*route_id="1",.*,node="openai-gpt4".*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} 1/ |
| |
| |
| |
| === TEST 5: assert llm_latency_sum metric |
| --- request |
| GET /apisix/prometheus/metrics |
| --- response_body eval |
| qr/apisix_llm_latency_sum\{.*route_id="1",.*,node="openai-gpt4".*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} [\d.]+/ |
| |
| |
| |
| === TEST 6: assert llm_prompt_tokens metric |
| --- request |
| GET /apisix/prometheus/metrics |
| --- response_body eval |
| qr/apisix_llm_prompt_tokens\{.*route_id="1",.*,node="openai-gpt4".*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} 8/ |
| |
| |
| |
| === TEST 7: assert llm_completion_tokens metric |
| --- request |
| GET /apisix/prometheus/metrics |
| --- response_body eval |
| qr/apisix_llm_completion_tokens\{.*route_id="1",.*,node="openai-gpt4".*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} 5/ |
| |
| |
| |
| === TEST 8: assert llm_active_connections metric |
| --- request |
| GET /apisix/prometheus/metrics |
| --- response_body eval |
| qr/apisix_llm_active_connections\{.*route_id="1",.*,node="openai-gpt4".*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} 0/ |
| |
| |
| |
| === TEST 9: change ai-proxy-multi to use a slower ai endpoint |
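| Point override.endpoint at the /delay variant so each request takes about |
| 2 seconds, long enough for TEST 10 to sample in-flight connections. |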
| --- config |
| location /t { |
| content_by_lua_block { |
| local data = { |
| { |
| url = "/apisix/admin/routes/1", |
| data = [[{ |
| "plugins": { |
| "prometheus": {}, |
| "ai-proxy-multi": { |
| "instances": [ |
| { |
| "name": "openai-gpt4", |
| "provider": "openai", |
| "weight": 1, |
| "auth": { |
| "header": { |
| "Authorization": "Bearer token" |
| } |
| }, |
| "options": { |
| "model": "gpt-4" |
| }, |
| "override": { |
| "endpoint": "http://localhost:6724/delay/v1/chat/completions" |
| } |
| } |
| ] |
| } |
| }, |
| "uri": "/chat" |
| }]], |
| }, |
| } |
| local t = require("lib.test_admin").test |
| for _, data in ipairs(data) do |
| local _, body = t(data.url, ngx.HTTP_PUT, data.data) |
| ngx.say(body) |
| end |
| } |
| } |
| --- response_body eval |
| "passed\n" |
| |
| |
| |
| === TEST 10: assert llm_active_connections metric when the ai endpoint is slow |
| --- config |
| location /t { |
| content_by_lua_block { |
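| -- Fire 3 concurrent chat requests from timers; while they are parked in |
| -- the 2s-delay upstream the active-connections gauge should read 3, and |
| -- drop back to 0 once they all complete. |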
| local res_list = {} |
| for i = 1, 3 do |
| local url = "http://127.0.0.1:" .. ngx.var.server_port .. "/chat" |
| -- ngx.timer.at passes a `premature` flag as the first callback |
| -- argument before user args, so accept it ahead of `idx`. |
| local function send_chat_request(premature, idx) |
| local http = require "resty.http" |
| local httpc = http.new() |
| local res = httpc:request_uri( |
| url, |
| { |
| method = "POST", |
| body = [[ {"messages":[{"role":"user","content":"What is 1+1?"}]} ]], |
| }) |
| res_list[idx] = res |
| end |
| ngx.timer.at(0, send_chat_request, i) |
| end |
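| -- Let the timers start and reach the delayed upstream. |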
| ngx.sleep(1) |
| local http = require "resty.http" |
| local httpc = http.new() |
| local metric_resp = httpc:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/apisix/prometheus/metrics") |
| if not string.find(metric_resp.body, [[apisix_llm_active_connections{[^}]*} 3]]) then |
| ngx.say(metric_resp.body) |
| ngx.say("llm_active_connections should be 3") |
| return |
| end |
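| -- Wait out the 2s upstream delay so all timer requests finish. |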
| ngx.sleep(1) |
| for _, res in ipairs(res_list) do |
| if res.status ~= 200 then |
| ngx.say("failed to send chat request") |
| return |
| end |
| end |
| metric_resp = httpc:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/apisix/prometheus/metrics") |
| if not string.find(metric_resp.body, [[apisix_llm_active_connections{[^}]*} 0]]) then |
| ngx.say(metric_resp.body) |
| ngx.say("llm_active_connections should be 0 after all requests are done") |
| return |
| end |
| ngx.say("success") |
| } |
| } |
| --- request |
| GET /t |
| --- response_body |
| success |