| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| use t::APISIX 'no_plan'; |
| |
| # Harness settings: info-level error log, each block runs once. |
| log_level("info"); |
| repeat_each(1); |
| no_long_string(); |
| no_root_location(); |
| |
| |
| # Slurp the JSON fixture into $resp; abort early if the file cannot be opened. |
| # NOTE(review): $resp is not referenced in the visible part of this file. |
| my $resp_file = 't/assets/ai-proxy-response.json'; |
| open(my $fh, '<', $resp_file) or die "Could not open file '$resp_file' $!"; |
| my $resp = do { local $/; <$fh> }; |
| close($fh); |
| |
| |
| # Per-block preprocessor: gives every test block a default request of |
| # "GET /t" and, unless the block brings its own http_config, a mock |
| # OpenAI-style upstream server listening on port 16724. |
| add_block_preprocessor(sub { |
| my ($block) = @_; |
| |
| if (!defined $block->request) { |
| $block->set_value("request", "GET /t"); |
| } |
| |
| my $http_config = $block->http_config // <<_EOC_; |
| server { |
| server_name openai; |
| listen 16724; |
| |
| default_type 'application/json'; |
| |
| # Passthrough target: accepts only the SQL text emitted by the chat mock. |
| location /anything { |
| content_by_lua_block { |
| if ngx.req.get_method() ~= "POST" then |
| ngx.status = 400 |
| ngx.say("Unsupported request method: ", ngx.req.get_method()) |
| return |
| end |
| ngx.req.read_body() |
| local body = ngx.req.get_body_data() |
| |
| if body ~= "SELECT * FROM STUDENTS" then |
| ngx.status = 503 |
| ngx.say("passthrough doesn't work") |
| return |
| end |
| -- NOTE(review): this payload is not valid JSON (comma instead of colon), kept unchanged |
| ngx.say('{"foo", "bar"}') |
| } |
| } |
| |
| # Chat completion mock: checks the credentials, then returns a fixed answer with usage counts. |
| location /v1/chat/completions { |
| content_by_lua_block { |
| local json = require("cjson.safe") |
| |
| if ngx.req.get_method() ~= "POST" then |
| ngx.status = 400 |
| ngx.say("Unsupported request method: ", ngx.req.get_method()) |
| return |
| end |
| ngx.req.read_body() |
| local raw_body = ngx.req.get_body_data() |
| -- body stays nil when the payload is missing or cannot be decoded |
| local body = json.decode(raw_body) |
| local body_is_table = type(body) == "table" |
| |
| local headers = ngx.req.get_headers() |
| if headers["test-type"] == "options" then |
| if body_is_table and body.foo == "bar" then |
| ngx.status = 200 |
| ngx.say("options works") |
| else |
| ngx.status = 500 |
| ngx.say("model options feature doesn't work") |
| end |
| return |
| end |
| |
| local header_auth = headers["authorization"] |
| local query_auth = ngx.req.get_uri_args()["apikey"] |
| |
| if header_auth ~= "Bearer token" and query_auth ~= "apikey" then |
| ngx.status = 401 |
| ngx.say("Unauthorized") |
| return |
| end |
| |
| -- from here on the caller is authenticated by header or by query argument |
| if not body_is_table or not body.messages or #body.messages < 1 then |
| ngx.status = 400 |
| ngx.say([[{ "error": "bad request"}]]) |
| return |
| end |
| |
| if body.messages[1].content == "write an SQL query to get all rows from student table" then |
| ngx.print("SELECT * FROM STUDENTS") |
| return |
| end |
| |
| -- the usage numbers below are what the rate limiting tests count against |
| ngx.status = 200 |
| ngx.say(string.format([[ |
| { |
| "choices": [ |
| { |
| "finish_reason": "stop", |
| "index": 0, |
| "message": { "content": "1 + 1 = 2.", "role": "assistant" } |
| } |
| ], |
| "created": 1723780938, |
| "id": "chatcmpl-9wiSIg5LYrrpxwsr2PubSQnbtod1P", |
| "model": "%s", |
| "object": "chat.completion", |
| "system_fingerprint": "fp_abc28019ad", |
| "usage": { "completion_tokens": 5, "prompt_tokens": 8, "total_tokens": 10 } |
| } |
| ]], body.model)) |
| } |
| } |
| |
| location /random { |
| content_by_lua_block { |
| ngx.say("path override works") |
| } |
| } |
| } |
| _EOC_ |
| |
| $block->set_value("http_config", $http_config); |
| }); |
| |
| # Run every test block declared after the __DATA__ marker. |
| run_tests(); |
| |
| __DATA__ |
| |
| === TEST 1: sanity |
| --- config |
| location /t { |
| content_by_lua_block { |
| -- Every entry is passed to check_schema in order; the n-th line of the |
| -- expected response body is the outcome for the n-th entry. |
| local configs = { |
| -- time_window without limit: rejected |
| { |
| time_window = 60, |
| }, |
| -- limit without time_window: rejected |
| { |
| limit = 30, |
| }, |
| -- rejected_code below 200: rejected |
| { |
| limit = 30, |
| time_window = 60, |
| rejected_code = 199, |
| }, |
| -- limit_strategy outside the enum: rejected |
| { |
| limit = 30, |
| time_window = 60, |
| limit_strategy = "invalid", |
| }, |
| -- second instance has no name: rejected |
| { |
| limit = 30, |
| time_window = 60, |
| instances = { |
| { |
| name = "instance1", |
| limit = 30, |
| time_window = 60, |
| }, |
| { |
| limit = 30, |
| time_window = 60, |
| } |
| }, |
| }, |
| -- instances plus top-level time_window but no top-level limit: rejected |
| { |
| time_window = 60, |
| instances = { |
| { |
| name = "instance1", |
| limit = 30, |
| time_window = 60, |
| } |
| }, |
| }, |
| -- instances plus top-level limit but no top-level time_window: rejected |
| { |
| limit = 30, |
| instances = { |
| { |
| name = "instance1", |
| limit = 30, |
| time_window = 60, |
| } |
| }, |
| }, |
| -- instances only: accepted |
| { |
| instances = { |
| { |
| name = "instance1", |
| limit = 30, |
| time_window = 60, |
| } |
| }, |
| }, |
| -- top-level limit with custom rejection and strategy: accepted |
| { |
| limit = 30, |
| time_window = 60, |
| rejected_code = 403, |
| rejected_msg = "rate limit exceeded", |
| limit_strategy = "completion_tokens", |
| }, |
| -- top-level limit together with one named instance: accepted |
| { |
| limit = 30, |
| time_window = 60, |
| instances = { |
| { |
| name = "instance1", |
| limit = 30, |
| time_window = 60, |
| } |
| }, |
| } |
| } |
| local plugin = require("apisix.plugins.ai-rate-limiting") |
| for _, config in ipairs(configs) do |
| local ok, err = plugin.check_schema(config) |
| if not ok then |
| ngx.say(err) |
| else |
| ngx.say("passed") |
| end |
| end |
| ngx.say("done") |
| } |
| } |
| --- response_body |
| property "limit" is required when "time_window" is set |
| property "time_window" is required when "limit" is set |
| property "rejected_code" validation failed: expected 199 to be at least 200 |
| property "limit_strategy" validation failed: matches none of the enum values |
| property "instances" validation failed: failed to validate item 2: property "name" is required |
| property "limit" is required when "time_window" is set |
| property "time_window" is required when "limit" is set |
| passed |
| passed |
| passed |
| done |
| |
| |
| |
| === TEST 2: set route 1, default limit_strategy: total_tokens |
| --- config |
| location /t { |
| content_by_lua_block { |
| -- Route 1: ai-proxy pointed at the mock upstream, ai-rate-limiting |
| -- with limit 30 and time_window 60 and no explicit limit_strategy. |
| local admin_api = require("lib.test_admin").test |
| local route_conf = [[{ |
| "uri": "/ai", |
| "plugins": { |
| "ai-proxy": { |
| "provider": "openai", |
| "auth": { |
| "header": { |
| "Authorization": "Bearer token" |
| } |
| }, |
| "options": { |
| "model": "gpt-35-turbo-instruct", |
| "max_tokens": 512, |
| "temperature": 1.0 |
| }, |
| "override": { |
| "endpoint": "http://localhost:16724" |
| }, |
| "ssl_verify": false |
| }, |
| "ai-rate-limiting": { |
| "limit": 30, |
| "time_window": 60 |
| } |
| }, |
| "upstream": { |
| "type": "roundrobin", |
| "nodes": { |
| "canbeanything.com": 1 |
| } |
| } |
| }]] |
| |
| local status, resp_body = admin_api('/apisix/admin/routes/1', ngx.HTTP_PUT, route_conf) |
| if status >= 300 then |
| ngx.status = status |
| end |
| ngx.say(resp_body) |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 3: reject the 4th request |
| --- pipelined_requests eval |
| # The mock upstream reports 10 total tokens per completion, so three |
| # requests use up the limit of 30 and the fourth one is rejected. |
| my $chat = "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }"; |
| [ ($chat) x 4 ] |
| --- more_headers |
| Authorization: Bearer token |
| --- error_code eval |
| [200, 200, 200, 503] |
| |
| |
| |
| === TEST 4: set rejected_code to 403, rejected_msg to "rate limit exceeded" |
| --- config |
| location /t { |
| content_by_lua_block { |
| -- Same route as before, now with a custom rejection code and message. |
| local admin_api = require("lib.test_admin").test |
| local route_conf = [[{ |
| "uri": "/ai", |
| "plugins": { |
| "ai-proxy": { |
| "provider": "openai", |
| "auth": { |
| "header": { |
| "Authorization": "Bearer token" |
| } |
| }, |
| "options": { |
| "model": "gpt-35-turbo-instruct", |
| "max_tokens": 512, |
| "temperature": 1.0 |
| }, |
| "override": { |
| "endpoint": "http://localhost:16724" |
| }, |
| "ssl_verify": false |
| }, |
| "ai-rate-limiting": { |
| "limit": 30, |
| "time_window": 60, |
| "rejected_code": 403, |
| "rejected_msg": "rate limit exceeded" |
| } |
| }, |
| "upstream": { |
| "type": "roundrobin", |
| "nodes": { |
| "canbeanything.com": 1 |
| } |
| } |
| }]] |
| |
| local status, resp_body = admin_api('/apisix/admin/routes/1', ngx.HTTP_PUT, route_conf) |
| if status >= 300 then |
| ngx.status = status |
| end |
| ngx.say(resp_body) |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 5: check code and message |
| --- pipelined_requests eval |
| # four identical chat requests |
| my $chat = "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }"; |
| [ ($chat) x 4 ] |
| --- more_headers |
| Authorization: Bearer token |
| --- error_code eval |
| [200, 200, 200, 403] |
| --- response_body eval |
| # three normal answers followed by the custom rejection message |
| my $answer = qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/; |
| [ |
| ($answer) x 3, |
| qr/\{"error_msg":"rate limit exceeded"\}/, |
| ] |
| |
| |
| |
| === TEST 6: check rate limit headers |
| --- request |
| POST /ai |
| { "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] } |
| --- more_headers |
| Authorization: Bearer token |
| --- response_headers |
| X-AI-RateLimit-Limit-ai-proxy-openai: 30 |
| X-AI-RateLimit-Remaining-ai-proxy-openai: 29 |
| X-AI-RateLimit-Reset-ai-proxy-openai: 60 |
| |
| |
| |
| === TEST 7: check rate limit headers after 4 requests |
| --- pipelined_requests eval |
| # four identical chat requests; the remaining-quota header is checked on each response |
| my $chat = "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }"; |
| [ ($chat) x 4 ] |
| --- more_headers |
| Authorization: Bearer token |
| --- error_code eval |
| [200, 200, 200, 403] |
| --- response_headers eval |
| [ |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 29", |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 19", |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 9", |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 0", |
| ] |
| |
| |
| |
| === TEST 8: set route2 with limit_strategy: completion_tokens |
| --- config |
| location /t { |
| content_by_lua_block { |
| -- Route 2 on /ai2: limit 20, time_window 45, counting completion_tokens. |
| local admin_api = require("lib.test_admin").test |
| local route_conf = [[{ |
| "uri": "/ai2", |
| "plugins": { |
| "ai-proxy": { |
| "provider": "openai", |
| "auth": { |
| "header": { |
| "Authorization": "Bearer token" |
| } |
| }, |
| "options": { |
| "model": "gpt-35-turbo-instruct", |
| "max_tokens": 512, |
| "temperature": 1.0 |
| }, |
| "override": { |
| "endpoint": "http://localhost:16724" |
| }, |
| "ssl_verify": false |
| }, |
| "ai-rate-limiting": { |
| "limit": 20, |
| "time_window": 45, |
| "limit_strategy": "completion_tokens" |
| } |
| }, |
| "upstream": { |
| "type": "roundrobin", |
| "nodes": { |
| "canbeanything.com": 1 |
| } |
| } |
| }]] |
| |
| local status, resp_body = admin_api('/apisix/admin/routes/2', ngx.HTTP_PUT, route_conf) |
| if status >= 300 then |
| ngx.status = status |
| end |
| ngx.say(resp_body) |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 9: reject the 5th request |
| --- pipelined_requests eval |
| # five identical chat requests against route 2 |
| my $chat = "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }"; |
| [ ($chat) x 5 ] |
| --- more_headers |
| Authorization: Bearer token |
| --- error_code eval |
| [200, 200, 200, 200, 503] |
| |
| |
| |
| === TEST 10: check rate limit headers of route 2 |
| --- request |
| POST /ai2 |
| { "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] } |
| --- more_headers |
| Authorization: Bearer token |
| --- response_headers |
| X-AI-RateLimit-Limit-ai-proxy-openai: 20 |
| X-AI-RateLimit-Remaining-ai-proxy-openai: 19 |
| X-AI-RateLimit-Reset-ai-proxy-openai: 45 |
| |
| |
| |
| === TEST 11: multi-request |
| --- pipelined_requests eval |
| # five identical chat requests against route 2; the remaining-quota header is checked on each response |
| my $chat = "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }"; |
| [ ($chat) x 5 ] |
| --- more_headers |
| Authorization: Bearer token |
| --- error_code eval |
| [200, 200, 200, 200, 503] |
| --- response_headers eval |
| [ |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 19", |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 14", |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 9", |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 4", |
| "X-AI-RateLimit-Remaining-ai-proxy-openai: 0", |
| ] |
| |
| |
| |
| === TEST 12: request route 1 and route 2 |
| --- pipelined_requests eval |
| # Three requests to route 1, four to route 2, then one more to each; |
| # the last two are the ones expected to be rejected. |
| my $payload = "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }"; |
| my $route1 = "POST /ai\n" . $payload; |
| my $route2 = "POST /ai2\n" . $payload; |
| [ ($route1) x 3, ($route2) x 4, $route1, $route2 ] |
| --- more_headers |
| Authorization: Bearer token |
| --- error_code eval |
| [200, 200, 200, 200, 200, 200, 200, 403, 503] |
| |
| |
| |
| === TEST 13: ai-rate-limiting & ai-proxy-multi, with instance_health_and_rate_limiting strategy |
| --- config |
| location /t { |
| content_by_lua_block { |
| -- Route 1 with two ai-proxy-multi instances (gpt-4 at priority 1, |
| -- gpt-3 at priority 0) and a single limit of 10 per time_window 60. |
| local admin_api = require("lib.test_admin").test |
| local route_conf = [[{ |
| "uri": "/ai", |
| "plugins": { |
| "ai-proxy-multi": { |
| "fallback_strategy": "instance_health_and_rate_limiting", |
| "instances": [ |
| { |
| "name": "openai-gpt4", |
| "provider": "openai", |
| "weight": 1, |
| "priority": 1, |
| "auth": { |
| "header": { |
| "Authorization": "Bearer token" |
| } |
| }, |
| "options": { |
| "model": "gpt-4" |
| }, |
| "override": { |
| "endpoint": "http://localhost:16724" |
| } |
| }, |
| { |
| "name": "openai-gpt3", |
| "provider": "openai", |
| "weight": 1, |
| "priority": 0, |
| "auth": {"header": {"Authorization": "Bearer token"}}, |
| "options": {"model": "gpt-3"}, |
| "override": {"endpoint": "http://localhost:16724"} |
| } |
| ], |
| "ssl_verify": false |
| }, |
| "ai-rate-limiting": { |
| "limit": 10, |
| "time_window": 60 |
| } |
| }, |
| "upstream": { |
| "type": "roundrobin", |
| "nodes": { |
| "canbeanything.com": 1 |
| } |
| } |
| }]] |
| |
| local status, resp_body = admin_api('/apisix/admin/routes/1', ngx.HTTP_PUT, route_conf) |
| if status >= 300 then |
| ngx.status = status |
| end |
| ngx.say(resp_body) |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 14: fallback strategy should work |
| --- config |
| location /t { |
| content_by_lua_block { |
| local t = require("lib.test_admin").test |
| local core = require("apisix.core") |
| |
| -- Sends the same chat request on every call; the header table is |
| -- rebuilt each time so nothing is shared between requests. |
| local function chat() |
| return t("/ai", |
| ngx.HTTP_POST, |
| [[{ |
| "messages": [ |
| { "role": "system", "content": "You are a mathematician" }, |
| { "role": "user", "content": "What is 1+1?" } |
| ] |
| }]], |
| nil, |
| { |
| ["test-type"] = "options", |
| ["Content-Type"] = "application/json", |
| } |
| ) |
| end |
| |
| local code, _, body = chat() |
| assert(code == 200, "first request should be successful") |
| assert(core.string.find(body, "gpt-4"), |
| "first request should be handled by higher priority instance") |
| |
| code, _, body = chat() |
| assert(code == 200, "second request should be successful") |
| assert(core.string.find(body, "gpt-3"), |
| "second request should be handled by lower priority instance") |
| |
| -- for the rejected request the body is read from the second return |
| -- value, unlike the two successful calls above |
| code, body = chat() |
| assert(code == 503, "third request should be failed") |
| assert(core.string.find(body, "all servers tried"), |
| "third response should report that all servers were tried") |
| |
| ngx.say("passed") |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 15: limiting to only one instance |
| --- config |
| location /t { |
| content_by_lua_block { |
| -- Only the openai-gpt4 instance gets a rate limit (20 per 60); |
| -- openai-gpt3 has no entry in ai-rate-limiting. |
| local admin_api = require("lib.test_admin").test |
| local route_conf = [[{ |
| "uri": "/ai", |
| "plugins": { |
| "ai-proxy-multi": { |
| "fallback_strategy": "instance_health_and_rate_limiting", |
| "instances": [ |
| { |
| "name": "openai-gpt4", |
| "provider": "openai", |
| "weight": 1, |
| "priority": 1, |
| "auth": {"header": {"Authorization": "Bearer token"}}, |
| "options": {"model": "gpt-4"}, |
| "override": {"endpoint": "http://localhost:16724"} |
| }, |
| { |
| "name": "openai-gpt3", |
| "provider": "openai", |
| "weight": 1, |
| "priority": 0, |
| "auth": {"header": {"Authorization": "Bearer token"}}, |
| "options": {"model": "gpt-3"}, |
| "override": {"endpoint": "http://localhost:16724"} |
| } |
| ], |
| "ssl_verify": false |
| }, |
| "ai-rate-limiting": { |
| "instances": [ |
| { |
| "name": "openai-gpt4", |
| "limit": 20, |
| "time_window": 60 |
| } |
| ] |
| } |
| }, |
| "upstream": { |
| "type": "roundrobin", |
| "nodes": { |
| "canbeanything.com": 1 |
| } |
| } |
| }]] |
| |
| local status, resp_body = admin_api('/apisix/admin/routes/1', ngx.HTTP_PUT, route_conf) |
| if status >= 300 then |
| ngx.status = status |
| end |
| ngx.say(resp_body) |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 16: 10 requests, 8 should be handled by gpt-3, 2 should be handled by gpt-4 |
| --- config |
| location /t { |
| content_by_lua_block { |
| local t = require("lib.test_admin").test |
| local core = require("apisix.core") |
| |
| -- Tally which model name shows up in each of the ten responses. |
| local instances_count = {} |
| for i = 1, 10 do |
| local code, _, body = t("/ai", |
| ngx.HTTP_POST, |
| [[{ |
| "messages": [ |
| { "role": "system", "content": "You are a mathematician" }, |
| { "role": "user", "content": "What is 1+1?" } |
| ] |
| }]], |
| nil, |
| { |
| ["test-type"] = "options", |
| ["Content-Type"] = "application/json", |
| } |
| ) |
| assert(code == 200, "request " .. i .. " should be successful") |
| if core.string.find(body, "gpt-4") then |
| instances_count["gpt-4"] = (instances_count["gpt-4"] or 0) + 1 |
| else |
| instances_count["gpt-3"] = (instances_count["gpt-3"] or 0) + 1 |
| end |
| end |
| |
| ngx.log(ngx.INFO, "instances_count test:", core.json.delay_encode(instances_count)) |
| |
| -- a missing counter fails the assertion with its message instead of |
| -- raising a nil comparison error |
| local gpt4_hits = instances_count["gpt-4"] |
| local gpt3_hits = instances_count["gpt-3"] |
| assert(gpt4_hits and gpt4_hits <= 2, "gpt-4 should handle at most 2 requests") |
| assert(gpt3_hits and gpt3_hits >= 8, "gpt-3 should handle at least 8 requests") |
| ngx.say("passed") |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 17: each instance uses different current limiting |
| --- config |
| location /t { |
| content_by_lua_block { |
| -- Per-instance limits: openai-gpt3 gets 50, openai-gpt4 gets 20, |
| -- both with time_window 60. |
| local admin_api = require("lib.test_admin").test |
| local route_conf = [[{ |
| "uri": "/ai", |
| "plugins": { |
| "ai-proxy-multi": { |
| "fallback_strategy": "instance_health_and_rate_limiting", |
| "instances": [ |
| { |
| "name": "openai-gpt4", |
| "provider": "openai", |
| "weight": 1, |
| "priority": 1, |
| "auth": { |
| "header": { |
| "Authorization": "Bearer token" |
| } |
| }, |
| "options": { |
| "model": "gpt-4" |
| }, |
| "override": { |
| "endpoint": "http://localhost:16724" |
| } |
| }, |
| { |
| "name": "openai-gpt3", |
| "provider": "openai", |
| "weight": 1, |
| "priority": 0, |
| "auth": {"header": {"Authorization": "Bearer token"}}, |
| "options": {"model": "gpt-3"}, |
| "override": {"endpoint": "http://localhost:16724"} |
| } |
| ], |
| "ssl_verify": false |
| }, |
| "ai-rate-limiting": {"instances": [{"name": "openai-gpt3","limit": 50,"time_window": 60},{"name": "openai-gpt4","limit": 20,"time_window": 60}] |
| } |
| }, |
| "upstream": { |
| "type": "roundrobin", |
| "nodes": { |
| "canbeanything.com": 1 |
| } |
| } |
| }]] |
| |
| local status, resp_body = admin_api('/apisix/admin/routes/1', ngx.HTTP_PUT, route_conf) |
| if status >= 300 then |
| ngx.status = status |
| end |
| ngx.say(resp_body) |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 18: gpt3 allows 5 requests, gpt4 allows 2 requests |
| --- pipelined_requests eval |
| # nine identical chat requests; the last two are expected to be rejected |
| my $chat = "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }"; |
| [ ($chat) x 9 ] |
| --- more_headers |
| Authorization: Bearer token |
| --- error_code eval |
| [200, 200, 200, 200, 200, 200, 200, 503, 503] |
| |
| |
| |
| === TEST 19: set limit & instances |
| --- config |
| location /t { |
| content_by_lua_block { |
| -- Top-level limit 20 per 60 combined with an instance entry that |
| -- gives openai-gpt3 its own limit of 50. |
| local admin_api = require("lib.test_admin").test |
| local route_conf = [[{ |
| "uri": "/ai", |
| "plugins": { |
| "ai-proxy-multi": { |
| "fallback_strategy": "instance_health_and_rate_limiting", |
| "instances": [ |
| { |
| "name": "openai-gpt4", |
| "provider": "openai", |
| "weight": 1, |
| "priority": 1, |
| "auth": { |
| "header": { |
| "Authorization": "Bearer token" |
| } |
| }, |
| "options": { |
| "model": "gpt-4" |
| }, |
| "override": { |
| "endpoint": "http://localhost:16724" |
| } |
| }, |
| { |
| "name": "openai-gpt3", |
| "provider": "openai", |
| "weight": 1, |
| "priority": 0, |
| "auth": {"header": {"Authorization": "Bearer token"}}, |
| "options": {"model": "gpt-3"}, |
| "override": {"endpoint": "http://localhost:16724"} |
| } |
| ], |
| "ssl_verify": false |
| }, |
| "ai-rate-limiting": {"limit": 20, "time_window": 60, "instances": [{"name": "openai-gpt3","limit": 50,"time_window": 60}] |
| } |
| }, |
| "upstream": { |
| "type": "roundrobin", |
| "nodes": { |
| "canbeanything.com": 1 |
| } |
| } |
| }]] |
| |
| local status, resp_body = admin_api('/apisix/admin/routes/1', ngx.HTTP_PUT, route_conf) |
| if status >= 300 then |
| ngx.status = status |
| end |
| ngx.say(resp_body) |
| } |
| } |
| --- response_body |
| passed |
| |
| |
| |
| === TEST 20: gpt3 allows 5 requests, gpt4 allows 2 requests |
| --- pipelined_requests eval |
| # nine identical chat requests; the last two are expected to be rejected |
| my $chat = "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }"; |
| [ ($chat) x 9 ] |
| --- more_headers |
| Authorization: Bearer token |
| --- error_code eval |
| [200, 200, 200, 200, 200, 200, 200, 503, 503] |