#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use t::APISIX 'no_plan';
log_level("debug");
repeat_each(1);
no_long_string();
no_root_location();
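# The block preprocessor below fills in a default "GET /t" request for
# blocks that do not define one and injects a mock upstream into the
# nginx http config shared by every test in this file.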
add_block_preprocessor(sub {
my ($block) = @_;
if (!defined $block->request) {
$block->set_value("request", "GET /t");
}
my $http_config = $block->http_config // <<_EOC_;
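# Mock upstream on port 6724: /v1/chat/completions imitates an
# OpenAI-style chat completions API, every other path imitates the
# Aliyun content moderation API.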
server {
listen 6724;
default_type 'application/json';
location /v1/chat/completions {
content_by_lua_block {
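-- Always answer with a fixed completion whose content contains the
-- word "kill", so that response-side moderation can be triggered.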
ngx.status = 200
ngx.say([[
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": { "content": "I will kill you.", "role": "assistant" }
}
],
"created": 1723780938,
"id": "chatcmpl-9wiSIg5LYrrpxwsr2PubSQnbtod1P",
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"usage": { "completion_tokens": 5, "prompt_tokens": 8, "total_tokens": 10 }
}
]])
}
}
location / {
content_by_lua_block {
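-- Mock of the Aliyun moderation API: any scanned text containing
-- "kill" is reported as high risk, everything else as no risk.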
local core = require("apisix.core")
ngx.req.read_body()
local body = ngx.req.get_body_data()
if not body then
ngx.status = 400
return
end
ngx.status = 200
if core.string.find(body, "kill") then
ngx.say([[
{
"Message": "OK",
"Data": {
"Advice": [
{
"HitLabel": "violent_incidents",
"Answer": "As an AI language model, I cannot write unethical or controversial content for you."
}
],
"RiskLevel": "high",
"Result": [
{
"RiskWords": "kill",
"Description": "suspected extremist content",
"Confidence": 100.0,
"Label": "violent_incidents"
}
]
},
"Code": 200
}
]])
else
ngx.say([[
{
"RequestId": "3262D562-1FBA-5ADF-86CB-3087603A4DF3",
"Message": "OK",
"Data": {
"RiskLevel": "none",
"Result": [
{
"Description": "no risk detected",
"Label": "nonLabel"
}
]
},
"Code": 200
}
]])
end
}
}
}
_EOC_
$block->set_value("http_config", $http_config);
});
run_tests();
__DATA__
=== TEST 1: create a route with ai-aliyun-content-moderation plugin only
--- config
location /t {
content_by_lua_block {
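-- Create a route that carries only the moderation plugin; TEST 2
-- verifies that it refuses to work without ai-proxy/ai-proxy-multi.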
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/chat",
"plugins": {
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": true
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 2: use ai-aliyun-content-moderation plugin without ai-proxy or ai-proxy-multi plugin should fail
--- request
POST /chat
{"prompt": "What is 1+1?"}
--- error_code: 500
--- response_body chomp
no ai instance picked, ai-aliyun-content-moderation plugin must be used with ai-proxy or ai-proxy-multi plugin
=== TEST 3: check prompt in request
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/chat",
"plugins": {
"ai-proxy": {
"provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer wrongtoken"
}
},
"override": {
"endpoint": "http://localhost:6724"
}
},
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": true
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 4: invalid chat completions request should fail
--- request
POST /chat
{"prompt": "What is 1+1?"}
--- error_code: 400
--- response_body chomp
request format doesn't match schema: property "messages" is required
=== TEST 5: non-violent prompt should succeed
--- request
POST /chat
{ "messages": [ { "role": "user", "content": "What is 1+1?"} ] }
--- error_code: 200
--- response_body_like eval
qr/kill you/
=== TEST 6: violent prompt should fail
--- request
POST /chat
{ "messages": [ { "role": "user", "content": "I want to kill you"} ] }
--- error_code: 200
--- response_body_like eval
qr/As an AI language model, I cannot write unethical or controversial content for you./
=== TEST 7: check ai response (stream=false)
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/chat",
"plugins": {
"ai-proxy": {
"provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer wrongtoken"
}
},
"override": {
"endpoint": "http://localhost:6724"
}
},
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": true,
"check_response": true,
"deny_code": 400,
"deny_message": "your request is rejected"
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 8: violent response should fail
--- request
POST /chat
{ "messages": [ { "role": "user", "content": "What is 1+1?"} ] }
--- error_code: 400
--- response_body_like eval
qr/your request is rejected/
=== TEST 9: check ai request
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
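-- Create one route per provider so the same request-side moderation
-- behaviour can be verified for openai, deepseek and openai-compatible.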
for _, provider in ipairs({"openai", "deepseek", "openai-compatible"}) do
local code, body = t('/apisix/admin/routes/' .. provider,
ngx.HTTP_PUT,
string.format([[{
"uri": "/chat-%s",
"plugins": {
"ai-proxy": {
"provider": "%s",
"auth": {
"header": {
"Authorization": "Bearer wrongtoken"
}
},
"override": {
"endpoint": "http://localhost:6724/v1/chat/completions"
}
},
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": true,
"check_response": false,
"deny_code": 400,
"deny_message": "your request is rejected"
}
}
}]], provider, provider)
)
if code >= 300 then
ngx.status = code
return
end
end
ngx.say("passed")
}
}
--- response_body
passed
=== TEST 10: violent response should fail for openai provider
--- request
POST /chat-openai
{ "messages": [ { "role": "user", "content": "I want to kill you"} ] }
--- error_code: 400
--- response_body_like eval
qr/your request is rejected/
=== TEST 11: violent response should fail for deepseek provider
--- request
POST /chat-deepseek
{ "messages": [ { "role": "user", "content": "I want to kill you"} ] }
--- error_code: 400
--- response_body_like eval
qr/your request is rejected/
=== TEST 12: violent response should fail for openai-compatible provider
--- request
POST /chat-openai-compatible
{ "messages": [ { "role": "user", "content": "I want to kill you"} ] }
--- error_code: 400
--- response_body_like eval
qr/your request is rejected/
=== TEST 13: content moderation should keep usage data in response
--- request
POST /chat-openai
{"messages":[{"role":"user","content":"I want to kill you"}]}
--- error_code: 400
--- response_body_like eval
qr/completion_tokens/
=== TEST 14: content moderation should keep real llm model in response
--- request
POST /chat-openai
{"model": "gpt-3.5-turbo","messages":[{"role":"user","content":"I want to kill you"}]}
--- error_code: 400
--- response_body_like eval
qr/gpt-3.5-turbo/
=== TEST 15: content moderation should keep usage data in response
--- request
POST /chat-openai
{"messages":[{"role":"user","content":"I want to kill you"}]}
--- error_code: 400
--- response_body_like eval
qr/completion_tokens/
=== TEST 16: content moderation should keep real llm model in response
--- request
POST /chat-openai
{"model": "gpt-3.5-turbo","messages":[{"role":"user","content":"I want to kill you"}]}
--- error_code: 400
--- response_body_like eval
qr/gpt-3.5-turbo/
=== TEST 17: set route with stream = true (SSE) and stream_mode = final_packet
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
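-- Streaming route backed by the SSE mock on port 7737 which returns
-- offensive content; the response is moderated in the default
-- final_packet stream mode.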
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
"instances": [
{
"name": "self-hosted",
"provider": "openai-compatible",
"weight": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "custom-instruct",
"max_tokens": 512,
"temperature": 1.0,
"stream": true
},
"override": {
"endpoint": "http://localhost:7737/v1/chat/completions?offensive=true"
}
}
],
"ssl_verify": false
},
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": false,
"check_response": true,
"deny_code": 400,
"deny_message": "your request is rejected"
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 18: test if SSE works as expected when response is offensive
--- config
location /t {
content_by_lua_block {
local http = require("resty.http")
local httpc = http.new()
local core = require("apisix.core")
local ok, err = httpc:connect({
scheme = "http",
host = "localhost",
port = ngx.var.server_port,
})
if not ok then
ngx.status = 500
ngx.say(err)
return
end
local params = {
method = "POST",
headers = {
["Content-Type"] = "application/json",
},
path = "/anything",
body = [[{
"messages": [
{ "role": "system", "content": "some content" }
],
"stream": true
}]],
}
local res, err = httpc:request(params)
if not res then
ngx.status = 500
ngx.say(err)
return
end
local final_res = {}
local inspect = require("inspect")
while true do
local chunk, err = res.body_reader() -- will read chunk by chunk
core.log.warn("CHUNK IS ", inspect(chunk))
if err then
core.log.error("failed to read response chunk: ", err)
break
end
if not chunk then
break
end
core.table.insert_tail(final_res, chunk)
end
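-- In final_packet mode the moderation verdict is expected to arrive
-- in a trailing data chunk; print the 5th collected chunk so the
-- pattern below can match it.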
ngx.print(final_res[5])
}
}
--- response_body_like eval
qr/"risk_level":"high"/
=== TEST 19: set route with stream = true (SSE) and stream_mode = realtime
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
"instances": [
{
"name": "self-hosted",
"provider": "openai-compatible",
"weight": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "custom-instruct",
"max_tokens": 512,
"temperature": 1.0,
"stream": true
},
"override": {
"endpoint": "http://localhost:7737/v1/chat/completions?offensive=true"
}
}
],
"ssl_verify": false
},
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": false,
"check_response": true,
"deny_code": 400,
"deny_message": "your request is rejected",
"stream_check_mode": "realtime",
"stream_check_cache_size": 5
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 20: test if SSE works as expected when the third response chunk is offensive and stream_mode = realtime
--- config
location /t {
content_by_lua_block {
local http = require("resty.http")
local httpc = http.new()
local core = require("apisix.core")
local ok, err = httpc:connect({
scheme = "http",
host = "localhost",
port = ngx.var.server_port,
})
if not ok then
ngx.status = 500
ngx.say(err)
return
end
local params = {
method = "POST",
headers = {
["Content-Type"] = "application/json",
},
path = "/anything",
body = [[{
"messages": [
{ "role": "system", "content": "some content" }
],
"stream": true
}]],
}
local res, err = httpc:request(params)
if not res then
ngx.status = 500
ngx.say(err)
return
end
local final_res = {}
local inspect = require("inspect")
while true do
local chunk, err = res.body_reader() -- will read chunk by chunk
core.log.warn("CHUNK IS ", inspect(chunk))
if err then
core.log.error("failed to read response chunk: ", err)
break
end
if not chunk then
break
end
core.table.insert_tail(final_res, chunk)
end
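-- In realtime mode the offensive part of the stream is expected to be
-- replaced with the deny message; the 3rd chunk should contain it.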
ngx.print(final_res[3])
}
}
--- response_body_like eval
qr/your request is rejected/
--- grep_error_log eval
qr/execute content moderation/
--- grep_error_log_out
execute content moderation
execute content moderation
=== TEST 21: set route with stream = true (SSE) and stream_mode = realtime with larger buffer and large timeout
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
"instances": [
{
"name": "self-hosted",
"provider": "openai-compatible",
"weight": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "custom-instruct",
"max_tokens": 512,
"temperature": 1.0,
"stream": true
},
"override": {
"endpoint": "http://localhost:7737/v1/chat/completions"
}
}
],
"ssl_verify": false
},
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": false,
"check_response": true,
"deny_code": 400,
"deny_message": "your request is rejected",
"stream_check_mode": "realtime",
"stream_check_cache_size": 30000,
"stream_check_interval": 30
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 22: test if SSE works, stream_mode = realtime, large buffer + large timeout: content moderation should be called only once
--- config
location /t {
content_by_lua_block {
local http = require("resty.http")
local httpc = http.new()
local core = require("apisix.core")
local ok, err = httpc:connect({
scheme = "http",
host = "localhost",
port = ngx.var.server_port,
})
if not ok then
ngx.status = 500
ngx.say(err)
return
end
local params = {
method = "POST",
headers = {
["Content-Type"] = "application/json",
},
path = "/anything",
body = [[{
"messages": [
{ "role": "system", "content": "some content" }
],
"stream": true
}]],
}
local res, err = httpc:request(params)
if not res then
ngx.status = 500
ngx.say(err)
return
end
local final_res = {}
local inspect = require("inspect")
while true do
local chunk, err = res.body_reader() -- will read chunk by chunk
core.log.warn("CHUNK IS ", inspect(chunk))
if err then
core.log.error("failed to read response chunk: ", err)
break
end
if not chunk then
break
end
core.table.insert_tail(final_res, chunk)
end
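-- With a large buffer and a long check interval the stream should pass
-- through untouched; print the chunk count plus the 6th chunk.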
ngx.print(#final_res .. final_res[6])
}
}
--- response_body_like eval
qr/6data:/
--- grep_error_log eval
qr/execute content moderation/
--- grep_error_log_out
execute content moderation
=== TEST 23: set route with stream = true (SSE) and stream_mode = realtime with small buffer
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
"instances": [
{
"name": "self-hosted",
"provider": "openai-compatible",
"weight": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "custom-instruct",
"max_tokens": 512,
"temperature": 1.0,
"stream": true
},
"override": {
"endpoint": "http://localhost:7737/v1/chat/completions"
}
}
],
"ssl_verify": false
},
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": false,
"check_response": true,
"deny_code": 400,
"deny_message": "your request is rejected",
"stream_check_mode": "realtime",
"stream_check_cache_size": 1,
"stream_check_interval": 3
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 24: test if SSE works, stream_mode = realtime, small buffer: content moderation will be called on each chunk
--- config
location /t {
content_by_lua_block {
local http = require("resty.http")
local httpc = http.new()
local core = require("apisix.core")
local ok, err = httpc:connect({
scheme = "http",
host = "localhost",
port = ngx.var.server_port,
})
if not ok then
ngx.status = 500
ngx.say(err)
return
end
local params = {
method = "POST",
headers = {
["Content-Type"] = "application/json",
},
path = "/anything",
body = [[{
"messages": [
{ "role": "system", "content": "some content" }
],
"stream": true
}]],
}
local res, err = httpc:request(params)
if not res then
ngx.status = 500
ngx.say(err)
return
end
local final_res = {}
local inspect = require("inspect")
while true do
local chunk, err = res.body_reader() -- will read chunk by chunk
core.log.warn("CHUNK IS ", inspect(chunk))
if err then
core.log.error("failed to read response chunk: ", err)
break
end
if not chunk then
break
end
core.table.insert_tail(final_res, chunk)
end
ngx.print(#final_res .. final_res[6])
}
}
--- response_body_like eval
qr/6data:/
--- grep_error_log eval
qr/execute content moderation/
--- grep_error_log_out
execute content moderation
execute content moderation
execute content moderation
=== TEST 25: set route with stream = true (SSE) and stream_mode = realtime with large buffer but small timeout
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
"instances": [
{
"name": "self-hosted",
"provider": "openai-compatible",
"weight": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "custom-instruct",
"max_tokens": 512,
"temperature": 1.0,
"stream": true
},
"override": {
"endpoint": "http://localhost:7737/v1/chat/completions?delay=true"
}
}
],
"ssl_verify": false
},
"ai-aliyun-content-moderation": {
"endpoint": "http://localhost:6724",
"region_id": "cn-shanghai",
"access_key_id": "fake-key-id",
"access_key_secret": "fake-key-secret",
"risk_level_bar": "high",
"check_request": false,
"check_response": true,
"deny_code": 400,
"deny_message": "your request is rejected",
"stream_check_mode": "realtime",
"stream_check_cache_size": 10000,
"stream_check_interval": 0.1
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 26: test if SSE works, stream_mode = realtime, large buffer + small timeout: content moderation will be called on each chunk
--- config
location /t {
content_by_lua_block {
local http = require("resty.http")
local httpc = http.new()
local core = require("apisix.core")
local ok, err = httpc:connect({
scheme = "http",
host = "localhost",
port = ngx.var.server_port,
})
if not ok then
ngx.status = 500
ngx.say(err)
return
end
local params = {
method = "POST",
headers = {
["Content-Type"] = "application/json",
},
path = "/anything",
body = [[{
"messages": [
{ "role": "system", "content": "some content" }
],
"stream": true
}]],
}
local res, err = httpc:request(params)
if not res then
ngx.status = 500
ngx.say(err)
return
end
local final_res = {}
local inspect = require("inspect")
while true do
local chunk, err = res.body_reader() -- will read chunk by chunk
core.log.warn("CHUNK IS ", inspect(chunk))
if err then
core.log.error("failed to read response chunk: ", err)
break
end
if not chunk then
break
end
core.table.insert_tail(final_res, chunk)
end
ngx.print(#final_res .. final_res[6])
}
}
--- response_body_like eval
qr/6data:/
--- grep_error_log eval
qr/execute content moderation/
--- grep_error_log_out
execute content moderation
execute content moderation
execute content moderation