From 3c3a63afead9bae9a68601bf9720f1a72c20cfc5 Mon Sep 17 00:00:00 2001 From: Se7en Date: Fri, 17 Jan 2025 20:30:17 +0800 Subject: [PATCH] sync baidu ai-proxy and ai-token-ratelimit docs --- .../ai/api-consumer/ai-token-ratelimit.md | 204 +++++++++++++++++ .../ai/api-consumer/ai-token-ratelimit.md | 210 +++++++++++++++++- .../zh-cn/plugins/ai/api-provider/ai-proxy.md | 10 +- 3 files changed, 407 insertions(+), 17 deletions(-) diff --git a/src/content/docs/latest/en/plugins/ai/api-consumer/ai-token-ratelimit.md b/src/content/docs/latest/en/plugins/ai/api-consumer/ai-token-ratelimit.md index 0650bbf1b0..c07e7aa2f6 100644 --- a/src/content/docs/latest/en/plugins/ai/api-consumer/ai-token-ratelimit.md +++ b/src/content/docs/latest/en/plugins/ai/api-consumer/ai-token-ratelimit.md @@ -168,3 +168,207 @@ rejected_msg: '{"code":-1,"msg":"Too many requests"}' redis: service_name: redis.static ``` + +## Example + +The AI Token Rate Limiting Plugin relies on Redis to track the remaining available tokens, so the Redis service must be deployed first. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis + ports: + - containerPort: 6379 +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + ports: + - port: 6379 + targetPort: 6379 + selector: + app: redis +--- +``` + +In this example, qwen is used as the AI service provider. Additionally, the AI Statistics Plugin must be configured, as the AI Token Rate Limiting Plugin depends on it to calculate the number of tokens consumed per request. The following configuration limits the total number of input and output tokens to 200 per minute. 
+ +```yaml +apiVersion: extensions.higress.io/v1alpha1 +kind: WasmPlugin +metadata: + name: ai-proxy + namespace: higress-system +spec: + matchRules: + - config: + provider: + type: qwen + apiTokens: + - "" + modelMapping: + 'gpt-3': "qwen-turbo" + 'gpt-35-turbo': "qwen-plus" + 'gpt-4-turbo': "qwen-max" + '*': "qwen-turbo" + ingress: + - qwen + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0 + phase: UNSPECIFIED_PHASE + priority: 100 +--- +apiVersion: extensions.higress.io/v1alpha1 +kind: WasmPlugin +metadata: + name: ai-statistics + namespace: higress-system +spec: + defaultConfig: + enable: true + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0 + phase: UNSPECIFIED_PHASE + priority: 200 +--- +apiVersion: extensions.higress.io/v1alpha1 +kind: WasmPlugin +metadata: + name: ai-token-ratelimit + namespace: higress-system +spec: + defaultConfig: + rule_name: default_limit_by_param_apikey + rule_items: + - limit_by_param: apikey + limit_keys: + - key: 123456 + token_per_minute: 200 + redis: + # By default, to reduce data plane pressure, the `global.onlyPushRouteCluster` parameter in Higress is set to true, meaning that Kubernetes Services are not automatically discovered. + # If you need to use Kubernetes Service for service discovery, set `global.onlyPushRouteCluster` to false, + # allowing you to directly set `service_name` to the Kubernetes Service without needing to create an McpBridge and an Ingress route for Redis. + # service_name: redis.default.svc.cluster.local + service_name: redis.dns + service_port: 6379 + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0 + phase: UNSPECIFIED_PHASE + priority: 600 +``` + +Note that the `service_name` in the Redis configuration of the AI Token Rate Limiting Plugin is derived from the service source configured in McpBridge. Additionally, we need to configure the access address of the qwen service in McpBridge. 
+ +```yaml +apiVersion: networking.higress.io/v1 +kind: McpBridge +metadata: + name: default + namespace: higress-system +spec: + registries: + - domain: dashscope.aliyuncs.com + name: qwen + port: 443 + type: dns + - domain: redis.default.svc.cluster.local # Kubernetes Service + name: redis + type: dns + port: 6379 +``` + +Create two routing rules separately. + +```yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + higress.io/backend-protocol: HTTPS + higress.io/destination: qwen.dns + higress.io/proxy-ssl-name: dashscope.aliyuncs.com + higress.io/proxy-ssl-server-name: "on" + labels: + higress.io/resource-definer: higress + name: qwen + namespace: higress-system +spec: + ingressClassName: higress + rules: + - host: qwen-test.com + http: + paths: + - backend: + resource: + apiGroup: networking.higress.io + kind: McpBridge + name: default + path: / + pathType: Prefix +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + higress.io/destination: redis.dns + higress.io/ignore-path-case: "false" + labels: + higress.io/resource-definer: higress + name: redis +spec: + ingressClassName: higress + rules: + - http: + paths: + - backend: + resource: + apiGroup: networking.higress.io + kind: McpBridge + name: default + path: / + pathType: Prefix +``` + +The rate limiting effect is triggered as follows: + +```bash +curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "Hello, who are you?" + } + ], + "stream": false +}' +{"id":"88cfa80f-545d-93b4-8ff3-3f5245ca33ba","choices":[{"index":0,"message":{"role":"assistant","content":"I am Tongyi Qianwen, an AI assistant developed by Alibaba Cloud. I can answer various questions, provide information, and have conversations with users. 
How can I assist you?"},"finish_reason":"stop"}],"created":1719909825,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":13,"completion_tokens":33,"total_tokens":46}} +curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "Hello, who are you?" + } + ], + "stream": false +}' +Too many requests # Rate limiting successful +``` diff --git a/src/content/docs/latest/zh-cn/plugins/ai/api-consumer/ai-token-ratelimit.md b/src/content/docs/latest/zh-cn/plugins/ai/api-consumer/ai-token-ratelimit.md index 239524fb73..5375f94329 100644 --- a/src/content/docs/latest/zh-cn/plugins/ai/api-consumer/ai-token-ratelimit.md +++ b/src/content/docs/latest/zh-cn/plugins/ai/api-consumer/ai-token-ratelimit.md @@ -59,8 +59,6 @@ description: AI Token限流插件配置参考 | password | string | 否 | - | redis密码 | | timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 | - - ## 配置示例 ### 识别请求参数 apikey,进行区别限流 @@ -89,8 +87,6 @@ redis: service_name: redis.static ``` - - ### 识别请求头 x-ca-key,进行区别限流 ```yaml @@ -117,8 +113,6 @@ redis: service_name: redis.static ``` - - ### 根据请求头 x-forwarded-for 获取对端IP,进行区别限流 ```yaml @@ -165,8 +159,6 @@ redis: service_name: redis.static ``` - - ### 识别cookie中的键值对,进行区别限流 ```yaml @@ -194,3 +186,205 @@ rejected_msg: '{"code":-1,"msg":"Too many requests"}' redis: service_name: redis.static ``` + +## 完整示例 + +AI Token 限流插件依赖 Redis 记录剩余可用的 token 数,因此首先需要部署 Redis 服务。 +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis + ports: + - containerPort: 6379 +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + ports: + - port: 6379 + targetPort: 6379 + selector: + app: redis +--- +``` + +在本例中,使用通义千问作为 AI 服务提供商。另外还需要设置 AI 
统计插件,因为 AI Token 限流插件依赖 AI 统计插件计算每次请求消耗的 token 数,以下配置限制每分钟的 input 和 output token 总数为 200 个。 + +```yaml +apiVersion: extensions.higress.io/v1alpha1 +kind: WasmPlugin +metadata: + name: ai-proxy + namespace: higress-system +spec: + matchRules: + - config: + provider: + type: qwen + apiTokens: + - "" + modelMapping: + 'gpt-3': "qwen-turbo" + 'gpt-35-turbo': "qwen-plus" + 'gpt-4-turbo': "qwen-max" + '*': "qwen-turbo" + ingress: + - qwen + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0 + phase: UNSPECIFIED_PHASE + priority: 100 +--- +apiVersion: extensions.higress.io/v1alpha1 +kind: WasmPlugin +metadata: + name: ai-statistics + namespace: higress-system +spec: + defaultConfig: + enable: true + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0 + phase: UNSPECIFIED_PHASE + priority: 200 +--- +apiVersion: extensions.higress.io/v1alpha1 +kind: WasmPlugin +metadata: + name: ai-token-ratelimit + namespace: higress-system +spec: + defaultConfig: + rule_name: default_limit_by_param_apikey + rule_items: + - limit_by_param: apikey + limit_keys: + - key: 123456 + token_per_minute: 200 + redis: + # 默认情况下,为了减轻数据面的压力,Higress 的 global.onlyPushRouteCluster 配置参数被设置为 true,意味着不会自动发现 Kubernetes Service + # 如果需要使用 Kubernetes Service 作为服务发现,可以将 global.onlyPushRouteCluster 参数设置为 false, + # 这样就可以直接将 service_name 设置为 Kubernetes Service, 而无须为 Redis 创建 McpBridge 以及 Ingress 路由 + # service_name: redis.default.svc.cluster.local + service_name: redis.dns + service_port: 6379 + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0 + phase: UNSPECIFIED_PHASE + priority: 600 +``` +注意,AI Token 限流插件中的 Redis 配置项 `service_name` 来自 McpBridge 中配置的服务来源,另外我们还需要在 McpBridge 中配置通义千问服务的访问地址。 + +```yaml +apiVersion: networking.higress.io/v1 +kind: McpBridge +metadata: + name: default + namespace: higress-system +spec: + registries: + - domain: dashscope.aliyuncs.com + name: qwen + port: 443 + type: dns + - 
domain: redis.default.svc.cluster.local # Kubernetes Service + name: redis + type: dns + port: 6379 +``` + +分别创建两条路由规则。 + +```yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + higress.io/backend-protocol: HTTPS + higress.io/destination: qwen.dns + higress.io/proxy-ssl-name: dashscope.aliyuncs.com + higress.io/proxy-ssl-server-name: "on" + labels: + higress.io/resource-definer: higress + name: qwen + namespace: higress-system +spec: + ingressClassName: higress + rules: + - host: qwen-test.com + http: + paths: + - backend: + resource: + apiGroup: networking.higress.io + kind: McpBridge + name: default + path: / + pathType: Prefix +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + higress.io/destination: redis.dns + higress.io/ignore-path-case: "false" + labels: + higress.io/resource-definer: higress + name: redis +spec: + ingressClassName: higress + rules: + - http: + paths: + - backend: + resource: + apiGroup: networking.higress.io + kind: McpBridge + name: default + path: / + pathType: Prefix +``` + +触发限流效果如下: + +```bash +curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" + } + ], + "stream": false +}' +{"id":"88cfa80f-545d-93b4-8ff3-3f5245ca33ba","choices":[{"index":0,"message":{"role":"assistant","content":"我是通义千问,由阿里云开发的AI助手。我可以回答各种问题、提供信息和与用户进行对话。有什么我可以帮助你的吗?"},"finish_reason":"stop"}],"created":1719909825,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":13,"completion_tokens":33,"total_tokens":46}} +curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" 
+ } + ], + "stream": false +}' +Too many requests # 限流成功 +``` diff --git a/src/content/docs/latest/zh-cn/plugins/ai/api-provider/ai-proxy.md b/src/content/docs/latest/zh-cn/plugins/ai/api-provider/ai-proxy.md index 1fc7a160ab..0153abb822 100644 --- a/src/content/docs/latest/zh-cn/plugins/ai/api-provider/ai-proxy.md +++ b/src/content/docs/latest/zh-cn/plugins/ai/api-provider/ai-proxy.md @@ -157,15 +157,7 @@ Groq 所对应的 `type` 为 `groq`。它并无特有的配置字段。 #### 文心一言(Baidu) -文心一言所对应的 `type` 为 `baidu`。它特有的配置字段如下: - -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -|--------------------|-----------------|------|-----|-----------------------------------------------------------| -| `baiduAccessKeyAndSecret` | array of string | 必填 | - | Baidu 的 Access Key 和 Secret Key,中间用 `:` 分隔,用于申请 apiToken。 | -| `baiduApiTokenServiceName` | string | 必填 | - | 请求刷新百度 apiToken 服务名称。 | -| `baiduApiTokenServiceHost` | string | 非必填 | - | 请求刷新百度 apiToken 服务域名,默认是 iam.bj.baidubce.com。 | -| `baiduApiTokenServicePort` | int64 | 非必填 | - | 请求刷新百度 apiToken 服务端口,默认是 443。 | - +文心一言所对应的 `type` 为 `baidu`。它并无特有的配置字段。 #### 360智脑