
OpenAI API 生产模式
使用 Pydantic 的结构化输出
from openai import OpenAI
from pydantic import BaseModel, Field
client = OpenAI()
# Structured result of analysing one customer review; passed as
# ``response_format`` so the reply is parsed into this shape.
# NOTE: documented with comments rather than a class docstring so the JSON
# schema generated from this model stays exactly as it was.
class ProductReview(BaseModel):
    # Overall tone; the description tells the model which labels to use.
    sentiment: str = Field(
        description="positive, negative, or neutral",
    )
    # Integer rating, validated to the inclusive range 1..5.
    score: int = Field(
        description="Rating 1-5",
        ge=1,
        le=5,
    )
    # Topics raised in the review.
    key_themes: list[str]
    # Short free-text recap of the review.
    summary: str
    # Flag indicating whether the review requires a response.
    requires_response: bool
def analyze_review(text: str) -> ProductReview:
    """Analyze a customer review and return it as a ``ProductReview``.

    Sends ``text`` to ``gpt-4o-mini`` through the structured-output ``parse``
    helper with ``ProductReview`` as the response format.

    Args:
        text: Raw review text to analyze.

    Returns:
        The parsed ``ProductReview`` from the first choice.

    Raises:
        ValueError: If the response carries no parsed object (for example
            when the model refuses), instead of silently returning ``None``.
    """
    resp = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Analyze customer reviews."},
            {"role": "user", "content": f"Analyze: {text}"},
        ],
        response_format=ProductReview,
        temperature=0.1,
    )
    message = resp.choices[0].message
    if message.parsed is None:
        # ``parsed`` is empty when the model refused; surface that
        # explicitly so callers never receive None in place of a review.
        refusal = getattr(message, "refusal", None)
        raise ValueError(f"Model returned no parsed review (refusal: {refusal!r})")
    return message.parsed

函数调用
import json
# JSON-schema description of the arguments accepted by ``search_products``.
_SEARCH_PRODUCTS_PARAMETERS = {
    "type": "object",
    "properties": {
        "query": {"type": "string"},
        "category": {"type": "string", "enum": ["electronics", "clothing"]},
        "max_price": {"type": "number"},
    },
    # Only the free-text query is mandatory; the filters are optional.
    "required": ["query"],
}

# Function definition advertised to the model.
_SEARCH_PRODUCTS_FUNCTION = {
    "name": "search_products",
    "description": "Search product catalog",
    "parameters": _SEARCH_PRODUCTS_PARAMETERS,
}

# Tool list passed as ``tools=`` to the chat completions call.
tools = [
    {
        "type": "function",
        "function": _SEARCH_PRODUCTS_FUNCTION,
    },
]
def agent_loop(user_message: str, max_turns: int = 10) -> str:
    """Run a tool-calling conversation until the model gives a final answer.

    Each turn sends the running message list to ``gpt-4o`` together with
    ``tools``. If the reply contains tool calls, each one is executed via
    ``execute_tool`` and its result is appended as a ``tool`` message before
    the next turn.

    Args:
        user_message: The initial user request.
        max_turns: Upper bound on model round-trips, so a model that keeps
            requesting tools cannot loop forever.

    Returns:
        The text of the final assistant message, or ``""`` if it has no
        text content.

    Raises:
        RuntimeError: If no final answer is produced within ``max_turns``.
    """
    messages = [{"role": "user", "content": user_message}]
    for _ in range(max_turns):
        resp = client.chat.completions.create(
            model="gpt-4o", messages=messages, tools=tools
        )
        choice = resp.choices[0]
        messages.append(choice.message)

        tool_calls = choice.message.tool_calls
        # Finish on "stop", and also whenever there are no tool calls to
        # run (e.g. the reply was cut off): iterating ``None`` would raise.
        if choice.finish_reason == "stop" or not tool_calls:
            return choice.message.content or ""

        for tc in tool_calls:
            result = execute_tool(tc.function.name, json.loads(tc.function.arguments))
            messages.append({"role": "tool", "tool_call_id": tc.id, "content": result})

    raise RuntimeError(f"agent_loop produced no final answer within {max_turns} turns")

流式传输
async def stream_chat(messages):
    """Stream a ``gpt-4o`` chat completion, yielding text as it arrives.

    This is an async generator. It cannot return a value (``return`` with a
    value inside an async generator is a syntax error), so callers that need
    the complete text should join the yielded pieces themselves.

    Args:
        messages: Chat messages to send to the model.

    Yields:
        Each non-empty ``delta.content`` fragment from the stream, in order.
    """
    # NOTE(review): ``async_client`` is not defined in the visible part of
    # this file; presumably an ``AsyncOpenAI`` instance — confirm.
    stream = await async_client.chat.completions.create(
        model="gpt-4o", messages=messages, stream=True
    )
    async for chunk in stream:
        # Skip chunks with an empty ``choices`` list rather than indexing
        # into it and raising IndexError.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            yield piece

提示缓存
# GPT-4o automatically caches prompts of >= 1024 tokens.
# For long system prompts, put the stable content first.
system_prompt = """[Long stable system context - 2000 tokens]
...product documentation...
...rules and guidelines..."""
def cached_query(user_question: str) -> str:
    """Answer ``user_question`` with ``gpt-4o`` using the shared system prompt.

    The stable ``system_prompt`` is sent first and the per-call question
    last, so the unchanging prefix is the part eligible for prompt caching
    after the first call.

    Args:
        user_question: The question to ask for this call.

    Returns:
        The content of the first choice's message.
    """
    conversation = [
        {"role": "system", "content": system_prompt},  # cached
        {"role": "user", "content": user_question},  # dynamic
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
批量 API 以节省成本
import json
# One Batch API request per input text, addressed to the chat completions
# endpoint; ``custom_id`` carries the index of the text within the input list.
requests = [
    {
        "custom_id": f"req-{i}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4o-mini",
            "messages": [
                {"role": "user", "content": f"Summarize: {text}"},
            ],
        },
    }
    for i, text in enumerate(large_text_list)
]

# Write the batch file: one JSON object per line (JSONL).
with open("batch_requests.jsonl", "w", encoding="utf-8") as f:
    f.writelines(json.dumps(req) + "\n" for req in requests)

# Submit the batch (50% lower cost, 24-hour completion window).
# The upload handle is opened in a ``with`` block so it is closed once the
# file has been sent, instead of being leaked.
with open("batch_requests.jsonl", "rb") as f:
    batch_input_file = client.files.create(file=f, purpose="batch")

batch = client.batches.create(
    input_file_id=batch_input_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(f"Batch ID: {batch.id}")
成本优化技巧
| 策略 | 节省 |
| --- | --- |
| 批量 API | 50% 折扣 |
| 提示缓存 | 缓存 token 节省 50% |
| 简单任务使用 gpt-4o-mini | 便宜 15 倍 |
| 结构化输出(无需解析) | 减少重试次数 |
| Token 计数 | 避免过大的提示 |