
# AI图像生成：生产指南
## 使用diffusers的Stable Diffusion
import io
from typing import Optional

import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from PIL import Image
def load_pipeline(model_id: str = "stabilityai/stable-diffusion-xl-base-1.0"):
    """Load a half-precision text-to-image pipeline configured for GPU use.

    Args:
        model_id: Hugging Face Hub repository id (or local path) of the
            checkpoint to load. Defaults to the SDXL base checkpoint.

    Returns:
        The loaded diffusers pipeline, using DPMSolverMultistepScheduler
        with model CPU offload and xFormers attention enabled.
    """
    # Imported locally so this function stands on its own regardless of the
    # file-level import block.
    from diffusers import AutoPipelineForText2Image

    # AutoPipelineForText2Image selects the pipeline class that matches the
    # checkpoint (the SDXL pipeline for the default model_id). The SD 1.x
    # StableDiffusionPipeline class cannot drive an SDXL checkpoint.
    pipe = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    # Faster scheduler: reuse the checkpoint's scheduler config.
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    # Saves VRAM. Offloading manages device placement itself, so the pipeline
    # must not also be moved with pipe.to("cuda") beforehand.
    pipe.enable_model_cpu_offload()
    pipe.enable_xformers_memory_efficient_attention()
    return pipe
# Module-level pipeline instance, created once at import time.
# generate_image() reads this global.
pipe = load_pipeline()
def generate_image(
    prompt: str,
    negative_prompt: str = "blurry, low quality, deformed",
    width: int = 1024,
    height: int = 1024,
    num_inference_steps: int = 25,
    guidance_scale: float = 7.5,
    seed: Optional[int] = None,
) -> Image.Image:
    """Generate one image from a text prompt with the module-level pipeline.

    Args:
        prompt: Text describing the desired image.
        negative_prompt: Text describing what the image should not contain.
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of denoising steps passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        seed: Seed for the random generator. ``None`` leaves the generator
            unset; any integer, including ``0``, is used as the seed.

    Returns:
        The first image produced by the pipeline.
    """
    # Compare against None explicitly: a truthiness test would silently
    # discard seed=0, which is a perfectly valid seed.
    generator = None
    if seed is not None:
        generator = torch.Generator("cuda").manual_seed(seed)
    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator,
    )
    return output.images[0]
# Example usage: generate one image with the default settings and write it
# to output.png.
img = generate_image("A futuristic city at sunset, photorealistic, 8k")
img.save("output.png")

## 使用Replicate API的FLUX
import replicate
import requests
from PIL import Image
from io import BytesIO
def flux_generate(prompt: str, aspect_ratio: str = "1:1") -> Image.Image:
    """Generate an image with FLUX 1.1 Pro through the Replicate API.

    Args:
        prompt: Text describing the desired image.
        aspect_ratio: Aspect ratio string forwarded to the model.

    Returns:
        The generated image, requested as WebP at quality 80.

    Raises:
        RuntimeError: If Replicate returns an empty output list.
        requests.HTTPError: If downloading the generated image fails.
    """
    output = replicate.run(
        "black-forest-labs/flux-1.1-pro",
        input={
            "prompt": prompt,
            "aspect_ratio": aspect_ratio,
            "output_format": "webp",
            "output_quality": 80,
        },
    )
    # The model yields a single result, which may or may not be wrapped in a
    # list depending on the model and client version. Indexing a bare URL
    # string with [0] would return its first character, so unwrap explicitly.
    if isinstance(output, (list, tuple)):
        if not output:
            raise RuntimeError("Replicate returned no output for the FLUX request")
        result = output[0]
    else:
        result = output

    if hasattr(result, "read"):
        # Newer clients return a file-like object that can be read directly.
        data = result.read()
    else:
        # Older clients return a URL: download the generated image.
        resp = requests.get(str(result), timeout=60)
        resp.raise_for_status()
        data = resp.content
    return Image.open(BytesIO(data))

## 用于引导生成的ControlNet
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from controlnet_aux import CannyDetector
# Canny-edge ControlNet weights, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny",
    torch_dtype=torch.float16,
)
# Bound to its own name: rebinding the module-level `pipe` here would replace
# the text-to-image pipeline that generate_image() calls.
controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)
controlnet_pipe = controlnet_pipe.to("cuda")


def generate_from_sketch(sketch_path: str, prompt: str) -> Image.Image:
    """Generate an image guided by the Canny edges of a sketch.

    Args:
        sketch_path: Path of the sketch image to read.
        prompt: Text describing the desired image.

    Returns:
        The first image produced by the ControlNet pipeline.
    """
    canny = CannyDetector()
    # Extract Canny edges. The context manager closes the sketch file as soon
    # as the edge map has been computed.
    with Image.open(sketch_path) as sketch:
        control_image = canny(sketch, low_threshold=100, high_threshold=200)
    output = controlnet_pipe(
        prompt=prompt,
        image=control_image,
        num_inference_steps=20,
        guidance_scale=7.5,
        controlnet_conditioning_scale=0.8,
    )
    return output.images[0]

## FastAPI图像生成服务器
from fastapi import FastAPI, BackgroundTasks
from fastapi.responses import Response
from pydantic import BaseModel
import asyncio
import uuid
# Application object; the @app.post / @app.get decorators below register
# their routes on it.
app = FastAPI()
class GenerateRequest(BaseModel):
    """Request body accepted by ``POST /generate``."""

    # Text describing the desired image.
    prompt: str
    # Text describing what the image should not contain.
    negative_prompt: str = "blurry, low quality"
    # Output size in pixels.
    width: int = 1024
    height: int = 1024
    # Optional so clients may omit the field or send an explicit null; a bare
    # `int` annotation with a None default misdeclares the field's type.
    seed: Optional[int] = None
# Queue used to manage GPU requests: holds (job_id, request) tuples.
queue = asyncio.Queue()
# Finished jobs, job_id -> PNG bytes. Written by process_queue() and popped
# by get_result().
results = {}
@app.post("/generate")
async def generate(request: GenerateRequest):
    """Enqueue a generation job and return its id without waiting for it."""
    ticket = str(uuid.uuid4())
    job = (ticket, request)
    await queue.put(job)
    return {"job_id": ticket, "status": "queued"}
@app.get("/result/{job_id}")
async def get_result(job_id: str):
    """Return the finished PNG for a job, or a "processing" status.

    A stored result is removed when it is fetched, so a later request for the
    same id reports "processing" again.
    """
    try:
        png = results.pop(job_id)
    except KeyError:
        # No stored result under this id.
        return {"status": "processing"}
    return Response(content=png, media_type="image/png")
async def process_queue():
    """Worker loop: take queued jobs one at a time and store their PNG bytes.

    Each job is rendered in a worker thread so the blocking GPU call does not
    stall the event loop; jobs are still processed strictly one after another.
    A job that fails is logged and skipped, so one bad request cannot stop the
    worker. Such a job never gets an entry in ``results`` and therefore keeps
    reporting "processing".

    This coroutine never returns. It has to be scheduled as a background task
    by the application for queued jobs to be processed.
    """
    import logging

    log = logging.getLogger(__name__)
    loop = asyncio.get_running_loop()

    def _render_png(req) -> bytes:
        # Runs in a worker thread: generate the image and encode it as PNG.
        img = generate_image(
            req.prompt, req.negative_prompt,
            req.width, req.height, seed=req.seed,
        )
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        return buf.getvalue()

    while True:
        job_id, req = await queue.get()
        try:
            results[job_id] = await loop.run_in_executor(None, _render_png, req)
        except Exception:
            # Top-level worker boundary: record the failure and keep serving.
            log.exception("Image generation failed for job %s", job_id)
        finally:
            queue.task_done()
## 提示工程最佳实践
# Effective SD prompt structure: subject first, then style, then quality tags.
def build_sd_prompt(subject: str, style: str, quality_tags: list) -> str:
    """Join subject, style and quality tags into one comma-separated prompt.

    Empty parts are skipped, so an empty ``quality_tags`` list (or an empty
    subject/style) does not leave a dangling or doubled ", " separator.

    Args:
        subject: What the image should depict.
        style: Style description.
        quality_tags: Additional tags appended after the style.

    Returns:
        The non-empty parts joined with ", ".
    """
    parts = [subject, style, *quality_tags]
    return ", ".join(part for part in parts if part)
# Example: build a positive prompt from a subject, a style and quality tags.
positive = build_sd_prompt(
subject="a professional portrait of a software engineer",
style="cinematic lighting, shallow depth of field",
quality_tags=["photorealistic", "8k", "sharp focus", "professional photography"]
)
# Example negative prompt listing traits to avoid.
negative = "cartoon, anime, painting, illustration, low quality, blurry, watermark"
## ComfyUI API集成
import websocket
import json
def comfy_generate(workflow: dict, server_addr: str = "127.0.0.1:8188") -> bytes:
    """Submit a workflow to ComfyUI and return the first output image's bytes.

    Args:
        workflow: Workflow graph, sent as the ``prompt`` field of the request.
        server_addr: ``host:port`` of the ComfyUI server.

    Returns:
        The raw bytes of the first image listed in the workflow's outputs.

    Raises:
        RuntimeError: If the finished workflow produced no image output.
    """
    import urllib.parse
    import urllib.request
    import uuid

    client_id = str(uuid.uuid4())
    ws = websocket.WebSocket()
    # Connect before submitting so the completion message cannot be missed
    # when the workflow finishes quickly.
    ws.connect(f"ws://{server_addr}/ws?clientId={client_id}")
    try:
        payload = json.dumps(
            {"prompt": workflow, "client_id": client_id}
        ).encode("utf-8")
        req = urllib.request.Request(f"http://{server_addr}/prompt", data=payload)
        with urllib.request.urlopen(req) as response:
            prompt_id = json.loads(response.read())["prompt_id"]

        # Wait for the "executing" message with node None for our prompt.
        while True:
            frame = ws.recv()
            if not isinstance(frame, str):
                # Binary frames are not JSON status messages; skip them.
                continue
            msg = json.loads(frame)
            if msg["type"] == "executing" and msg["data"]["node"] is None:
                if msg["data"]["prompt_id"] == prompt_id:
                    break
    finally:
        ws.close()

    # Look up the outputs recorded for this prompt.
    history_url = f"http://{server_addr}/history/{prompt_id}"
    with urllib.request.urlopen(history_url) as response:
        history = json.loads(response.read())
    outputs = history[prompt_id]["outputs"]

    # Download the first image any output node reports, via /view.
    for node_output in outputs.values():
        for image in node_output.get("images", []):
            query = urllib.parse.urlencode(
                {
                    "filename": image["filename"],
                    "subfolder": image["subfolder"],
                    "type": image["type"],
                }
            )
            view_url = f"http://{server_addr}/view?{query}"
            with urllib.request.urlopen(view_url) as response:
                return response.read()
    raise RuntimeError(f"ComfyUI prompt {prompt_id} produced no image output")