正在加载,请稍候…

AI图像生成:Stable Diffusion API与ComfyUI工作流

使用 Stable Diffusion API、ComfyUI 和 FLUX 构建生产级图像生成系统,涵盖提示工程、ControlNet、img2img 及可扩展推理服务。

AI图像生成:Stable Diffusion API与ComfyUI工作流

AI图像生成:生产指南

使用diffusers的Stable Diffusion

import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from PIL import Image
import io

def load_pipeline(model_id: str = "stabilityai/stable-diffusion-xl-base-1.0"):
    """Load a half-precision text-to-image pipeline configured for low VRAM use.

    Args:
        model_id: Hugging Face Hub repository id (or local path) of the
            checkpoint to load.

    Returns:
        The loaded diffusers pipeline with a DPM-Solver multistep scheduler
        and model CPU offload enabled.
    """
    # Local import so this block does not depend on edits to the file's
    # import section. DiffusionPipeline picks the concrete pipeline class from
    # the checkpoint's model_index.json; the default SDXL checkpoint is not
    # loadable as a plain StableDiffusionPipeline (SD 1.x/2.x architecture).
    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    # Faster scheduler: reuse the checkpoint's scheduler config.
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

    # Do NOT call pipe.to("cuda") first: model CPU offload manages device
    # placement itself, and moving everything to the GPU beforehand defeats
    # the memory saving.
    pipe.enable_model_cpu_offload()  # saves VRAM

    try:
        pipe.enable_xformers_memory_efficient_attention()
    except ImportError:
        # xformers is an optional package; without it the pipeline keeps its
        # default attention implementation.
        # NOTE(review): assumes a missing xformers surfaces as ImportError /
        # ModuleNotFoundError — confirm against the installed diffusers version.
        pass
    return pipe

# Load the pipeline once at import time; generate_image() reads this
# module-level `pipe` on every call.
pipe = load_pipeline()

def generate_image(
    prompt: str,
    negative_prompt: str = "blurry, low quality, deformed",
    width: int = 1024,
    height: int = 1024,
    num_inference_steps: int = 25,
    guidance_scale: float = 7.5,
    seed: int = None,
) -> Image.Image:
    """Generate a single image from a text prompt with the module-level pipeline.

    Args:
        prompt: Text describing the desired image.
        negative_prompt: Text describing what the image should avoid.
        width: Requested output width, forwarded to the pipeline.
        height: Requested output height, forwarded to the pipeline.
        num_inference_steps: Number of denoising steps.
        guidance_scale: Guidance strength forwarded to the pipeline.
        seed: Seed for reproducible output, or ``None`` to let the pipeline
            pick its own randomness. ``0`` is a valid seed.

    Returns:
        The first image of the pipeline output.
    """
    # Compare against None explicitly: a truthiness test would treat seed=0
    # as "no seed" and silently produce non-reproducible results.
    generator = None
    if seed is not None:
        generator = torch.Generator("cuda").manual_seed(seed)

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator,
    )
    return output.images[0]

# Example: render one image with the default settings and save it as a PNG
# file in the current working directory.
img = generate_image("A futuristic city at sunset, photorealistic, 8k")
img.save("output.png")

AI图像生成:Stable Diffusion API与ComfyUI工作流插图

使用Replicate API的FLUX

import replicate
import requests
from PIL import Image
from io import BytesIO

def flux_generate(prompt: str, aspect_ratio: str = "1:1") -> Image.Image:
    """Generate an image with FLUX 1.1 Pro through the Replicate API.

    Args:
        prompt: Text describing the desired image.
        aspect_ratio: Aspect ratio string forwarded to the model.

    Returns:
        The generated image, decoded with PIL.

    Raises:
        requests.HTTPError: If the image has to be downloaded by URL and the
            download returns an error status.
    """
    output = replicate.run(
        "black-forest-labs/flux-1.1-pro",
        input={
            "prompt": prompt,
            "aspect_ratio": aspect_ratio,
            "output_format": "webp",
            "output_quality": 80,
        },
    )

    # Depending on the model, the client returns either one output or a list
    # of outputs. Indexing a single URL string with [0] would yield just its
    # first character, so only index real sequences.
    first = output[0] if isinstance(output, (list, tuple)) else output

    if hasattr(first, "read"):
        # Newer replicate clients hand back a file-like output object whose
        # bytes can be read directly.
        # NOTE(review): relies on that object exposing read() — confirm
        # against the installed replicate client version.
        image_bytes = first.read()
    else:
        # Older clients return the image URL; download the generated image.
        resp = requests.get(str(first), timeout=60)
        resp.raise_for_status()  # do not try to decode an error page as an image
        image_bytes = resp.content

    return Image.open(BytesIO(image_bytes))

AI图像生成:Stable Diffusion API与ComfyUI工作流插图

用于引导生成的ControlNet

from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from controlnet_aux import CannyDetector

# ControlNet weights for Canny-edge conditioning, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny",
    torch_dtype=torch.float16,
)

# Stable Diffusion 1.5 pipeline with the ControlNet attached, moved to the GPU.
# NOTE(review): this rebinds the module-level name `pipe`. If this snippet
# shares a module with generate_image(), that function will start calling this
# ControlNet pipeline instead of the text-to-image one — confirm the snippets
# are meant to live in separate modules, or give this pipeline its own name.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")

def generate_from_sketch(sketch_path: str, prompt: str) -> Image.Image:
    """Generate an image guided by the edges of a sketch.

    The image at ``sketch_path`` is reduced to a Canny edge map, which is
    passed to the module-level ControlNet pipeline as the conditioning image
    together with ``prompt``.

    Args:
        sketch_path: Path of the sketch image to open.
        prompt: Text describing the desired result.

    Returns:
        The first image of the pipeline output.
    """
    source_image = Image.open(sketch_path)

    # Extract the Canny edges that steer the generation.
    edge_detector = CannyDetector()
    edge_map = edge_detector(source_image, low_threshold=100, high_threshold=200)

    pipeline_kwargs = {
        "prompt": prompt,
        "image": edge_map,
        "num_inference_steps": 20,
        "guidance_scale": 7.5,
        "controlnet_conditioning_scale": 0.8,
    }
    result = pipe(**pipeline_kwargs)
    return result.images[0]

AI图像生成:Stable Diffusion API与ComfyUI工作流插图

FastAPI图像生成服务器

import asyncio
import uuid
from typing import Optional

from fastapi import FastAPI, BackgroundTasks
from fastapi.responses import Response
from pydantic import BaseModel

# FastAPI application object; the /generate and /result routes are registered on it.
app = FastAPI()

class GenerateRequest(BaseModel):
    """JSON body accepted by ``POST /generate``."""

    # Text describing the desired image (required).
    prompt: str
    # Text describing what the image should avoid.
    negative_prompt: str = "blurry, low quality"
    # Requested output size, forwarded to generate_image().
    width: int = 1024
    height: int = 1024
    # Optional seed for reproducible output. Declared Optional so that an
    # explicit JSON null validates; a bare `int` annotation with a None default
    # rejects null under pydantic v2.
    seed: Optional[int] = None

# Queue used to manage GPU requests: /generate enqueues (job_id, request)
# tuples and process_queue() consumes them one at a time.
queue = asyncio.Queue()
# Finished PNG bytes keyed by job id; /result removes an entry when it returns it.
results = {}

@app.post("/generate")
async def generate(request: GenerateRequest):
    """Enqueue a generation job and return its id without waiting for the image.

    The returned ``job_id`` is the key to poll ``/result/{job_id}`` with.
    """
    job_id = str(uuid.uuid4())
    job = (job_id, request)
    await queue.put(job)
    return dict(job_id=job_id, status="queued")

@app.get("/result/{job_id}")
async def get_result(job_id: str):
    """Return the finished PNG for ``job_id``, or a "processing" status.

    A finished result is removed from ``results`` when it is returned, so it
    can be fetched only once. Any id with no stored result — including ids
    that were never issued — is reported as still processing.
    """
    try:
        png_bytes = results.pop(job_id)
    except KeyError:
        return {"status": "processing"}
    return Response(content=png_bytes, media_type="image/png")

async def process_queue():
    """Consume queued jobs forever, rendering one image at a time.

    Each job is a ``(job_id, request)`` tuple taken from ``queue``. The
    finished image is stored in ``results[job_id]`` as PNG bytes.

    A job that raises is logged and skipped so one bad request cannot stop the
    worker; its id never appears in ``results``.

    NOTE(review): nothing in the visible code schedules this coroutine. It has
    to be started as a background task (for example from an application
    startup hook), otherwise queued jobs are never processed — confirm where
    it is launched.
    """
    import io
    import logging

    log = logging.getLogger(__name__)

    def _render_png(req) -> bytes:
        """Run the blocking pipeline call and encode the image as PNG bytes."""
        img = generate_image(
            req.prompt, req.negative_prompt,
            req.width, req.height, seed=req.seed,
        )
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        return buf.getvalue()

    loop = asyncio.get_running_loop()
    while True:
        job_id, req = await queue.get()
        try:
            # Inference is a long blocking call. Running it in the default
            # executor keeps the event loop free to answer /generate and
            # /result while the image renders. Awaiting here still processes
            # jobs strictly one after another.
            results[job_id] = await loop.run_in_executor(None, _render_png, req)
        except Exception:
            # Worker-loop boundary: log the failure and move on to the next job.
            log.exception("image generation failed for job %s", job_id)
        finally:
            queue.task_done()

提示工程最佳实践

# Effective SD prompt structure: subject first, then style, then quality tags.
def build_sd_prompt(subject: str, style: str, quality_tags: list) -> str:
    """Join subject, style and quality tags into one comma-separated prompt.

    Empty components are skipped, so an empty ``style`` or an empty
    ``quality_tags`` list does not leave a dangling ``", "`` in the prompt.

    Args:
        subject: Main subject of the image.
        style: Style description.
        quality_tags: Extra tags appended after the style, in order.

    Returns:
        The components joined with ``", "``.
    """
    parts = [subject, style, *(quality_tags or ())]
    return ", ".join(part for part in parts if part)

# Example positive prompt assembled from subject, style and quality tags.
positive = build_sd_prompt(
    subject="a professional portrait of a software engineer",
    style="cinematic lighting, shallow depth of field",
    quality_tags=["photorealistic", "8k", "sharp focus", "professional photography"]
)

# Example negative prompt: a comma-separated list of things to avoid.
negative = "cartoon, anime, painting, illustration, low quality, blurry, watermark"

ComfyUI API集成

import websocket
import json

def comfy_generate(workflow: dict, server_addr: str = "127.0.0.1:8188") -> dict:
    """Submit a workflow to ComfyUI, wait for it to finish, and return its outputs.

    Args:
        workflow: Workflow graph sent as the ``prompt`` field of the
            ``/prompt`` request.
        server_addr: ``host:port`` of the ComfyUI server.

    Returns:
        The ``outputs`` entry of the job's ``/history`` record. This is output
        metadata, not raw image bytes.
        NOTE(review): the image files presumably still have to be downloaded
        separately (ComfyUI's example uses the ``/view`` endpoint) — confirm.

    Raises:
        RuntimeError: If the server reports an execution error for this job.
    """
    import urllib.request
    import uuid

    # Identify this client on both the websocket and the prompt, as ComfyUI's
    # official websocket example does, so status messages for the job are
    # routed to this connection.
    client_id = str(uuid.uuid4())

    # Connect BEFORE submitting: a short job could otherwise finish before the
    # socket is listening, and the wait loop below would never see it complete.
    ws = websocket.WebSocket()
    ws.connect(f"ws://{server_addr}/ws?clientId={client_id}")
    try:
        payload = json.dumps(
            {"prompt": workflow, "client_id": client_id}
        ).encode("utf-8")
        req = urllib.request.Request(f"http://{server_addr}/prompt", data=payload)
        with urllib.request.urlopen(req) as response:
            prompt_id = json.loads(response.read())["prompt_id"]

        # Wait for the status message that marks the end of this job.
        while True:
            frame = ws.recv()
            if not isinstance(frame, str) or not frame:
                # Non-text frames (e.g. binary previews) and empty frames are
                # not JSON status messages.
                continue
            msg = json.loads(frame)
            data = msg.get("data") or {}
            if data.get("prompt_id") != prompt_id:
                continue
            msg_type = msg.get("type")
            if msg_type == "execution_error":
                # Fail loudly instead of waiting forever for a completion
                # message that will not arrive.
                raise RuntimeError(f"ComfyUI execution failed: {data}")
            if msg_type == "executing" and data.get("node") is None:
                break  # node None means execution of this prompt finished
    finally:
        ws.close()

    # Fetch the outputs recorded for the finished job.
    with urllib.request.urlopen(
        f"http://{server_addr}/history/{prompt_id}"
    ) as response:
        history = json.loads(response.read())
    return history[prompt_id]["outputs"]