
构建生产级RAG系统
RAG(Retrieval-Augmented Generation,检索增强生成)将 LLM 与外部知识检索相结合,以提供准确、最新的响应。
文档分块
from langchain.text_splitter import RecursiveCharacterTextSplitter
def create_chunks(documents, chunk_size=1000, overlap=200):
    """Split ``documents`` into overlapping chunks.

    Args:
        documents: Documents handed to the splitter's ``split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
        overlap: Value forwarded as the splitter's ``chunk_overlap``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``documents``.
    """
    # Separator list given to the splitter: paragraph break, line break,
    # sentence end, space, and finally the empty string.
    boundaries = ["\n\n", "\n", ". ", " ", ""]
    text_splitter = RecursiveCharacterTextSplitter(
        separators=boundaries,
        chunk_overlap=overlap,
        chunk_size=chunk_size,
    )
    pieces = text_splitter.split_documents(documents)
    return pieces

向量存储
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import PGVector
def build_vector_store(chunks, collection_name="knowledge_base"):
    """Embed ``chunks`` and store them in a PGVector collection.

    Args:
        chunks: Documents passed to ``PGVector.from_documents``.
        collection_name: Name of the target collection.

    Returns:
        The vector store returned by ``PGVector.from_documents``.

    The connection string is read from the ``DATABASE_URL`` environment
    variable and forwarded as-is; it is ``None`` when the variable is unset.
    """
    # `os` is not imported at file level, so the original call to
    # `os.getenv` raised NameError. Import it here to keep the block
    # self-contained.
    import os

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    return PGVector.from_documents(
        documents=chunks,
        embedding=embeddings,
        collection_name=collection_name,
        connection_string=os.getenv("DATABASE_URL"),
    )

重排序
from sentence_transformers import CrossEncoder
class ReRanker:
    """Re-order retrieved documents using cross-encoder relevance scores."""

    def __init__(self, model_name="cross-encoder/ms-marco-MiniLM-L-6-v2"):
        """Load the cross-encoder used for scoring.

        Args:
            model_name: Identifier passed to ``CrossEncoder``. The default is
                the checkpoint this class previously hard-coded, so
                ``ReRanker()`` behaves as before.
        """
        self.model = CrossEncoder(model_name)

    def rerank(self, query, documents, top_k=3):
        """Return the ``top_k`` documents with the highest model scores.

        Args:
            query: Query text paired with each document for scoring.
            documents: Iterable of objects exposing ``page_content``.
            top_k: Maximum number of documents to return.

        Returns:
            A list of at most ``top_k`` documents, highest score first.
            Documents with equal scores keep their input order. Returns an
            empty list when ``documents`` is empty.
        """
        # Materialize once: the input is walked twice below (to build the
        # pairs and again in zip), which would exhaust a generator and
        # silently yield no results.
        documents = list(documents)
        if not documents:
            # Nothing to score; avoid sending an empty batch to the model.
            return []

        pairs = [(query, doc.page_content) for doc in documents]
        scores = self.model.predict(pairs)
        ranked = sorted(zip(documents, scores), key=lambda x: x[1], reverse=True)
        return [doc for doc, _ in ranked[:top_k]]

HyDE(Hypothetical Document Embeddings,假设性文档嵌入)
def hyde_retrieve(vectorstore, query, k=5):
    """Retrieve documents by searching with an LLM-written hypothetical answer.

    Args:
        vectorstore: Object exposing ``similarity_search(text, k=...)``.
        query: User question interpolated into the generation prompt.
        k: Number of results requested from the similarity search.

    Returns:
        The result of ``vectorstore.similarity_search`` for the generated
        text.
    """
    from langchain_openai import ChatOpenAI

    generator = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    prompt = f"Write a document answering: {query}"
    # Search with the generated document text rather than the raw query.
    generated_text = generator.invoke(prompt).content
    matches = vectorstore.similarity_search(generated_text, k=k)
    return matches
评估
from ragas import evaluate
from ragas.metrics import answer_relevancy, faithfulness, context_recall
def evaluate_rag(qa_pairs, pipeline):
    """Run ``pipeline`` over each question and score the outputs with ragas.

    Args:
        qa_pairs: Iterable of mappings with ``"question"`` and
            ``"ground_truth"`` keys.
        pipeline: Callable taking a question and returning a two-item result
            that is unpacked as ``(answer, contexts)``.

    Returns:
        The result of ``ragas.evaluate`` using the answer relevancy,
        faithfulness and context recall metrics.
    """
    # NOTE(review): the records are passed to `evaluate` as a plain list of
    # dicts; confirm the installed ragas version accepts that rather than
    # requiring a dataset object.
    records = []
    for item in qa_pairs:
        answer, contexts = pipeline(item["question"])
        records.append(
            {
                "question": item["question"],
                "answer": answer,
                "contexts": contexts,
                "ground_truth": item["ground_truth"],
            }
        )
    scoring_metrics = [answer_relevancy, faithfulness, context_recall]
    return evaluate(records, metrics=scoring_metrics)
最佳实践
| 策略 | 优势 |
|---|---|
| 语义分块 | 更好的上下文 |
| HyDE | 更好的召回率 |
| 重排序 | 更好的精确度 |
| 混合搜索 | 最佳覆盖率 |