# --- Source-page residue (preserved as comments; was breaking the YAML parse) ---
# 生產級 RAG 管線 (Production-grade RAG pipeline)
# AI/ML · 7 個節點 · 6 條連接 (7 nodes · 6 edges) · tags: ai, ml
# 視覺化 (Visualization tab label)
# ex-ai-ml-rag-pipeline.osop.yaml
# Production RAG Pipeline
# Embed documents, retrieve context, generate grounded answers, validate faithfulness
#
# NOTE(review): the source had lost all indentation; nesting below is reconstructed
# from field semantics (runtime holds provider/model/config, endpoint/method/url,
# or engine/connection; inputs/outputs/timeout_sec/retry_policy/explain sit at
# node level) — confirm against the OSOP 2.0 schema.
---
osop_version: "2.0"
id: ai-ml-rag-pipeline
name: "生產級 RAG 管線"

nodes:
  # Ingestion side: documents enter via API, then are chunked and embedded.
  - id: ingest_documents
    type: api
    purpose: Accept document batch via upload API and register in document store
    runtime:
      endpoint: /api/v1/ingest
      method: POST
      url: "https://rag-service.internal"
    outputs: [document_ids, total_pages]
    timeout_sec: 120

  - id: chunk_and_embed
    type: agent
    purpose: Split documents into semantic chunks and generate vector embeddings
    runtime:
      provider: openai
      model: text-embedding-3-large
      config:
        chunk_size: 512
        chunk_overlap: 64
        embedding_dimensions: 1536
    inputs: [document_ids]
    outputs: [embedding_count, vector_store_id]
    timeout_sec: 300
    retry_policy:
      max_retries: 3
      backoff_sec: 5
    explain: |
      Uses recursive character splitting with sentence boundary awareness.
      Each chunk is embedded independently and stored with source metadata.

  # Query side: user query -> hybrid retrieval -> grounded generation.
  - id: receive_query
    type: api
    purpose: Accept user query through the search endpoint
    runtime:
      endpoint: /api/v1/query
      method: POST
      url: "https://rag-service.internal"
    outputs: [user_query, session_id]

  - id: retrieve_context
    type: db
    purpose: Perform hybrid search combining vector similarity and BM25 keyword matching
    runtime:
      engine: pgvector
      # Quoted: plain scalars containing "://" are fragile across YAML tooling.
      connection: "postgresql://rag:5432/vectors"
    inputs: [user_query, vector_store_id]
    outputs: [retrieved_chunks, relevance_scores]
    timeout_sec: 10
    explain: |
      Reciprocal rank fusion combines dense and sparse retrieval results.
      Returns top-8 chunks with relevance scores above 0.7 threshold.

  - id: generate_answer
    type: agent
    purpose: Generate grounded answer from retrieved context using Claude
    runtime:
      provider: anthropic
      model: claude-sonnet-4-20250514
      config:
        max_tokens: 2048
        # Low temperature keeps answers close to the retrieved context.
        temperature: 0.1
        system_prompt: "Answer based only on the provided context. Cite sources."
    inputs: [user_query, retrieved_chunks]
    outputs: [answer, citations]
    timeout_sec: 30
    retry_policy:
      max_retries: 2
      backoff_sec: 2

  # Guardrail: score the generated answer against its source chunks.
  - id: validate_faithfulness
    type: agent
    purpose: Score answer faithfulness and hallucination rate against source chunks
    runtime:
      provider: anthropic
      model: claude-haiku-4-20250414
      config:
        evaluation_metrics: [faithfulness, relevance, completeness]
    inputs: [answer, retrieved_chunks, user_query]
    outputs: [faithfulness_score, relevance_score, flagged_claims]
    timeout_sec: 15

  - id: deliver_response
    type: api
    purpose: Return validated answer with citations and confidence metadata to client
    runtime:
      endpoint: /api/v1/response
      method: POST
      url: "https://rag-service.internal"
    inputs: [answer, citations, faithfulness_score, relevance_score]
    outputs: [response_id]

edges:
  # Ingestion path
  - from: ingest_documents
    to: chunk_and_embed
    mode: sequential

  # Query path
  - from: receive_query
    to: retrieve_context
    mode: sequential
  - from: retrieve_context
    to: generate_answer
    mode: sequential
  - from: generate_answer
    to: validate_faithfulness
    mode: sequential

  # Only deliver when the validator scores the answer as faithful enough.
  - from: validate_faithfulness
    to: deliver_response
    mode: conditional
    condition: "faithfulness_score >= 0.8"
  # Otherwise loop back for regeneration (intentional cycle).
  - from: validate_faithfulness
    to: generate_answer
    mode: fallback
    label: "Faithfulness below threshold, regenerate with stricter constraints"