Pipeline
Pipeline
class MyTextRetriever(BaseRetriever):
    """Placeholder retriever for text search.

    The body is intentionally empty apart from this docstring so the class
    is syntactically valid; add the retrieval methods required by
    ``BaseRetriever`` here.
    """
    # Implement your text retrieval logic here
class MyChunkCleaner(BaseProcessor):
    """Placeholder processor for chunking and cleaning retrieved content.

    The body is intentionally empty apart from this docstring so the class
    is syntactically valid; add the processing methods required by
    ``BaseProcessor`` here.
    """
    # Implement your chunking and cleaning logic here
class MyRerankerBefore(Reranker):
    """Placeholder for the first reranking stage.

    The body is intentionally empty apart from this docstring so the class
    is syntactically valid; add the reranking methods required by
    ``Reranker`` here.
    """
    # Implement your first reranking logic here
class MyRerankerAfter(Reranker):
    """Placeholder for the second reranking stage.

    The body is intentionally empty apart from this docstring so the class
    is syntactically valid; add the reranking methods required by
    ``Reranker`` here.
    """
    # Implement your second reranking logic here
def create_pipeline(
    vector_db_retriever: MyVectorDBRetriever,
    text_retriever: MyTextRetriever,
    chunk_cleaner: MyChunkCleaner,
    reranker_before: MyRerankerBefore,
    reranker_after: MyRerankerAfter
) -> Pipeline:
    """
    Assemble a pipeline with hybrid search, multi-query retrieval and reranking.

    The vector and text retrievers are merged into a single
    ``HybridRetriever``. That hybrid retriever is used twice: directly as the
    first stage of the chain, and (wrapped with ``as_retriever``) as the base
    retriever of the ``MultiQueryRetriever``.

    Note: the LLM handed to ``MultiQueryRetriever.from_llm`` is ``mock_llm``,
    which is not a parameter of this function; it is looked up from the
    enclosing module scope at call time.

    Args:
        vector_db_retriever: The retriever for the vector database.
        text_retriever: The retriever for text search.
        chunk_cleaner: The chunking/cleaning processor, passed to the
            multi-query retriever as ``processor``.
        reranker_before: Reranker placed before multi-query retrieval.
        reranker_after: Reranker placed after multi-query retrieval.

    Returns:
        A ``Pipeline`` whose chain is, in order: the hybrid retriever,
        ``reranker_before``, the multi-query retriever, ``reranker_after``.
    """
    # Merge both search back-ends behind one retriever.
    hybrid = HybridRetriever(retrievers=[vector_db_retriever, text_retriever])

    # Multi-query retrieval on top of the hybrid retriever; the original
    # query is kept alongside the generated ones (include_original=True).
    multi_query = MultiQueryRetriever.from_llm(
        retriever=as_retriever(hybrid),
        llm=mock_llm,
        processor=chunk_cleaner,
        include_original=True,
    )

    # Stage order matters: one reranker on each side of multi-query retrieval.
    stages = [hybrid, reranker_before, multi_query, reranker_after]
    return Pipeline(chain=stages)
# Build the pipeline first: `pipeline` only exists as a local variable inside
# create_pipeline(), so it must be created here before it can be run.
# NOTE(review): the no-argument constructors below are placeholders -- pass
# whatever arguments your concrete component implementations require.
pipeline = create_pipeline(
    vector_db_retriever=MyVectorDBRetriever(),
    text_retriever=MyTextRetriever(),
    chunk_cleaner=MyChunkCleaner(),
    reranker_before=MyRerankerBefore(),
    reranker_after=MyRerankerAfter(),
)
# Input query
query = "What is the capital of France?"
# Run the pipeline to retrieve documents (replace with your processing logic)
documents = pipeline.run(query)