Build a RAG Agent using LangGraph

Written by
Caleb Hayes
Updated on: June 13, 2025

Explore how LangGraph can be used to build a RAG Agent that integrates external knowledge with generation.

Core content:
1. RAG Agent core logic: retrieval, enhancement, generation
2. Document preprocessing and retrieval tool creation
3. Detailed steps to build a RAG Agent using LangGraph



Introduction

This article introduces how to build a RAG Agent using LangGraph.

RAG introduces an external knowledge base and combines dynamic retrieval with generation, making the LLM both "erudite" and "credible". Its core logic is:
1️⃣  Retrieval → Accurately pull relevant documents from the knowledge base;
2️⃣  Enhancement → Incorporate the retrieval results into the prompt to assist generation;
3️⃣  Generation → Output answers that are both accurate and transparent.
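Stripped of framework details, the loop is only a few lines. Here is a minimal sketch, where retriever and llm are placeholders for the components built in the steps below:

# Minimal sketch of the RAG loop; `retriever` and `llm` are placeholders
# for the components built in the steps below.
def rag_answer(question: str) -> str:
    docs = retriever.invoke(question)                    # 1. Retrieval
    context = "\n\n".join(d.page_content for d in docs)  # 2. Enhancement
    prompt = f"Context:\n{context}\n\nQuestion: {question}"
    return llm.invoke(prompt).content                    # 3. Generation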



1. Preprocess the document

Use the WebBaseLoader tool to load web resources and read the documents:

from langchain_community.document_loaders import WebBaseLoader

urls = [
    "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
]

docs = [WebBaseLoader(url).load() for url in urls]
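Each call to load() returns a list of Document objects, so docs is a list of lists. A quick sanity check (purely illustrative):

print(len(docs))                      # 3, one list per URL
print(docs[0][0].metadata["source"])  # the first URL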

2. Create a retriever tool

Split document data:

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Flatten the per-URL lists into a single list of documents
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=100, chunk_overlap=50
)
doc_splits = text_splitter.split_documents(docs_list)
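Because the splitter is built with from_tiktoken_encoder, chunk_size and chunk_overlap are measured in tokens rather than characters. To inspect the result (illustrative only):

print(len(doc_splits))                   # number of chunks produced
print(doc_splits[0].page_content[:200])  # start of the first chunk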

Use Alibaba QianWen's embedding model to convert the document chunks into vectors and store them in an in-memory vector store:

from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.embeddings import DashScopeEmbeddings

vectorstore = InMemoryVectorStore.from_documents(
    documents=doc_splits, embedding=DashScopeEmbeddings(model="text-embedding-v3")
)
retriever = vectorstore.as_retriever()
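Note that DashScopeEmbeddings authenticates via the DASHSCOPE_API_KEY environment variable. Since the retriever is a standard Runnable, it can be sanity-checked directly before wrapping it in a tool (the query string is just an example):

results = retriever.invoke("What is reward hacking?")
print(results[0].page_content)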

Then create the retriever tool:

from langchain.tools.retriever import create_retriever_tool

retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_blog_posts",
    "Search and return information about Lilian Weng blog posts.",
)
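The tool wraps the retriever with a name and description that the LLM uses to decide when to call it. It can also be invoked on its own; it expects a {"query": ...} payload and returns the matching chunks as a single string (example query assumed):

print(retriever_tool.invoke({"query": "types of reward hacking"}))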

3. Generate a query

Use the Alibaba QianWen model as the LLM and build the generate_query_or_respond node:

from langgraph.graph import MessagesState
from langchain_community.chat_models import ChatTongyi

response_model = ChatTongyi(model="qwen-plus")


def generate_query_or_respond(state: MessagesState):
    """Call the model to generate a response based on the current state.
    Given the question, it decides whether to call the retriever tool or
    simply respond to the user.
    """
    response = (
        response_model
        .bind_tools([retriever_tool])
        .invoke(state["messages"])
    )
    return {"messages": [response]}
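Calling the node by hand shows the two behaviors: a question about the indexed posts should produce a tool call, while unrelated small talk gets a direct reply. A quick check, with an example question:

input_state = {
    "messages": [
        {"role": "user", "content": "What does Lilian Weng say about reward hacking?"}
    ]
}
generate_query_or_respond(input_state)["messages"][-1].pretty_print()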

4. Score the documents

Define the grade_documents node: it declares a GradeDocuments class and uses the QianWen model's structured output (which returns yes or no) to score the results of the retriever tool. A yes routes to the generate_answer node; otherwise the flow goes to the rewrite_question node.

from pydantic import BaseModel, Field
from typing import Literal
from langchain_core.messages import HumanMessage

GRADE_PROMPT = (
    "You are a grader assessing relevance of a retrieved document to a user question. \n"
    "Here is the retrieved document: \n\n {context} \n\n"
    "Here is the user question: {question} \n"
    "If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n"
    "Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question."
)


class GradeDocuments(BaseModel):
    """Grade documents using a binary score for relevance check."""

    binary_score: str = Field(
        description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
    )


grader_model = ChatTongyi(model="qwen-plus")


def grade_documents(
    state: MessagesState,
) -> Literal["generate_answer", "rewrite_question"]:
    """Determine whether the retrieved documents are relevant to the question."""
    # The original user question is the (last) HumanMessage in the history
    for message in state["messages"]:
        if isinstance(message, HumanMessage):
            question = message.content
    # The most recent message holds the retriever tool's output
    context = state["messages"][-1].content

    prompt = GRADE_PROMPT.format(question=question, context=context)
    response = (
        grader_model
        .with_structured_output(GradeDocuments)
        .invoke([{"role": "user", "content": prompt}])
    )
    score = response.binary_score

    if score == "yes":
        return "generate_answer"
    else:
        return "rewrite_question"
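The grader can be exercised outside the graph by handing it a fabricated state, where the last message stands in for the retriever output (both message contents here are made up for illustration):

from langchain_core.messages import AIMessage, HumanMessage

state = {
    "messages": [
        HumanMessage(content="What is reward hacking?"),
        AIMessage(content="Reward hacking is when an RL agent exploits flaws in its reward function."),
    ]
}
print(grade_documents(state))  # expected: "generate_answer"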

5. Rewrite the question

Define the rewrite_question node: if the documents are graded as not relevant, it reformulates the query from the original user question:

REWRITE_PROMPT = (
    "Look at the input and try to reason about the underlying semantic intent / meaning.\n"
    "Here is the initial question:"
    "\n ------- \n"
    "{question}"
    "\n ------- \n"
    "Formulate an improved question:"
)


def rewrite_question(state: MessagesState):
    """Rewrite the original user question."""
    for message in state["messages"]:
        if isinstance(message, HumanMessage):
            question = message.content
    prompt = REWRITE_PROMPT.format(question=question)
    response = response_model.invoke([{"role": "user", "content": prompt}])
    # Return the rewritten question as a new user message
    return {"messages": [{"role": "user", "content": response.content}]}
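Because the rewritten question is appended as a new user message, the next pass through generate_query_or_respond (via the edge added during graph assembly below) operates on the improved query rather than the original one.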

6. Generate the answer

Define the generate_answer node, which produces the final answer from the retrieved context and the user question:


GENERATE_PROMPT = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "Context: {context}"
)


def generate_answer(state: MessagesState):
    """Generate an answer."""
    for message in state["messages"]:
        if isinstance(message, HumanMessage):
            question = message.content
    context = state["messages"][-1].content
    prompt = GENERATE_PROMPT.format(question=question, context=context)
    response = response_model.invoke([{"role": "user", "content": prompt}])
    return {"messages": [response]}

7. Assemble the graph

Assemble all of the nodes into a graph:


from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode, tools_condition

workflow = StateGraph(MessagesState)

# Define the nodes we will cycle between
workflow.add_node(generate_query_or_respond)
workflow.add_node("retrieve", ToolNode([retriever_tool]))
workflow.add_node(rewrite_question)
workflow.add_node(generate_answer)

workflow.add_edge(START, "generate_query_or_respond")

# Decide whether to retrieve
workflow.add_conditional_edges(
    "generate_query_or_respond",
    # Assess the LLM decision (call the `retriever_tool` or respond to the user)
    tools_condition,
    {
        # Translate the condition outputs to nodes in our graph
        "tools": "retrieve",
        END: END,
    },
)

# Edges taken after the `retrieve` node is called
workflow.add_conditional_edges(
    "retrieve",
    # Assess whether the retrieved documents are relevant
    grade_documents,
)
workflow.add_edge("generate_answer", END)
workflow.add_edge("rewrite_question", "generate_query_or_respond")

# Compile
graph = workflow.compile()
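With the graph compiled, the whole loop can be run end to end. Streaming the updates shows each node as it fires (the question is just an example):

for chunk in graph.stream(
    {
        "messages": [
            {"role": "user", "content": "What does Lilian Weng say about types of reward hacking?"}
        ]
    }
):
    for node, update in chunk.items():
        print("Update from node", node)
        update["messages"][-1].pretty_print()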