Demo
Option 1: Use the Querying Skill (recommended)
Load skills/querying-knowledge-and-memory/SKILL.md into the agent's system prompt. The skill encodes the full three-step retrieval orchestration — when to use MemoryVG, when to fall back to HetaDB, and when to write to memory.
Option 2: LangChain Agent
Wrap the three Heta endpoints as LangChain tools and let the LLM decide when to call each one:
import httpx
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
# Base URL of the Heta API server; every endpoint path used below is appended to it.
BASE = "http://localhost:8000"
# Identity sent with each request: as "agent_id" to the memory endpoints and
# as "user_id" to the knowledge-base chat endpoint.
AGENT_ID = "agent"
@tool
def search_memory(query: str) -> str:
    """Search personal memory for previously seen facts."""
    # The docstring above doubles as the tool description shown to the LLM,
    # so further documentation lives in comments.
    #
    # Posts the query to the memory search endpoint and returns the "memory"
    # text of at most the first three hits, one per line.
    response = httpx.post(
        f"{BASE}/api/v1/hetamem/vg/search",
        json={"query": query, "agent_id": AGENT_ID},
    )
    # An HTTP error is not a memory miss: report it distinctly so the agent
    # does not mistake a failing service for "nothing stored".
    if response.is_error:
        return f"Memory search failed (HTTP {response.status_code})."
    results = response.json().get("results", [])
    if not results:
        return "No memory found."
    return "\n".join(hit["memory"] for hit in results[:3])
@tool
def query_knowledge_base(query: str, kb_id: str) -> str:
    """Query a document knowledge base and return a synthesised answer with citations."""
    # The docstring above doubles as the tool description shown to the LLM,
    # so further documentation lives in comments.
    #
    # Posts the question to the chat endpoint for the given knowledge base and
    # returns the "response" field of the JSON reply.
    payload = {
        "query": query,
        "kb_id": kb_id,
        "user_id": AGENT_ID,
        "query_mode": "naive",
    }
    # NOTE(review): no timeout is passed, so httpx's default applies; confirm
    # it is long enough for the server to synthesise an answer.
    response = httpx.post(f"{BASE}/api/v1/hetadb/chat", json=payload)
    # Surface HTTP failures instead of letting an error body fall through to
    # the "No answer found." default, which would look like an empty result.
    if response.is_error:
        return f"Knowledge base query failed (HTTP {response.status_code})."
    return response.json().get("response", "No answer found.")
@tool
def store_memory(content: str) -> str:
    """Store a finding into personal memory for fast recall in future sessions."""
    # The docstring above doubles as the tool description shown to the LLM,
    # so further documentation lives in comments.
    #
    # Posts the content as a single assistant message to the memory add
    # endpoint and reports whether the server accepted it.
    response = httpx.post(
        f"{BASE}/api/v1/hetamem/vg/add",
        json={
            "messages": [{"role": "assistant", "content": content}],
            "agent_id": AGENT_ID,
        },
    )
    # Only claim success when the server accepted the write; otherwise the
    # agent would believe a finding was saved when it was not.
    if response.is_error:
        return f"Store failed (HTTP {response.status_code})."
    return "Stored."
# Assemble the agent: model, tool set, and prompt
llm = ChatOpenAI(model="gpt-4o")
tools = [search_memory, query_knowledge_base, store_memory]

# The system message tells the model to consult memory first and to save
# useful findings.
system_message = (
    "system",
    "You are a research assistant. Always check memory first before querying a "
    "knowledge base. Store useful findings for future recall.",
)
prompt = ChatPromptTemplate.from_messages(
    [
        system_message,
        ("human", "{input}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
)

agent = create_tool_calling_agent(llm, tools, prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Ask a sample question
question = {"input": "What loss function does the paper in research-kb use?"}
executor.invoke(question)
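Guided by the system prompt, the agent follows the same three-step pattern as the skill: it calls search_memory first, falls back to query_knowledge_base on a miss, and calls store_memory for findings worth keeping. Because the LLM chooses which tool to call, this order is encouraged by the prompt rather than enforced in code.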