Phase 4: Agent Assistant + Provenance + Entity Cards + Confidence Indicators

- Add llm_client.py for Kimi API integration with RAG and streaming support
- Add Agent API endpoints: query, command, suggest
- Add Provenance API for relation source tracking
- Add Entity details API with mentions and relations
- Add Entity evolution analysis API
- Update workbench.html with Agent panel, entity cards, provenance modal
- Update app.js with Agent chat, entity hover cards, relation provenance
- Add low-confidence entity highlighting
- Update STATUS.md with Phase 4 progress
This commit is contained in:
OpenClaw Bot
2026-02-19 09:58:39 +08:00
parent 087a8d9c4d
commit 1f4fe5a33e
9 changed files with 1523 additions and 881 deletions

Binary file not shown.

View File

@@ -453,6 +453,168 @@ class DatabaseManager:
def get_all_entities_for_embedding(self, project_id: str) -> List[Entity]:
    """Return every entity of the project so embeddings can be computed for them.

    Thin wrapper around ``list_project_entities``.
    """
    project_entities = self.list_project_entities(project_id)
    return project_entities
# Phase 4: Agent & Provenance methods
def get_relation_with_details(self, relation_id: str) -> Optional[dict]:
    """Fetch one relation together with its endpoint names and source transcript.

    Args:
        relation_id: Primary key of the row in ``entity_relations``.

    Returns:
        The relation row converted to a dict and extended with
        ``source_name``, ``target_name``, ``transcript_filename`` and
        ``transcript_text``. The two transcript keys are ``None`` when the
        relation has no matching transcript (LEFT JOIN). ``None`` is returned
        when no relation matches, which also happens when either endpoint
        entity row is missing (inner JOINs).
    """
    conn = self.get_conn()
    try:
        row = conn.execute(
            """SELECT r.*,
            s.name as source_name, t.name as target_name,
            tr.filename as transcript_filename, tr.full_text as transcript_text
            FROM entity_relations r
            JOIN entities s ON r.source_entity_id = s.id
            JOIN entities t ON r.target_entity_id = t.id
            LEFT JOIN transcripts tr ON r.transcript_id = tr.id
            WHERE r.id = ?""",
            (relation_id,)
        ).fetchone()
    finally:
        # Release the connection even when the query raises.
        conn.close()
    return dict(row) if row else None
def get_entity_with_mentions(self, entity_id: str) -> Optional[dict]:
    """Load an entity with every mention of it and every relation touching it.

    Args:
        entity_id: Primary key of the row in ``entities``.

    Returns:
        ``None`` when the entity does not exist. Otherwise the entity row as
        a dict with these additions:

        * ``aliases``: decoded from its JSON text (``[]`` when empty/NULL).
        * ``mentions``: mention rows plus the transcript ``filename`` and
          ``transcript_date``, ordered by transcript date then ``start_pos``.
        * ``mention_count``: number of mentions.
        * ``relations``: relations where the entity is source or target,
          newest first, each with ``source_name`` / ``target_name``.
    """
    conn = self.get_conn()
    try:
        entity_row = conn.execute(
            "SELECT * FROM entities WHERE id = ?", (entity_id,)
        ).fetchone()
        if not entity_row:
            return None

        entity = dict(entity_row)
        entity['aliases'] = json.loads(entity['aliases']) if entity['aliases'] else []

        mentions = conn.execute(
            """SELECT m.*, t.filename, t.created_at as transcript_date
            FROM entity_mentions m
            JOIN transcripts t ON m.transcript_id = t.id
            WHERE m.entity_id = ?
            ORDER BY t.created_at, m.start_pos""",
            (entity_id,)
        ).fetchall()
        entity['mentions'] = [dict(m) for m in mentions]
        entity['mention_count'] = len(mentions)

        relations = conn.execute(
            """SELECT r.*,
            s.name as source_name, t.name as target_name
            FROM entity_relations r
            JOIN entities s ON r.source_entity_id = s.id
            JOIN entities t ON r.target_entity_id = t.id
            WHERE r.source_entity_id = ? OR r.target_entity_id = ?
            ORDER BY r.created_at DESC""",
            (entity_id, entity_id)
        ).fetchall()
        entity['relations'] = [dict(r) for r in relations]
        return entity
    finally:
        # Single exit point for the connection, including on query or JSON errors.
        conn.close()
def search_entities(self, project_id: str, query: str) -> List[Entity]:
    """Find entities of a project whose name, definition or aliases contain *query*.

    The match is a substring search via SQL ``LIKE``. ``%``, ``_`` and ``\\``
    in *query* are escaped so they are matched literally instead of acting as
    wildcards.

    Args:
        project_id: Project whose entities are searched.
        query: Text to look for.

    Returns:
        Matching entities ordered by name, with ``aliases`` decoded from JSON
        (``[]`` when empty/NULL).
    """
    escaped = query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    pattern = f"%{escaped}%"
    conn = self.get_conn()
    try:
        rows = conn.execute(
            """SELECT * FROM entities
            WHERE project_id = ? AND
            (name LIKE ? ESCAPE '\\' OR definition LIKE ? ESCAPE '\\' OR aliases LIKE ? ESCAPE '\\')
            ORDER BY name""",
            (project_id, pattern, pattern, pattern)
        ).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    entities = []
    for row in rows:
        data = dict(row)
        data['aliases'] = json.loads(data['aliases']) if data['aliases'] else []
        entities.append(Entity(**data))
    return entities
def get_project_summary(self, project_id: str) -> dict:
    """Collect a compact project overview used as RAG context.

    Args:
        project_id: Project to summarise.

    Returns:
        A dict with:

        * ``project``: the project row as a dict (``{}`` when not found).
        * ``statistics``: ``entity_count``, ``transcript_count`` and
          ``relation_count`` for the project.
        * ``recent_transcripts``: up to 5 newest transcripts
          (``filename``, ``full_text``, ``created_at``). ``full_text`` is
          the complete text, not a snippet.
        * ``top_entities``: up to 10 entities with the most mentions
          (``name``, ``type``, ``definition``, ``mention_count``).
    """
    conn = self.get_conn()
    try:
        project = conn.execute(
            "SELECT * FROM projects WHERE id = ?", (project_id,)
        ).fetchone()

        entity_count = conn.execute(
            "SELECT COUNT(*) as count FROM entities WHERE project_id = ?",
            (project_id,)
        ).fetchone()['count']
        transcript_count = conn.execute(
            "SELECT COUNT(*) as count FROM transcripts WHERE project_id = ?",
            (project_id,)
        ).fetchone()['count']
        relation_count = conn.execute(
            "SELECT COUNT(*) as count FROM entity_relations WHERE project_id = ?",
            (project_id,)
        ).fetchone()['count']

        recent_transcripts = conn.execute(
            """SELECT filename, full_text, created_at
            FROM transcripts
            WHERE project_id = ?
            ORDER BY created_at DESC
            LIMIT 5""",
            (project_id,)
        ).fetchall()

        # LEFT JOIN keeps entities that were never mentioned (count 0).
        top_entities = conn.execute(
            """SELECT e.name, e.type, e.definition, COUNT(m.id) as mention_count
            FROM entities e
            LEFT JOIN entity_mentions m ON e.id = m.entity_id
            WHERE e.project_id = ?
            GROUP BY e.id
            ORDER BY mention_count DESC
            LIMIT 10""",
            (project_id,)
        ).fetchall()
    finally:
        # Release the connection even when one of the queries raises.
        conn.close()

    return {
        'project': dict(project) if project else {},
        'statistics': {
            'entity_count': entity_count,
            'transcript_count': transcript_count,
            'relation_count': relation_count
        },
        'recent_transcripts': [dict(t) for t in recent_transcripts],
        'top_entities': [dict(e) for e in top_entities]
    }
def get_transcript_context(self, transcript_id: str, position: int, context_chars: int = 200) -> str:
    """Return the transcript text surrounding a character position.

    Args:
        transcript_id: Primary key of the row in ``transcripts``.
        position: Character offset into the transcript's ``full_text``.
        context_chars: Number of characters to include on each side.

    Returns:
        ``full_text[position - context_chars : position + context_chars]``
        clamped to the text bounds. An empty string is returned when the
        transcript does not exist, its text is NULL/empty, or the window
        lies entirely outside the text.
    """
    conn = self.get_conn()
    try:
        row = conn.execute(
            "SELECT full_text FROM transcripts WHERE id = ?",
            (transcript_id,)
        ).fetchone()
    finally:
        # Release the connection even when the query raises.
        conn.close()
    if not row:
        return ""
    # A NULL full_text would otherwise break len() below.
    text = row['full_text'] or ""
    start = max(0, position - context_chars)
    # Never let `end` go negative: a negative slice bound counts from the
    # end of the string and would return unrelated text.
    end = max(start, min(len(text), position + context_chars))
    return text[start:end]
# Singleton instance

255
backend/llm_client.py Normal file
View File

@@ -0,0 +1,255 @@
#!/usr/bin/env python3
"""
InsightFlow LLM Client - Phase 4
用于与 Kimi API 交互,支持 RAG 问答和 Agent 功能
"""
import os
import json
import httpx
from typing import List, Dict, Optional, AsyncGenerator
from dataclasses import dataclass
# Kimi API credentials and endpoint, both overridable through environment
# variables. An empty key makes LLMClient.chat / chat_stream raise ValueError.
KIMI_API_KEY = os.getenv("KIMI_API_KEY", "")
# Requests are sent to f"{KIMI_BASE_URL}/v1/chat/completions".
KIMI_BASE_URL = os.getenv("KIMI_BASE_URL", "https://api.kimi.com/coding")
@dataclass
class ChatMessage:
    """One message of a chat completion request."""
    role: str  # sent as the "role" field; this file uses "system" and "user"
    content: str  # sent as the "content" field
@dataclass
class EntityExtractionResult:
    """An entity parsed from the model's extraction reply."""
    name: str  # entity name as returned by the model
    type: str  # the prompt asks for PROJECT/TECH/PERSON/ORG/OTHER; defaults to "OTHER" when missing
    definition: str  # one-sentence definition; "" when missing
    confidence: float  # the prompt asks for 0-1; defaults to 0.8 when missing
@dataclass
class RelationExtractionResult:
    """A relation between two entity names parsed from the model's extraction reply."""
    source: str  # source entity name
    target: str  # target entity name
    type: str  # the prompt asks for belongs_to/works_with/depends_on/mentions/related; defaults to "related"
    confidence: float  # the prompt asks for 0-1; defaults to 0.8 when missing
class LLMClient:
    """Client for the Kimi chat-completions API.

    Offers plain and streaming chat plus prompt helpers for entity/relation
    extraction, RAG question answering, agent command parsing and entity
    evolution analysis.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Store credentials and prepare the request headers.

        Args:
            api_key: API key; falls back to ``KIMI_API_KEY`` when falsy.
            base_url: API base URL; falls back to ``KIMI_BASE_URL`` when falsy.
        """
        self.api_key = api_key or KIMI_API_KEY
        self.base_url = base_url or KIMI_BASE_URL
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

    @staticmethod
    def _extract_json_object(content: Optional[str]) -> Optional[dict]:
        """Return the first JSON object embedded in *content*, or ``None``.

        Model replies often wrap the JSON in prose or code fences, so every
        ``{`` is tried as a possible start until one decodes to a dict.
        """
        if not content:
            return None
        decoder = json.JSONDecoder()
        start = content.find("{")
        while start != -1:
            try:
                value, _ = decoder.raw_decode(content, start)
            except json.JSONDecodeError:
                value = None
            if isinstance(value, dict):
                return value
            start = content.find("{", start + 1)
        return None

    async def chat(self, messages: "List[ChatMessage]", temperature: float = 0.3, stream: bool = False) -> str:
        """Send a chat request and return the reply text.

        Args:
            messages: Conversation to send.
            temperature: Sampling temperature forwarded to the API.
            stream: Forwarded as the payload's ``stream`` flag. The response
                is always read as one JSON document, so use ``chat_stream``
                for streamed replies.

        Returns:
            ``choices[0].message.content`` of the response.

        Raises:
            ValueError: When no API key is configured.
            httpx.HTTPStatusError: When the API answers with an error status.
        """
        if not self.api_key:
            raise ValueError("KIMI_API_KEY not set")
        payload = {
            "model": "k2p5",
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            "temperature": temperature,
            "stream": stream
        }
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.base_url}/v1/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=120.0
            )
            response.raise_for_status()
            result = response.json()
            return result["choices"][0]["message"]["content"]

    async def chat_stream(self, messages: "List[ChatMessage]", temperature: float = 0.3) -> "AsyncGenerator[str, None]":
        """Send a streaming chat request and yield content deltas as they arrive.

        Lines not starting with ``data: `` and chunks that cannot be decoded
        are skipped; the stream ends at the ``[DONE]`` marker.

        Raises:
            ValueError: When no API key is configured.
            httpx.HTTPStatusError: When the API answers with an error status.
        """
        if not self.api_key:
            raise ValueError("KIMI_API_KEY not set")
        payload = {
            "model": "k2p5",
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            "temperature": temperature,
            "stream": True
        }
        async with httpx.AsyncClient() as client:
            async with client.stream(
                "POST",
                f"{self.base_url}/v1/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=120.0
            ) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    if not line.startswith("data: "):
                        continue
                    data = line[6:]
                    if data == "[DONE]":
                        break
                    try:
                        delta = json.loads(data)["choices"][0]["delta"]
                    except (json.JSONDecodeError, KeyError, IndexError, TypeError):
                        # Malformed or content-free chunk: skip it.
                        continue
                    # The yield stays outside the try block so that closing
                    # the generator (GeneratorExit) is never swallowed.
                    if isinstance(delta, dict) and "content" in delta:
                        yield delta["content"]

    async def extract_entities_with_confidence(self, text: str) -> "tuple[List[EntityExtractionResult], List[RelationExtractionResult]]":
        """Extract entities and relations, each with a confidence score.

        Only the first 3000 characters of *text* are sent to the model.

        Returns:
            ``(entities, relations)``; both lists are empty when the reply
            contains no JSON object or an item is malformed.
        """
        prompt = f"""从以下会议文本中提取关键实体和它们之间的关系,以 JSON 格式返回:
文本:{text[:3000]}
要求:
1. entities: 每个实体包含 name(名称), type(类型: PROJECT/TECH/PERSON/ORG/OTHER), definition(一句话定义), confidence(置信度0-1)
2. relations: 每个关系包含 source(源实体名), target(目标实体名), type(关系类型: belongs_to/works_with/depends_on/mentions/related), confidence(置信度0-1)
3. 只返回 JSON 对象,格式: {{"entities": [...], "relations": [...]}}
示例:
{{
"entities": [
{{"name": "Project Alpha", "type": "PROJECT", "definition": "核心项目", "confidence": 0.95}},
{{"name": "K8s", "type": "TECH", "definition": "Kubernetes容器编排平台", "confidence": 0.88}}
],
"relations": [
{{"source": "Project Alpha", "target": "K8s", "type": "depends_on", "confidence": 0.82}}
]
}}"""
        messages = [ChatMessage(role="user", content=prompt)]
        content = await self.chat(messages, temperature=0.1)
        data = self._extract_json_object(content)
        if data is None:
            return [], []
        try:
            entities = [
                EntityExtractionResult(
                    name=e["name"],
                    type=e.get("type", "OTHER"),
                    definition=e.get("definition", ""),
                    confidence=e.get("confidence", 0.8)
                )
                for e in data.get("entities", [])
            ]
            relations = [
                RelationExtractionResult(
                    source=r["source"],
                    target=r["target"],
                    type=r.get("type", "related"),
                    confidence=r.get("confidence", 0.8)
                )
                for r in data.get("relations", [])
            ]
            return entities, relations
        except (KeyError, TypeError, AttributeError) as e:
            # Items missing required keys or of the wrong shape.
            print(f"Parse extraction result failed: {e}")
            return [], []

    async def rag_query(self, query: str, context: str, project_context: Dict) -> str:
        """Answer *query* using the project information and context text.

        Args:
            query: The user's question.
            context: Supporting text; only the first 4000 characters are sent.
            project_context: JSON-serialisable project summary put in the prompt.

        Returns:
            The model's reply text.
        """
        prompt = f"""你是一个专业的项目分析助手。基于以下项目信息回答问题:
## 项目信息
{json.dumps(project_context, ensure_ascii=False, indent=2)}
## 相关上下文
{context[:4000]}
## 用户问题
{query}
请用中文回答,保持简洁专业。如果信息不足,请明确说明。"""
        messages = [
            ChatMessage(role="system", content="你是一个专业的项目分析助手,擅长从会议记录中提取洞察。"),
            ChatMessage(role="user", content=prompt)
        ]
        return await self.chat(messages, temperature=0.3)

    async def agent_command(self, command: str, project_context: Dict) -> Dict:
        """Turn a natural-language instruction into a structured operation.

        Args:
            command: The user's instruction.
            project_context: JSON-serialisable project summary put in the prompt.

        Returns:
            The JSON object parsed from the reply (the prompt asks for
            ``intent``, ``params`` and ``explanation``), or a dict with
            ``intent == "unknown"`` when the reply cannot be parsed.
        """
        prompt = f"""解析以下用户指令,转换为结构化操作:
## 项目信息
{json.dumps(project_context, ensure_ascii=False, indent=2)}
## 用户指令
{command}
请分析指令意图,返回 JSON 格式:
{{
"intent": "merge_entities|answer_question|edit_entity|create_relation|unknown",
"params": {{
// 根据 intent 不同,参数不同
}},
"explanation": "对用户指令的解释"
}}
意图说明:
- merge_entities: 合并实体params 包含 source_names(源实体名列表), target_name(目标实体名)
- answer_question: 回答问题params 包含 question(问题内容)
- edit_entity: 编辑实体params 包含 entity_name(实体名), field(字段), value(新值)
- create_relation: 创建关系params 包含 source(源实体), target(目标实体), relation_type(关系类型)
"""
        messages = [ChatMessage(role="user", content=prompt)]
        content = await self.chat(messages, temperature=0.1)
        if not content or "{" not in content:
            return {"intent": "unknown", "explanation": "无法解析指令"}
        parsed = self._extract_json_object(content)
        if parsed is None:
            return {"intent": "unknown", "explanation": "解析失败"}
        return parsed

    async def analyze_entity_evolution(self, entity_name: str, mentions: List[Dict]) -> str:
        """Ask the model how an entity's role and perception changed over time.

        Args:
            entity_name: Name of the entity to analyse.
            mentions: Mention dicts; only the first 20 are used. Each line
                shows ``created_at`` (or ``transcript_date`` when that is
                absent) and ``text_snippet``.

        Returns:
            The model's analysis text.
        """
        mentions_text = "\n".join([
            f"[{m.get('created_at') or m.get('transcript_date') or '未知时间'}] {m.get('text_snippet', '')}"
            for m in mentions[:20]  # cap the prompt size
        ])
        prompt = f"""分析实体 "{entity_name}" 在项目中的演变和态度变化:
## 提及记录
{mentions_text}
请分析:
1. 该实体的角色/重要性变化
2. 相关方对它的态度变化
3. 关键时间节点
4. 总结性洞察
用中文回答,结构清晰。"""
        messages = [ChatMessage(role="user", content=prompt)]
        return await self.chat(messages, temperature=0.3)
# Singleton instance
_llm_client = None


def get_llm_client() -> LLMClient:
    """Return the shared LLMClient, building it on the first call."""
    global _llm_client
    if _llm_client is not None:
        return _llm_client
    _llm_client = LLMClient()
    return _llm_client

View File

@@ -48,6 +48,12 @@ try:
except ImportError:
ALIGNER_AVAILABLE = False
# Optional dependency: when llm_client cannot be imported the flag is False,
# and the agent and entity-evolution endpoints answer with an HTTP 500.
try:
    from llm_client import get_llm_client, ChatMessage
    LLM_CLIENT_AVAILABLE = True
except ImportError:
    LLM_CLIENT_AVAILABLE = False
app = FastAPI(title="InsightFlow", version="0.3.0")
app.add_middleware(
@@ -99,6 +105,13 @@ class RelationCreate(BaseModel):
class TranscriptUpdate(BaseModel):
    """Request body carrying a transcript's text.

    NOTE(review): presumably consumed by a transcript update endpoint that is
    not visible here - confirm.
    """
    full_text: str
class AgentQuery(BaseModel):
    """Request body of the agent query endpoint."""
    query: str  # the user's question
    stream: bool = False  # when true the answer is returned as a text/event-stream response
class AgentCommand(BaseModel):
    """Request body of the agent command endpoint."""
    command: str  # natural-language instruction to parse and execute
class EntityMergeRequest(BaseModel):
source_entity_id: str
target_entity_id: str
@@ -963,13 +976,14 @@ async def get_entity_mentions(entity_id: str):
async def health_check():
    """Report service status, version, phase and the availability flags.

    The superseded Phase 3 ``version`` / ``phase`` entries and the duplicated
    ``aligner_available`` entry (which lacked a trailing comma) are dropped so
    the literal holds one value per key.
    """
    return {
        "status": "ok",
        "version": "0.4.0",
        "phase": "Phase 4 - Agent Assistant",
        "oss_available": OSS_AVAILABLE,
        "tingwu_available": TINGWU_AVAILABLE,
        "db_available": DB_AVAILABLE,
        "doc_processor_available": DOC_PROCESSOR_AVAILABLE,
        "aligner_available": ALIGNER_AVAILABLE,
        "llm_client_available": LLM_CLIENT_AVAILABLE
    }
# Serve frontend
@@ -978,3 +992,276 @@ app.mount("/", StaticFiles(directory="frontend", html=True), name="frontend")
# Script entry point: serve `app` with uvicorn on 0.0.0.0:8000.
# NOTE(review): more route definitions follow this guard in the file. When the
# module is run as a script they appear to be reached only after uvicorn.run()
# returns - confirm whether this guard should be the file's last statement.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
# ==================== Phase 4: Agent 助手 API ====================
@app.post("/api/v1/projects/{project_id}/agent/query")
async def agent_query(project_id: str, query: AgentQuery):
    """Answer a question about a project using its recent transcripts (RAG).

    Args:
        project_id: Project the question is about.
        query: The question, plus whether to stream the answer.

    Returns:
        ``{"answer", "project_id"}``, or, when ``query.stream`` is true, a
        ``text/event-stream`` response of ``data: {"content": ...}`` events
        ending with ``data: [DONE]``.

    Raises:
        HTTPException: 500 when the database or LLM client is unavailable,
            404 when the project does not exist.
    """
    if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE:
        raise HTTPException(status_code=500, detail="Service not available")
    db = get_db_manager()
    llm = get_llm_client()
    project = db.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    summary = db.get_project_summary(project_id)
    recent = summary.get('recent_transcripts', [])

    # Up to 1000 characters per transcript; a NULL full_text counts as empty.
    context = "\n\n".join(
        f"{t['filename']}\n{(t['full_text'] or '')[:1000]}" for t in recent
    )

    # The project JSON goes into the prompt verbatim. Drop the complete
    # transcript text from it: the excerpts above already carry that content,
    # and keeping it would defeat the size limits applied to `context`.
    project_context = {
        **summary,
        'recent_transcripts': [
            {k: v for k, v in t.items() if k != 'full_text'} for t in recent
        ],
    }

    if query.stream:
        from fastapi.responses import StreamingResponse
        import json

        async def stream_response():
            messages = [
                ChatMessage(role="system", content="你是一个专业的项目分析助手,擅长从会议记录中提取洞察。"),
                ChatMessage(role="user", content=f"""基于以下项目信息回答问题:
## 项目信息
{json.dumps(project_context, ensure_ascii=False, indent=2)}
## 相关上下文
{context[:4000]}
## 用户问题
{query.query}
请用中文回答,保持简洁专业。如果信息不足,请明确说明。""")
            ]
            async for chunk in llm.chat_stream(messages):
                yield f"data: {json.dumps({'content': chunk})}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(stream_response(), media_type="text/event-stream")

    answer = await llm.rag_query(query.query, context, project_context)
    return {"answer": answer, "project_id": project_id}
@app.post("/api/v1/projects/{project_id}/agent/command")
async def agent_command(project_id: str, command: AgentCommand):
    """Parse a natural-language instruction with the LLM and execute it.

    Supported intents: ``merge_entities``, ``answer_question`` and
    ``edit_entity``. Any other intent yields ``action == "none"`` with a
    usage hint.

    Returns:
        A dict with ``intent`` and ``explanation`` plus intent-specific keys
        (``action``, ``success``, ``merged``, ``answer``, ``entity``, ``error``).

    Raises:
        HTTPException: 500 when the database or LLM client is unavailable,
            404 when the project does not exist.
    """
    if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE:
        raise HTTPException(status_code=500, detail="Service not available")
    db = get_db_manager()
    llm = get_llm_client()
    project = db.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    project_context = db.get_project_summary(project_id)

    parsed = await llm.agent_command(command.command, project_context)
    intent = parsed.get("intent", "unknown")
    # The model may send "params": null or a non-object value.
    params = parsed.get("params")
    if not isinstance(params, dict):
        params = {}
    result = {"intent": intent, "explanation": parsed.get("explanation", "")}

    if intent == "merge_entities":
        top_entities = project_context.get("top_entities", [])

        def find_entity(name):
            """Resolve a name against the top entities (exact or substring match)."""
            # An empty name is a substring of every entity name and would
            # silently select the first entity, so reject it.
            if not isinstance(name, str) or not name.strip():
                return None
            for e in top_entities:
                candidate = e.get("name") or ""
                if candidate == name or name in candidate:
                    return db.get_entity_by_name(project_id, candidate)
            return None

        source_names = params.get("source_names") or []
        # A single name given as a string must not be iterated per character.
        if isinstance(source_names, str):
            source_names = [source_names]

        target_entity = find_entity(params.get("target_name", ""))

        source_entities = []
        seen_ids = set()
        for name in source_names:
            ent = find_entity(name)
            if not ent or ent.id in seen_ids:
                continue
            if target_entity and ent.id == target_entity.id:
                continue
            seen_ids.add(ent.id)
            source_entities.append(ent)

        merged = []
        if target_entity:
            for source in source_entities:
                try:
                    db.merge_entities(target_entity.id, source.id)
                    merged.append(source.name)
                except Exception as e:
                    # Best effort: one failed merge must not abort the rest.
                    print(f"Merge failed: {e}")
        result["action"] = "merge_entities"
        result["target"] = target_entity.name if target_entity else None
        result["merged"] = merged
        result["success"] = len(merged) > 0
    elif intent == "answer_question":
        answer = await llm.rag_query(params.get("question", command.command), "", project_context)
        result["action"] = "answer"
        result["answer"] = answer
    elif intent == "edit_entity":
        entity_name = params.get("entity_name", "")
        field = params.get("field", "")
        value = params.get("value", "")
        # `field` is chosen by the model and becomes a keyword argument, so
        # restrict it to descriptive entity columns.
        # NOTE(review): confirm this list against DatabaseManager.update_entity.
        editable_fields = {"name", "type", "definition", "aliases"}
        entity = db.get_entity_by_name(project_id, entity_name)
        if not entity:
            result["success"] = False
            result["error"] = "Entity not found"
        elif field not in editable_fields:
            result["action"] = "edit_entity"
            result["success"] = False
            result["error"] = f"Field not editable: {field}"
        else:
            updated = db.update_entity(entity.id, **{field: value})
            result["action"] = "edit_entity"
            result["entity"] = {"id": updated.id, "name": updated.name} if updated else None
            result["success"] = updated is not None
    else:
        result["action"] = "none"
        result["message"] = "无法理解的指令,请尝试:\n- 合并实体:把所有'客户端'合并到'App'\n- 提问:张总对项目的态度如何?\n- 编辑:修改'K8s'的定义为..."
    return result
@app.get("/api/v1/projects/{project_id}/agent/suggest")
async def agent_suggest(project_id: str):
    """Ask the LLM for insights and suggested actions based on project data.

    Returns:
        The JSON object found in the model's reply (the prompt asks for
        ``{"suggestions": [...]}``), or ``{"suggestions": []}`` when the
        reply contains no JSON object.

    Raises:
        HTTPException: 500 when the database or LLM client is unavailable,
            404 when the project does not exist.
    """
    if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE:
        raise HTTPException(status_code=500, detail="Service not available")
    db = get_db_manager()
    llm = get_llm_client()
    # Same existence check as the other agent endpoints.
    if not db.get_project(project_id):
        raise HTTPException(status_code=404, detail="Project not found")
    project_context = db.get_project_summary(project_id)

    prompt = f"""基于以下项目数据提供3-5条分析建议
{json.dumps(project_context, ensure_ascii=False, indent=2)}
请提供:
1. 数据洞察发现
2. 建议的操作(如合并相似实体、补充定义等)
3. 值得关注的关键信息
返回 JSON 格式:{{"suggestions": [{{"type": "insight|action", "title": "...", "description": "..."}}]}}"""
    messages = [ChatMessage(role="user", content=prompt)]
    content = await llm.chat(messages, temperature=0.3)

    # The reply may wrap the JSON in prose or code fences: try each "{" as
    # the start of an object and return the first one that decodes.
    decoder = json.JSONDecoder()
    start = content.find("{") if content else -1
    while start != -1:
        try:
            data, _ = decoder.raw_decode(content, start)
        except json.JSONDecodeError:
            data = None
        if isinstance(data, dict):
            return data
        start = content.find("{", start + 1)
    return {"suggestions": []}
# ==================== Phase 4: 知识溯源 API ====================
@app.get("/api/v1/relations/{relation_id}/provenance")
async def get_relation_provenance(relation_id: str):
    """Return where a relation came from: endpoints, type, evidence, transcript.

    Responds with 500 when the database is unavailable and 404 when the
    relation does not exist. ``transcript`` is ``None`` when the relation has
    no transcript id.
    """
    if not DB_AVAILABLE:
        raise HTTPException(status_code=500, detail="Database not available")

    details = get_db_manager().get_relation_with_details(relation_id)
    if not details:
        raise HTTPException(status_code=404, detail="Relation not found")

    transcript_info = None
    if details.get("transcript_id"):
        transcript_info = {
            "id": details.get("transcript_id"),
            "filename": details.get("transcript_filename"),
        }

    provenance = {
        "relation_id": relation_id,
        "source": details.get("source_name"),
        "target": details.get("target_name"),
        "type": details.get("relation_type"),
        "evidence": details.get("evidence"),
        "transcript": transcript_info,
    }
    return provenance
@app.get("/api/v1/entities/{entity_id}/details")
async def get_entity_details(entity_id: str):
    """Return an entity together with its mentions and relations.

    Responds with 500 when the database is unavailable and 404 when the
    entity does not exist.
    """
    if not DB_AVAILABLE:
        raise HTTPException(status_code=500, detail="Database not available")

    details = get_db_manager().get_entity_with_mentions(entity_id)
    if details:
        return details
    raise HTTPException(status_code=404, detail="Entity not found")
@app.get("/api/v1/entities/{entity_id}/evolution")
async def get_entity_evolution(entity_id: str):
    """Return an LLM analysis of how an entity evolved, plus its mention timeline.

    Responds with 500 when the database or LLM client is unavailable and 404
    when the entity does not exist.
    """
    if not (DB_AVAILABLE and LLM_CLIENT_AVAILABLE):
        raise HTTPException(status_code=500, detail="Service not available")

    db = get_db_manager()
    llm = get_llm_client()

    entity = db.get_entity_with_mentions(entity_id)
    if not entity:
        raise HTTPException(status_code=404, detail="Entity not found")

    entity_name = entity["name"]
    analysis = await llm.analyze_entity_evolution(entity_name, entity.get("mentions", []))

    # One timeline entry per mention, in the order the database returned them.
    timeline = []
    for mention in entity.get("mentions", []):
        timeline.append({
            "date": mention.get("transcript_date"),
            "snippet": mention.get("text_snippet"),
            "transcript_id": mention.get("transcript_id"),
            "filename": mention.get("filename"),
        })

    return {
        "entity_id": entity_id,
        "entity_name": entity_name,
        "mention_count": entity.get("mention_count", 0),
        "analysis": analysis,
        "timeline": timeline,
    }
# ==================== Phase 4: 实体管理增强 API ====================
@app.get("/api/v1/projects/{project_id}/entities/search")
async def search_entities(project_id: str, q: str):
    """Search a project's entities and return their id, name, type and definition.

    Responds with 500 when the database is unavailable.
    """
    if not DB_AVAILABLE:
        raise HTTPException(status_code=500, detail="Database not available")

    matches = get_db_manager().search_entities(project_id, q)
    payload = []
    for entity in matches:
        payload.append({
            "id": entity.id,
            "name": entity.name,
            "type": entity.type,
            "definition": entity.definition,
        })
    return payload