Phase 4: Agent Assistant + Provenance + Entity Cards + Confidence Indicators

- Add llm_client.py for Kimi API integration with RAG and streaming support
- Add Agent API endpoints: query, command, suggest
- Add Provenance API for relation source tracking
- Add Entity details API with mentions and relations
- Add Entity evolution analysis API
- Update workbench.html with Agent panel, entity cards, provenance modal
- Update app.js with Agent chat, entity hover cards, relation provenance
- Add low-confidence entity highlighting
- Update STATUS.md with Phase 4 progress
2026-02-19 09:58:39 +08:00
parent 087a8d9c4d
commit 1f4fe5a33e
9 changed files with 1523 additions and 881 deletions
--- a/backend/main.py
+++ b/backend/main.py
@@ -48,6 +48,12 @@ try:
 except ImportError:
    ALIGNER_AVAILABLE = False

+try:
+    from llm_client import get_llm_client, ChatMessage
+    LLM_CLIENT_AVAILABLE = True
+except ImportError:
+    LLM_CLIENT_AVAILABLE = False
+
 app = FastAPI(title="InsightFlow", version="0.3.0")

 app.add_middleware(
@@ -99,6 +105,13 @@ class RelationCreate(BaseModel):
 class TranscriptUpdate(BaseModel):
    full_text: str

+class AgentQuery(BaseModel):
+    """Request body for the agent query endpoint (``agent_query``)."""
+    # Question text; agent_query interpolates it into the LLM prompt.
+    query: str
+    # When true, agent_query answers with a text/event-stream response
+    # instead of a single JSON object.
+    stream: bool = False
+
+class AgentCommand(BaseModel):
+    """Request body for the agent command endpoint (``agent_command``)."""
+    # Natural-language instruction; agent_command hands it to
+    # llm.agent_command() to obtain an intent and its parameters.
+    command: str
+
 class EntityMergeRequest(BaseModel):
    source_entity_id: str
    target_entity_id: str
@@ -963,13 +976,14 @@ async def get_entity_mentions(entity_id: str):
 async def health_check():
    return {
        "status": "ok",
-        "version": "0.3.0",
-        "phase": "Phase 3 - Memory & Growth",
+        "version": "0.4.0",
+        "phase": "Phase 4 - Agent Assistant",
        "oss_available": OSS_AVAILABLE,
        "tingwu_available": TINGWU_AVAILABLE,
        "db_available": DB_AVAILABLE,
        "doc_processor_available": DOC_PROCESSOR_AVAILABLE,
-        "aligner_available": ALIGNER_AVAILABLE
+        "aligner_available": ALIGNER_AVAILABLE,
+        "llm_client_available": LLM_CLIENT_AVAILABLE
    }

 # Serve frontend
@@ -978,3 +992,276 @@ app.mount("/", StaticFiles(directory="frontend", html=True), name="frontend")
 if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
+
+
+# ==================== Phase 4: Agent assistant API ====================
+# NOTE(review): in this diff the Phase 4 routes are appended after the
+# `app.mount("/", StaticFiles(...))` call and after the `__main__` guard that
+# calls `uvicorn.run(...)`. Confirm they are registered early enough to be
+# reachable (routes are matched in registration order).
+
+@app.post("/api/v1/projects/{project_id}/agent/query")
+async def agent_query(project_id: str, query: AgentQuery):
+    """Answer a question about a project (RAG-style Q&A).
+
+    The project summary and the first 1000 characters of each recent
+    transcript are used as context for the LLM.
+
+    Returns:
+        ``{"answer": ..., "project_id": ...}`` when ``query.stream`` is false;
+        otherwise a ``text/event-stream`` response whose events carry
+        ``{"content": <chunk>}`` and which ends with ``data: [DONE]``.
+
+    Raises:
+        HTTPException: 500 when the database or LLM client is unavailable,
+            404 when the project does not exist.
+    """
+    if not (DB_AVAILABLE and LLM_CLIENT_AVAILABLE):
+        raise HTTPException(status_code=500, detail="Service not available")
+
+    db = get_db_manager()
+    llm = get_llm_client()
+
+    if not db.get_project(project_id):
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    project_context = db.get_project_summary(project_id)
+
+    # One section per recent transcript: bracketed filename header followed by
+    # the leading 1000 characters of its text.
+    context = "\n\n".join(
+        f"【{item['filename']}】\n{item['full_text'][:1000]}"
+        for item in project_context.get('recent_transcripts', [])
+    )
+
+    # Non-streaming path: a single awaited answer.
+    if not query.stream:
+        answer = await llm.rag_query(query.query, context, project_context)
+        return {"answer": answer, "project_id": project_id}
+
+    from fastapi.responses import StreamingResponse
+    import json
+
+    async def event_stream():
+        # The prompt is built lazily, when the response body starts streaming.
+        system_message = ChatMessage(role="system", content="你是一个专业的项目分析助手，擅长从会议记录中提取洞察。")
+        user_prompt = f"""基于以下项目信息回答问题：

## 项目信息
{json.dumps(project_context, ensure_ascii=False, indent=2)}

## 相关上下文
{context[:4000]}

## 用户问题
{query.query}

请用中文回答，保持简洁专业。如果信息不足，请明确说明。"""
+        messages = [system_message, ChatMessage(role="user", content=user_prompt)]
+
+        # Forward each LLM chunk as one server-sent event.
+        async for piece in llm.chat_stream(messages):
+            yield f"data: {json.dumps({'content': piece})}\n\n"
+        yield "data: [DONE]\n\n"
+
+    return StreamingResponse(event_stream(), media_type="text/event-stream")
+
+
+def _resolve_top_entity(db, project_id, top_entities, name):
+    """Resolve *name* to a stored entity via the project's top-entity list.
+
+    An exact name match wins over a substring match. A blank name never
+    matches: ``"" in s`` is always true, so without this guard an empty name
+    would silently select the first entity in the list.
+
+    Returns whatever ``db.get_entity_by_name`` returns, or ``None`` when no
+    listed entity matches.
+    """
+    if not name:
+        return None
+    names = [e["name"] for e in top_entities]
+    if name in names:
+        matched = name
+    else:
+        matched = next((n for n in names if name in n), None)
+    if matched is None:
+        return None
+    return db.get_entity_by_name(project_id, matched)
+
+
+@app.post("/api/v1/projects/{project_id}/agent/command")
+async def agent_command(project_id: str, command: AgentCommand):
+    """Parse a natural-language instruction with the LLM and execute it.
+
+    Supported intents (as returned by ``llm.agent_command``):
+
+    * ``merge_entities`` - merge the entities named in ``source_names`` into
+      the one named ``target_name``.
+    * ``answer_question`` - answer via ``llm.rag_query``.
+    * ``edit_entity`` - set one field of a named entity.
+
+    Any other intent yields ``action == "none"`` plus a usage hint.
+
+    Returns:
+        A dict with ``intent``, ``explanation`` and intent-specific keys
+        (``action``, ``success``, ``merged``, ``failed``, ``answer``,
+        ``entity``, ``error``, ``message``).
+
+    Raises:
+        HTTPException: 500 when the database or LLM client is unavailable,
+            404 when the project does not exist.
+    """
+    if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE:
+        raise HTTPException(status_code=500, detail="Service not available")
+
+    db = get_db_manager()
+    llm = get_llm_client()
+
+    project = db.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    project_context = db.get_project_summary(project_id)
+
+    # Let the LLM turn the free-text command into an intent + parameters.
+    parsed = await llm.agent_command(command.command, project_context)
+
+    intent = parsed.get("intent", "unknown")
+    # The model may emit `"params": null`; treat that like a missing key.
+    params = parsed.get("params") or {}
+
+    result = {"intent": intent, "explanation": parsed.get("explanation", "")}
+
+    if intent == "merge_entities":
+        top_entities = project_context.get("top_entities", [])
+        target_entity = _resolve_top_entity(
+            db, project_id, top_entities, params.get("target_name", "")
+        )
+
+        # Collect distinct source entities, never including the target itself.
+        source_entities = []
+        seen_ids = set()
+        for name in params.get("source_names") or []:
+            ent = _resolve_top_entity(db, project_id, top_entities, name)
+            if not ent or ent.id in seen_ids:
+                continue
+            if target_entity and ent.id == target_entity.id:
+                continue
+            seen_ids.add(ent.id)
+            source_entities.append(ent)
+
+        merged = []
+        failed = []
+        if target_entity:
+            for source in source_entities:
+                try:
+                    db.merge_entities(target_entity.id, source.id)
+                    merged.append(source.name)
+                except Exception as e:
+                    # Best effort: keep merging the rest, but report the
+                    # failure to the caller instead of only printing it.
+                    print(f"Merge failed: {e}")
+                    failed.append({"name": source.name, "error": str(e)})
+
+        result["action"] = "merge_entities"
+        result["target"] = target_entity.name if target_entity else None
+        result["merged"] = merged
+        result["success"] = len(merged) > 0
+        if failed:
+            result["failed"] = failed
+
+    elif intent == "answer_question":
+        # Fall back to the raw command text when the LLM gave no question.
+        answer = await llm.rag_query(params.get("question", command.command), "", project_context)
+        result["action"] = "answer"
+        result["answer"] = answer
+
+    elif intent == "edit_entity":
+        entity_name = params.get("entity_name", "")
+        field = params.get("field", "")
+        value = params.get("value", "")
+
+        result["action"] = "edit_entity"
+        entity = db.get_entity_by_name(project_id, entity_name) if entity_name else None
+
+        if not entity:
+            result["success"] = False
+            result["error"] = "Entity not found"
+        elif (
+            not isinstance(field, str)
+            or not field.isidentifier()
+            or field in ("id", "project_id")
+        ):
+            # `field` comes from LLM output and is splatted into keyword
+            # arguments; refuse blank/malformed names and identity columns.
+            # NOTE(review): consider an explicit allow-list of editable
+            # fields once the signature of db.update_entity is confirmed.
+            result["success"] = False
+            result["error"] = f"Field not editable: {field!r}"
+        else:
+            updated = db.update_entity(entity.id, **{field: value})
+            result["entity"] = {"id": updated.id, "name": updated.name} if updated else None
+            result["success"] = updated is not None
+
+    else:
+        result["action"] = "none"
+        result["message"] = "无法理解的指令，请尝试：\n- 合并实体：把所有'客户端'合并到'App'\n- 提问：张总对项目的态度如何？\n- 编辑：修改'K8s'的定义为..."
+
+    return result
+
+
+@app.get("/api/v1/projects/{project_id}/agent/suggest")
+async def agent_suggest(project_id: str):
+    """Ask the LLM for 3-5 insights / suggested actions about a project.
+
+    Returns:
+        The first JSON object in the LLM reply that contains a
+        ``"suggestions"`` key, or ``{"suggestions": []}`` when the reply
+        holds no such object.
+
+    Raises:
+        HTTPException: 500 when the database or LLM client is unavailable,
+            404 when the project does not exist.
+    """
+    # Local import: the only `json` import visible in this diff is scoped to
+    # agent_query, so this function must not rely on it.
+    import json
+
+    if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE:
+        raise HTTPException(status_code=500, detail="Service not available")
+
+    db = get_db_manager()
+    llm = get_llm_client()
+
+    # Same existence check as the other project-scoped agent endpoints.
+    if not db.get_project(project_id):
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    project_context = db.get_project_summary(project_id)
+
+    prompt = f"""基于以下项目数据，提供3-5条分析建议：

{json.dumps(project_context, ensure_ascii=False, indent=2)}

请提供：
1. 数据洞察发现
2. 建议的操作（如合并相似实体、补充定义等）
3. 值得关注的关键信息

返回 JSON 格式：{{"suggestions": [{{"type": "insight|action", "title": "...", "description": "..."}}]}}"""
+
+    messages = [ChatMessage(role="user", content=prompt)]
+    content = await llm.chat(messages, temperature=0.3) or ""
+
+    # Try every "{" as the start of a JSON object and let the decoder find
+    # the matching end. The previous regex (r'\{{.*?\}}') required a literal
+    # "{{", which valid JSON never contains, so it could not match.
+    decoder = json.JSONDecoder()
+    start = content.find("{")
+    while start != -1:
+        try:
+            data, _ = decoder.raw_decode(content[start:])
+        except json.JSONDecodeError:
+            data = None
+        # Require the documented envelope so a nested item is never returned.
+        if isinstance(data, dict) and "suggestions" in data:
+            return data
+        start = content.find("{", start + 1)
+
+    return {"suggestions": []}
+
+
+# ==================== Phase 4: 知识溯源 API ====================
+
+@app.get("/api/v1/relations/{relation_id}/provenance")
+async def get_relation_provenance(relation_id: str):
+    """Return provenance information for one relation.
+
+    The response carries the relation's source/target names, type and
+    evidence, plus the originating transcript (id and filename) when the
+    relation has a transcript id, otherwise ``None``.
+
+    Raises:
+        HTTPException: 500 when the database is unavailable, 404 when the
+            relation does not exist.
+    """
+    if not DB_AVAILABLE:
+        raise HTTPException(status_code=500, detail="Database not available")
+
+    details = get_db_manager().get_relation_with_details(relation_id)
+    if not details:
+        raise HTTPException(status_code=404, detail="Relation not found")
+
+    # Only expose a transcript reference when the relation is linked to one.
+    transcript = None
+    if details.get("transcript_id"):
+        transcript = {
+            "id": details.get("transcript_id"),
+            "filename": details.get("transcript_filename"),
+        }
+
+    return {
+        "relation_id": relation_id,
+        "source": details.get("source_name"),
+        "target": details.get("target_name"),
+        "type": details.get("relation_type"),
+        "evidence": details.get("evidence"),
+        "transcript": transcript,
+    }
+
+
+@app.get("/api/v1/entities/{entity_id}/details")
+async def get_entity_details(entity_id: str):
+    """Return an entity together with its mentions.
+
+    The payload is whatever ``db.get_entity_with_mentions`` produces.
+
+    Raises:
+        HTTPException: 500 when the database is unavailable, 404 when the
+            entity does not exist.
+    """
+    if not DB_AVAILABLE:
+        raise HTTPException(status_code=500, detail="Database not available")
+
+    details = get_db_manager().get_entity_with_mentions(entity_id)
+    if details:
+        return details
+
+    raise HTTPException(status_code=404, detail="Entity not found")
+
+
+@app.get("/api/v1/entities/{entity_id}/evolution")
+async def get_entity_evolution(entity_id: str):
+    """Analyse how an entity evolves across its mentions.
+
+    The entity's name and mentions are passed to
+    ``llm.analyze_entity_evolution``; the response combines that analysis
+    with a timeline entry (date, snippet, transcript id, filename) for each
+    mention.
+
+    Raises:
+        HTTPException: 500 when the database or LLM client is unavailable,
+            404 when the entity does not exist.
+    """
+    if not (DB_AVAILABLE and LLM_CLIENT_AVAILABLE):
+        raise HTTPException(status_code=500, detail="Service not available")
+
+    db = get_db_manager()
+    llm = get_llm_client()
+
+    entity = db.get_entity_with_mentions(entity_id)
+    if not entity:
+        raise HTTPException(status_code=404, detail="Entity not found")
+
+    analysis = await llm.analyze_entity_evolution(entity["name"], entity.get("mentions", []))
+
+    # One timeline row per mention, in the order the mentions are stored.
+    timeline = []
+    for mention in entity.get("mentions", []):
+        timeline.append({
+            "date": mention.get("transcript_date"),
+            "snippet": mention.get("text_snippet"),
+            "transcript_id": mention.get("transcript_id"),
+            "filename": mention.get("filename"),
+        })
+
+    return {
+        "entity_id": entity_id,
+        "entity_name": entity["name"],
+        "mention_count": entity.get("mention_count", 0),
+        "analysis": analysis,
+        "timeline": timeline,
+    }
+
+
+# ==================== Phase 4: 实体管理增强 API ====================
+
+@app.get("/api/v1/projects/{project_id}/entities/search")
+async def search_entities(project_id: str, q: str):
+    """Search a project's entities by the query string ``q``.
+
+    Returns:
+        A list of ``{"id", "name", "type", "definition"}`` dicts, one per
+        entity returned by ``db.search_entities``.
+
+    Raises:
+        HTTPException: 500 when the database is unavailable.
+    """
+    if not DB_AVAILABLE:
+        raise HTTPException(status_code=500, detail="Database not available")
+
+    matches = get_db_manager().search_entities(project_id, q)
+
+    payload = []
+    for ent in matches:
+        payload.append({
+            "id": ent.id,
+            "name": ent.name,
+            "type": ent.type,
+            "definition": ent.definition,
+        })
+    return payload