Phase 4: Agent Assistant + Provenance + Entity Cards + Confidence Indicators

- Add llm_client.py for Kimi API integration with RAG and streaming support
- Add Agent API endpoints: query, command, suggest
- Add Provenance API for relation source tracking
- Add Entity details API with mentions and relations
- Add Entity evolution analysis API
- Update workbench.html with Agent panel, entity cards, provenance modal
- Update app.js with Agent chat, entity hover cards, relation provenance
- Add low-confidence entity highlighting
- Update STATUS.md with Phase 4 progress
2026-02-19 09:58:39 +08:00
parent 087a8d9c4d
commit 1f4fe5a33e
9 changed files with 1523 additions and 881 deletions
--- a/backend/db_manager.py
+++ b/backend/db_manager.py
@@ -453,6 +453,168 @@ class DatabaseManager:
    def get_all_entities_for_embedding(self, project_id: str) -> List[Entity]:
        """获取所有实体用于 embedding 计算"""
        return self.list_project_entities(project_id)
+    
+    # Phase 4: Agent & Provenance methods
+    def get_relation_with_details(self, relation_id: str) -> Optional[dict]:
+        """Fetch one relation with its endpoint names and source transcript.
+
+        The relation row is joined with both endpoint entities (exposed as
+        ``source_name`` / ``target_name``) and LEFT JOINed with its
+        transcript (``transcript_filename`` / ``transcript_text``), so the
+        transcript columns are NULL when no transcript row matches.
+
+        Args:
+            relation_id: Value matched against ``entity_relations.id``.
+
+        Returns:
+            The joined row as a dict, or ``None`` when the query yields no
+            row (unknown id, or one of the endpoint entities is missing,
+            because those joins are inner joins).
+        """
+        conn = self.get_conn()
+        try:
+            row = conn.execute(
+                """SELECT r.*,
+                    s.name as source_name, t.name as target_name,
+                    tr.filename as transcript_filename, tr.full_text as transcript_text
+                   FROM entity_relations r
+                   JOIN entities s ON r.source_entity_id = s.id
+                   JOIN entities t ON r.target_entity_id = t.id
+                   LEFT JOIN transcripts tr ON r.transcript_id = tr.id
+                   WHERE r.id = ?""",
+                (relation_id,)
+            ).fetchone()
+        finally:
+            # Close the connection even when the query raises, so a failing
+            # lookup does not leak it.
+            conn.close()
+        return dict(row) if row else None
+    
+    def get_entity_with_mentions(self, entity_id: str) -> Optional[dict]:
+        """Fetch an entity with its mentions and the relations it is part of.
+
+        Args:
+            entity_id: Value matched against ``entities.id``.
+
+        Returns:
+            ``None`` when no entity has that id. Otherwise the entity row as
+            a dict, extended with:
+
+            * ``aliases``: the stored JSON text decoded (``[]`` when empty);
+            * ``mentions``: mention rows joined with their transcript's
+              ``filename`` and ``created_at`` (as ``transcript_date``),
+              ordered by transcript date, then ``start_pos``;
+            * ``mention_count``: number of entries in ``mentions``;
+            * ``relations``: relations where the entity is source or target,
+              newest first, with ``source_name`` / ``target_name`` added.
+        """
+        conn = self.get_conn()
+        try:
+            entity_row = conn.execute(
+                "SELECT * FROM entities WHERE id = ?", (entity_id,)
+            ).fetchone()
+            if not entity_row:
+                return None
+
+            entity = dict(entity_row)
+            # aliases is stored as JSON text; NULL/empty decodes to [].
+            entity['aliases'] = json.loads(entity['aliases']) if entity['aliases'] else []
+
+            mentions = conn.execute(
+                """SELECT m.*, t.filename, t.created_at as transcript_date
+                   FROM entity_mentions m
+                   JOIN transcripts t ON m.transcript_id = t.id
+                   WHERE m.entity_id = ?
+                   ORDER BY t.created_at, m.start_pos""",
+                (entity_id,)
+            ).fetchall()
+
+            relations = conn.execute(
+                """SELECT r.*,
+                    s.name as source_name, t.name as target_name
+                   FROM entity_relations r
+                   JOIN entities s ON r.source_entity_id = s.id
+                   JOIN entities t ON r.target_entity_id = t.id
+                   WHERE r.source_entity_id = ? OR r.target_entity_id = ?
+                   ORDER BY r.created_at DESC""",
+                (entity_id, entity_id)
+            ).fetchall()
+        finally:
+            # One exit point for the connection: covers the early return
+            # above as well as any query or JSON decoding error.
+            conn.close()
+
+        entity['mentions'] = [dict(m) for m in mentions]
+        entity['mention_count'] = len(mentions)
+        entity['relations'] = [dict(r) for r in relations]
+        return entity
+    
+    def search_entities(self, project_id: str, query: str) -> List[Entity]:
+        """Search a project's entities by substring.
+
+        ``query`` is matched literally, as a substring, against the
+        ``name``, ``definition`` and ``aliases`` columns. ``%``, ``_`` and
+        ``\\`` in ``query`` are escaped so they are not treated as LIKE
+        wildcards.
+
+        NOTE(review): ``aliases`` is matched against its stored JSON text,
+        so how aliases were serialized (e.g. ASCII-escaped non-ASCII
+        characters) affects what matches -- confirm against the writer.
+
+        Args:
+            project_id: Only entities with this ``project_id`` are searched.
+            query: Text to look for; an empty string matches every entity
+                whose name, definition or aliases is not NULL.
+
+        Returns:
+            Matching entities ordered by name, with ``aliases`` decoded from
+            JSON (``[]`` when empty).
+        """
+        # Escape the escape character first, then the two LIKE wildcards.
+        escaped = (
+            query.replace('\\', '\\\\')
+            .replace('%', '\\%')
+            .replace('_', '\\_')
+        )
+        pattern = f'%{escaped}%'
+
+        conn = self.get_conn()
+        try:
+            rows = conn.execute(
+                """SELECT * FROM entities
+                   WHERE project_id = ? AND
+                   (name LIKE ? ESCAPE '\\'
+                    OR definition LIKE ? ESCAPE '\\'
+                    OR aliases LIKE ? ESCAPE '\\')
+                   ORDER BY name""",
+                (project_id, pattern, pattern, pattern)
+            ).fetchall()
+        finally:
+            # Close the connection even when the query raises.
+            conn.close()
+
+        entities = []
+        for row in rows:
+            data = dict(row)
+            data['aliases'] = json.loads(data['aliases']) if data['aliases'] else []
+            # Presumably the entities columns map 1:1 onto Entity's fields.
+            entities.append(Entity(**data))
+        return entities
+    
+    def get_project_summary(self, project_id: str) -> dict:
+        """Collect a project overview, used as RAG context.
+
+        Args:
+            project_id: Project whose data is summarized.
+
+        Returns:
+            A dict with four keys:
+
+            * ``project``: the project row as a dict (``{}`` when the id is
+              unknown);
+            * ``statistics``: ``entity_count``, ``transcript_count`` and
+              ``relation_count`` for the project;
+            * ``recent_transcripts``: up to 5 newest transcripts, each with
+              ``filename``, ``full_text`` (the whole text, not an excerpt)
+              and ``created_at``;
+            * ``top_entities``: up to 10 entities with the most mentions,
+              each with ``name``, ``type``, ``definition`` and
+              ``mention_count`` (entities without mentions count as 0).
+        """
+        conn = self.get_conn()
+        try:
+            # Basic project information.
+            project = conn.execute(
+                "SELECT * FROM projects WHERE id = ?", (project_id,)
+            ).fetchone()
+
+            # Row counts per table.
+            entity_count = conn.execute(
+                "SELECT COUNT(*) as count FROM entities WHERE project_id = ?",
+                (project_id,)
+            ).fetchone()['count']
+
+            transcript_count = conn.execute(
+                "SELECT COUNT(*) as count FROM transcripts WHERE project_id = ?",
+                (project_id,)
+            ).fetchone()['count']
+
+            relation_count = conn.execute(
+                "SELECT COUNT(*) as count FROM entity_relations WHERE project_id = ?",
+                (project_id,)
+            ).fetchone()['count']
+
+            # Most recently created transcripts.
+            recent_transcripts = conn.execute(
+                """SELECT filename, full_text, created_at
+                   FROM transcripts
+                   WHERE project_id = ?
+                   ORDER BY created_at DESC
+                   LIMIT 5""",
+                (project_id,)
+            ).fetchall()
+
+            # Most frequently mentioned entities; the LEFT JOIN keeps
+            # entities that have no mentions.
+            top_entities = conn.execute(
+                """SELECT e.name, e.type, e.definition, COUNT(m.id) as mention_count
+                   FROM entities e
+                   LEFT JOIN entity_mentions m ON e.id = m.entity_id
+                   WHERE e.project_id = ?
+                   GROUP BY e.id
+                   ORDER BY mention_count DESC
+                   LIMIT 10""",
+                (project_id,)
+            ).fetchall()
+        finally:
+            # Close the connection even when one of the queries raises.
+            conn.close()
+
+        return {
+            'project': dict(project) if project else {},
+            'statistics': {
+                'entity_count': entity_count,
+                'transcript_count': transcript_count,
+                'relation_count': relation_count
+            },
+            'recent_transcripts': [dict(t) for t in recent_transcripts],
+            'top_entities': [dict(e) for e in top_entities]
+        }
+    
+    def get_transcript_context(self, transcript_id: str, position: int, context_chars: int = 200) -> str:
+        """Return the transcript text surrounding a character position.
+
+        Args:
+            transcript_id: Value matched against ``transcripts.id``.
+            position: Character offset into the transcript's ``full_text``.
+            context_chars: Number of characters to include on each side of
+                ``position``.
+
+        Returns:
+            The slice ``[position - context_chars, position + context_chars)``
+            clipped to the bounds of the text. An empty string is returned
+            when the transcript does not exist, its text is NULL, or the
+            window lies entirely outside the text.
+        """
+        conn = self.get_conn()
+        try:
+            row = conn.execute(
+                "SELECT full_text FROM transcripts WHERE id = ?",
+                (transcript_id,)
+            ).fetchone()
+        finally:
+            # Close the connection even when the query raises.
+            conn.close()
+
+        if not row:
+            return ""
+
+        # A NULL full_text is treated as empty text instead of crashing.
+        text = row['full_text'] or ""
+        start = max(0, position - context_chars)
+        # Keep end >= start: a negative end would be read by Python as an
+        # offset from the end of the text and return unrelated content.
+        end = max(start, min(len(text), position + context_chars))
+        return text[start:end]


 # Singleton instance