Phase 4: Agent Assistant + Provenance + Entity Cards + Confidence Indicators
- Add llm_client.py for Kimi API integration with RAG and streaming support
- Add Agent API endpoints: query, command, suggest
- Add Provenance API for relation source tracking
- Add Entity details API with mentions and relations
- Add Entity evolution analysis API
- Update workbench.html with Agent panel, entity cards, provenance modal
- Update app.js with Agent chat, entity hover cards, relation provenance
- Add low-confidence entity highlighting
- Update STATUS.md with Phase 4 progress
This commit is contained in:
@@ -453,6 +453,168 @@ class DatabaseManager:
|
||||
def get_all_entities_for_embedding(self, project_id: str) -> List[Entity]:
    """Return all entities of a project, for use in embedding computation.

    This is a thin alias: it forwards ``project_id`` unchanged to
    ``self.list_project_entities`` and returns that result as-is.

    Args:
        project_id: Identifier of the project whose entities are wanted.

    Returns:
        Whatever ``list_project_entities`` returns for ``project_id``.
    """
    project_entities = self.list_project_entities(project_id)
    return project_entities
|
||||
|
||||
# Phase 4: Agent & Provenance methods
|
||||
def get_relation_with_details(self, relation_id: str) -> Optional[dict]:
    """Fetch one relation with its endpoint names and source transcript.

    Args:
        relation_id: Value matched against ``entity_relations.id``.

    Returns:
        A dict with every ``entity_relations`` column plus ``source_name``,
        ``target_name``, ``transcript_filename`` and ``transcript_text``.
        The two transcript fields are ``None`` when no transcript row
        matches (LEFT JOIN). Returns ``None`` when no row comes back, i.e.
        the relation does not exist or one of its endpoint entities is
        missing (inner JOINs).

    Note:
        Assumes ``self.get_conn()`` yields rows that ``dict()`` can convert
        (e.g. ``sqlite3.Row``) -- confirm against ``get_conn``.
    """
    conn = self.get_conn()
    try:
        row = conn.execute(
            """SELECT r.*,
                      s.name as source_name, t.name as target_name,
                      tr.filename as transcript_filename, tr.full_text as transcript_text
               FROM entity_relations r
               JOIN entities s ON r.source_entity_id = s.id
               JOIN entities t ON r.target_entity_id = t.id
               LEFT JOIN transcripts tr ON r.transcript_id = tr.id
               WHERE r.id = ?""",
            (relation_id,),
        ).fetchone()
    finally:
        # Close on every path so a failing query cannot leak the connection.
        conn.close()

    return dict(row) if row else None
|
||||
|
||||
def get_entity_with_mentions(self, entity_id: str) -> Optional[dict]:
    """Fetch an entity together with its mentions and its relations.

    Args:
        entity_id: Value matched against ``entities.id``.

    Returns:
        ``None`` when no entity has that id. Otherwise a dict of the
        ``entities`` row with these keys set:

        * ``aliases``: the stored JSON decoded, or ``[]`` when the column
          is empty/NULL.
        * ``mentions``: one dict per ``entity_mentions`` row, with the
          transcript's ``filename`` and ``transcript_date`` added, ordered
          by transcript creation time and then ``start_pos``.
        * ``mention_count``: number of entries in ``mentions``.
        * ``relations``: one dict per relation where the entity is source
          or target, with ``source_name`` / ``target_name`` added, newest
          first.

    Raises:
        json.JSONDecodeError: if the stored ``aliases`` text is not valid
            JSON.
    """
    conn = self.get_conn()
    try:
        entity_row = conn.execute(
            "SELECT * FROM entities WHERE id = ?", (entity_id,)
        ).fetchone()
        if not entity_row:
            return None

        mentions = conn.execute(
            """SELECT m.*, t.filename, t.created_at as transcript_date
               FROM entity_mentions m
               JOIN transcripts t ON m.transcript_id = t.id
               WHERE m.entity_id = ?
               ORDER BY t.created_at, m.start_pos""",
            (entity_id,),
        ).fetchall()

        relations = conn.execute(
            """SELECT r.*,
                      s.name as source_name, t.name as target_name
               FROM entity_relations r
               JOIN entities s ON r.source_entity_id = s.id
               JOIN entities t ON r.target_entity_id = t.id
               WHERE r.source_entity_id = ? OR r.target_entity_id = ?
               ORDER BY r.created_at DESC""",
            (entity_id, entity_id),
        ).fetchall()
    finally:
        # Single exit point for the connection: covers the "not found"
        # early return as well as any query failure.
        conn.close()

    # Fetched rows stay usable after close, so decoding happens here; a
    # JSON error can therefore no longer leave the connection open.
    entity = dict(entity_row)
    entity['aliases'] = json.loads(entity['aliases']) if entity['aliases'] else []
    entity['mentions'] = [dict(m) for m in mentions]
    entity['mention_count'] = len(mentions)
    entity['relations'] = [dict(r) for r in relations]
    return entity
|
||||
|
||||
def search_entities(self, project_id: str, query: str) -> List[Entity]:
    """Search a project's entities by substring.

    The text is matched with SQL ``LIKE`` against the ``name``,
    ``definition`` and ``aliases`` columns. ``%`` and ``_`` in ``query``
    are escaped so they match literally instead of acting as wildcards
    (previously a query of ``%`` or ``_`` matched every entity).

    Args:
        project_id: Only entities with this ``project_id`` are considered.
        query: Text to look for; an empty string matches any non-NULL value.

    Returns:
        Matching entities as ``Entity`` objects, ordered by name. ``aliases``
        is JSON-decoded (``[]`` when empty/NULL) before construction.

    Note:
        NOTE(review): ``aliases`` is matched against its raw stored JSON
        text, so JSON punctuation can match, and non-ASCII aliases will only
        match if they were stored unescaped -- confirm how aliases are
        serialized on write.
    """
    # Backslash is the escape character declared in the ESCAPE clauses
    # below, so it has to be doubled first.
    escaped = (
        query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )
    pattern = f"%{escaped}%"

    conn = self.get_conn()
    try:
        rows = conn.execute(
            """SELECT * FROM entities
               WHERE project_id = ? AND
                     (name LIKE ? ESCAPE '\\'
                      OR definition LIKE ? ESCAPE '\\'
                      OR aliases LIKE ? ESCAPE '\\')
               ORDER BY name""",
            (project_id, pattern, pattern, pattern),
        ).fetchall()
    finally:
        # Close on every path so a failing query cannot leak the connection.
        conn.close()

    entities = []
    for row in rows:
        data = dict(row)
        data['aliases'] = json.loads(data['aliases']) if data['aliases'] else []
        entities.append(Entity(**data))
    return entities
|
||||
|
||||
def get_project_summary(self, project_id: str) -> dict:
    """Collect a project overview, intended as RAG context.

    Args:
        project_id: Project whose data is summarized.

    Returns:
        A dict with four keys:

        * ``project``: the ``projects`` row as a dict, or ``{}`` when the
          project does not exist.
        * ``statistics``: ``entity_count``, ``transcript_count`` and
          ``relation_count`` for the project.
        * ``recent_transcripts``: up to 5 newest transcripts, each with
          ``filename``, ``full_text`` and ``created_at``.
        * ``top_entities``: up to 10 entities with ``name``, ``type``,
          ``definition`` and ``mention_count``, most-mentioned first.

    Note:
        NOTE(review): ``full_text`` is returned untruncated; callers that
        build an LLM prompt from it may need to trim it themselves.
    """
    conn = self.get_conn()
    try:
        # Basic project row.
        project = conn.execute(
            "SELECT * FROM projects WHERE id = ?", (project_id,)
        ).fetchone()

        # Per-project counts.
        entity_count = conn.execute(
            "SELECT COUNT(*) as count FROM entities WHERE project_id = ?",
            (project_id,),
        ).fetchone()['count']

        transcript_count = conn.execute(
            "SELECT COUNT(*) as count FROM transcripts WHERE project_id = ?",
            (project_id,),
        ).fetchone()['count']

        relation_count = conn.execute(
            "SELECT COUNT(*) as count FROM entity_relations WHERE project_id = ?",
            (project_id,),
        ).fetchone()['count']

        # Most recent transcripts.
        recent_transcripts = conn.execute(
            """SELECT filename, full_text, created_at
               FROM transcripts
               WHERE project_id = ?
               ORDER BY created_at DESC
               LIMIT 5""",
            (project_id,),
        ).fetchall()

        # Most frequently mentioned entities; the LEFT JOIN keeps entities
        # that have no mentions, with a count of 0.
        top_entities = conn.execute(
            """SELECT e.name, e.type, e.definition, COUNT(m.id) as mention_count
               FROM entities e
               LEFT JOIN entity_mentions m ON e.id = m.entity_id
               WHERE e.project_id = ?
               GROUP BY e.id
               ORDER BY mention_count DESC
               LIMIT 10""",
            (project_id,),
        ).fetchall()
    finally:
        # Six queries share this connection; close it even if one fails.
        conn.close()

    return {
        'project': dict(project) if project else {},
        'statistics': {
            'entity_count': entity_count,
            'transcript_count': transcript_count,
            'relation_count': relation_count,
        },
        'recent_transcripts': [dict(t) for t in recent_transcripts],
        'top_entities': [dict(e) for e in top_entities],
    }
|
||||
|
||||
def get_transcript_context(self, transcript_id: str, position: int, context_chars: int = 200) -> str:
    """Return the transcript text surrounding a character position.

    The result is the part of ``full_text`` that lies inside the window
    ``[position - context_chars, position + context_chars)``.

    Args:
        transcript_id: Value matched against ``transcripts.id``.
        position: Character offset into the transcript's ``full_text``.
        context_chars: Number of characters to include on each side.

    Returns:
        The windowed text. An empty string is returned when the transcript
        does not exist, its ``full_text`` is NULL/empty, or the window does
        not overlap the text at all.
    """
    conn = self.get_conn()
    try:
        row = conn.execute(
            "SELECT full_text FROM transcripts WHERE id = ?",
            (transcript_id,),
        ).fetchone()
    finally:
        # Close on every path so a failing query cannot leak the connection.
        conn.close()

    if not row:
        return ""

    # A NULL column would otherwise fail on len(None).
    text = row['full_text'] or ""
    size = len(text)

    # Clamp both ends into [0, size]. Without this a negative end index is
    # interpreted by slicing as "from the end" and returns unrelated text.
    start = min(size, max(0, position - context_chars))
    end = max(start, min(size, position + context_chars))
    return text[start:end]
|
||||
|
||||
|
||||
# Singleton instance
|
||||
|
||||
Reference in New Issue
Block a user