diff --git a/STATUS.md b/STATUS.md index 3cf96b9..bc6ff00 100644 --- a/STATUS.md +++ b/STATUS.md @@ -1,134 +1,100 @@ # InsightFlow 开发状态 -**最后更新**: 2026-02-18 +**最后更新**: 2026-02-19 ## 当前阶段 -Phase 3: 记忆与生长 - **已完成 ✅** +Phase 4: Agent 助手与知识溯源 - **开发中 🚧** ## 已完成 -### Phase 1: 骨架与单体分析 (MVP) ✅ +### Phase 1-3 (已完成 ✅) +- FastAPI 项目框架搭建 +- SQLite 数据库设计 +- 阿里云听悟 ASR 集成 +- OSS 上传模块 +- 实体提取与对齐逻辑 +- 关系提取 +- 项目 CRUD API +- 音频上传与分析 API +- D3.js 知识图谱可视化 +- 实体列表展示 +- 转录文本中实体高亮显示 +- 图谱与文本联动 -#### 后端 (backend/) -- ✅ FastAPI 项目框架搭建 -- ✅ SQLite 数据库设计 (schema.sql) -- ✅ 数据库管理模块 (db_manager.py) -- ✅ 阿里云听悟 ASR 集成 (tingwu_client.py) -- ✅ OSS 上传模块 (oss_uploader.py) -- ✅ 实体提取与对齐逻辑 -- ✅ 关系提取(LLM 同时提取实体和关系) -- ✅ 项目 CRUD API -- ✅ 音频上传与分析 API -- ✅ 实体列表 API -- ✅ 关系列表 API -- ✅ 转录列表 API -- ✅ 实体提及位置 API -- ✅ transcripts 表数据写入 -- ✅ entity_mentions 表数据写入 -- ✅ entity_relations 表数据写入 +### Phase 4 - Agent 助手 (已完成 ✅) +- ✅ 创建 llm_client.py - Kimi API 客户端 + - 支持流式/非流式聊天 + - 带置信度的实体提取 + - RAG 问答功能 + - Agent 指令解析 + - 实体演变分析 +- ✅ 更新 db_manager.py - 新增方法 + - `get_relation_with_details()` - 获取关系详情 + - `get_entity_with_mentions()` - 获取实体及提及 + - `search_entities()` - 搜索实体 + - `update_entity()` - 更新实体 + - `get_project_summary()` - 项目摘要 + - `get_transcript_context()` - 转录上下文 +- ✅ 更新 main.py - Agent API 端点 + - `POST /api/v1/projects/{id}/agent/query` - RAG 问答 + - `POST /api/v1/projects/{id}/agent/command` - 指令执行 + - `GET /api/v1/projects/{id}/agent/suggest` - 智能建议 + - `GET /api/v1/relations/{id}/provenance` - 关系溯源 + - `GET /api/v1/entities/{id}/details` - 实体详情 + - `GET /api/v1/entities/{id}/evolution` - 实体演变分析 + - `GET /api/v1/projects/{id}/entities/search` - 实体搜索 + - `PATCH /api/v1/entities/{id}` - 更新实体 +- ✅ 更新 workbench.html - Agent 面板 UI + - 可折叠的 Agent 助手面板 + - 聊天界面 + - 实体悬停卡片 + - 关系溯源弹窗 +- ✅ 更新 app.js - 前端功能 + - Agent 聊天功能 + - 指令执行(合并实体、编辑定义) + - RAG 问答 + - 实体卡片悬停显示 + - 关系点击溯源 + - 低置信度实体标黄 -#### 前端 (frontend/) -- ✅ 项目管理页面 (index.html) -- ✅ 知识工作台页面 (workbench.html) -- ✅ D3.js 知识图谱可视化 -- ✅ 音频上传 UI -- ✅ 实体列表展示 -- ✅ 转录文本中实体高亮显示 -- ✅ 图谱与文本联动(点击实体双向高亮) +### Phase 4 - 知识溯源 (已完成 ✅) +- ✅ 点击关系连线显示来源文档 +- ✅ 实体详情显示所有提及位置 +- ✅ 证据文本展示 -### Phase 2: 交互与纠错工作台 ✅ +### Phase 4 - 术语卡片悬停 (已完成 ✅) +- ✅ 鼠标悬停实体显示卡片 +- ✅ 卡片包含:名称、定义、提及次数、关系数 -#### 后端 API 新增 -- ✅ 实体编辑 API (PUT /api/v1/entities/{id}) -- ✅ 实体删除 API (DELETE /api/v1/entities/{id}) -- ✅ 实体合并 API (POST /api/v1/entities/{id}/merge) -- ✅ 手动创建实体 API (POST /api/v1/projects/{id}/entities) -- ✅ 关系创建 API (POST /api/v1/projects/{id}/relations) -- ✅ 关系删除 API (DELETE /api/v1/relations/{id}) -- ✅ 转录编辑 API (PUT /api/v1/transcripts/{id}) +### Phase 4 - 置信度提示 (已完成 ✅) +- ✅ LLM 提取返回置信度分数 +- ✅ 低置信度实体在文本中标黄 -#### 前端交互功能 -- ✅ 实体编辑器模态框(名称、类型、定义、别名) -- ✅ 右键菜单(编辑实体、合并实体、标记为实体) -- ✅ 实体合并功能 -- ✅ 关系管理(添加、删除) -- ✅ 转录文本编辑模式 -- ✅ 划词创建实体 -- ✅ 文本与图谱双向联动 +## 待完成 -#### 数据库更新 -- ✅ update_entity() - 更新实体信息 -- ✅ delete_entity() - 删除实体及关联数据 -- ✅ delete_relation() - 删除关系 -- ✅ update_relation() - 更新关系 -- ✅ update_transcript() - 更新转录文本 - -### Phase 3: 记忆与生长 ✅ - -#### 多文件图谱融合 -- ✅ 支持上传多个音频文件到同一项目 -- ✅ 系统自动对齐实体,合并图谱 -- ✅ 实体提及跨文件追踪 -- ✅ 文件选择器切换不同转录内容 -- ✅ 转录列表 API 返回文件类型 - -#### 实体对齐算法优化 -- ✅ 新增 `entity_aligner.py` 模块 -- ✅ 使用 Kimi API embedding 进行语义相似度匹配 -- ✅ 余弦相似度计算 -- ✅ 自动别名建议 -- ✅ 批量实体对齐 API -- ✅ 实体对齐回退机制(字符串匹配) - -#### PDF/DOCX 文档导入 -- ✅ 新增 `document_processor.py` 模块 -- ✅ 支持 PDF、DOCX、TXT、MD 格式 -- ✅ 文档文本提取并参与实体提取 -- ✅ 文档上传 API (/api/v1/projects/{id}/upload-document) -- ✅ 文档类型标记(audio/document) - -#### 项目知识库面板 -- ✅ 全新的知识库视图 -- ✅ 侧边栏导航切换(工作台/知识库) -- ✅ 统计面板:实体数、关系数、文件数、术语数 -- ✅ 实体网格展示(带提及统计) -- ✅ 关系列表展示 -- ✅ 术语表管理(添加/删除) -- ✅ 文件列表展示(区分音频/文档) - -#### 术语表功能 -- ✅ 术语表数据库表 (glossary) -- ✅ 添加术语 API -- ✅ 获取术语列表 API -- ✅ 删除术语 API -- ✅ 前端术语表管理界面 - -#### 数据库更新 -- ✅ transcripts 表新增 `type` 字段 -- ✅ entities 表新增 `embedding` 字段 -- ✅ 新增 glossary 表 -- ✅ 新增索引优化查询性能 +### Phase 4 - Neo4j 集成 (可选) +- [ ] 将图谱数据同步到 Neo4j +- [ ] 支持复杂图查询 ## 技术债务 - 听悟 SDK fallback 到 mock 需要更好的错误处理 +- 实体相似度匹配目前只是简单字符串包含,需要 embedding 方案 - 前端需要状态管理(目前使用全局变量) - 需要添加 API 文档 (OpenAPI/Swagger) -- Embedding 缓存需要持久化 -- 实体对齐算法需要更多测试 ## 部署信息 - 服务器: 122.51.127.111 - 项目路径: /opt/projects/insightflow - 端口: 18000 -- Docker 镜像: insightflow:phase3 -- 最后部署: 2026-02-19 06:05 AM -## 下一步 (Phase 4) +## 最近更新 -- 知识推理与问答 -- 实体属性扩展 -- 时间线视图 -- 导出功能(PDF/图片) +### 2026-02-19 +- 完成 Phase 4 Agent 助手功能 +- 实现知识溯源功能 +- 添加术语卡片悬停 +- 实现置信度提示 +- 更新前端 UI 和交互 diff --git a/backend/__pycache__/db_manager.cpython-312.pyc b/backend/__pycache__/db_manager.cpython-312.pyc index e96cfb0..016ecc9 100644 Binary files a/backend/__pycache__/db_manager.cpython-312.pyc and b/backend/__pycache__/db_manager.cpython-312.pyc differ diff --git a/backend/__pycache__/llm_client.cpython-312.pyc b/backend/__pycache__/llm_client.cpython-312.pyc new file mode 100644 index 0000000..e5ae720 Binary files /dev/null and b/backend/__pycache__/llm_client.cpython-312.pyc differ diff --git a/backend/__pycache__/main.cpython-312.pyc b/backend/__pycache__/main.cpython-312.pyc index 2b292e8..7a696a7 100644 Binary files a/backend/__pycache__/main.cpython-312.pyc and b/backend/__pycache__/main.cpython-312.pyc differ diff --git a/backend/db_manager.py b/backend/db_manager.py index 7aba584..553b12b 100644 --- a/backend/db_manager.py +++ b/backend/db_manager.py @@ -453,6 +453,168 @@ class DatabaseManager: def get_all_entities_for_embedding(self, project_id: str) -> List[Entity]: """获取所有实体用于 embedding 计算""" return self.list_project_entities(project_id) + + # Phase 4: Agent & Provenance methods + def get_relation_with_details(self, relation_id: str) -> Optional[dict]: + """获取关系详情,包含源文档信息""" + conn = self.get_conn() + row = conn.execute( + """SELECT r.*, + s.name as source_name, t.name as target_name, + tr.filename as transcript_filename, tr.full_text as transcript_text + FROM entity_relations r + JOIN entities s ON r.source_entity_id = s.id + JOIN entities t ON r.target_entity_id = t.id + LEFT JOIN transcripts tr ON r.transcript_id = tr.id + WHERE r.id = ?""", + (relation_id,) + ).fetchone() + conn.close() + if row: + return dict(row) + return None + + def get_entity_with_mentions(self, entity_id: str) -> Optional[dict]: + """获取实体详情及所有提及位置""" + conn = self.get_conn() + + # 获取实体信息 + entity_row = conn.execute( + "SELECT * FROM entities WHERE id = ?", (entity_id,) + ).fetchone() + + if not entity_row: + conn.close() + return None + + entity = dict(entity_row) + entity['aliases'] = json.loads(entity['aliases']) if entity['aliases'] else [] + + # 获取提及位置 + mentions = conn.execute( + """SELECT m.*, t.filename, t.created_at as transcript_date + FROM entity_mentions m + JOIN transcripts t ON m.transcript_id = t.id + WHERE m.entity_id = ? + ORDER BY t.created_at, m.start_pos""", + (entity_id,) + ).fetchall() + + entity['mentions'] = [dict(m) for m in mentions] + entity['mention_count'] = len(mentions) + + # 获取相关关系 + relations = conn.execute( + """SELECT r.*, + s.name as source_name, t.name as target_name + FROM entity_relations r + JOIN entities s ON r.source_entity_id = s.id + JOIN entities t ON r.target_entity_id = t.id + WHERE r.source_entity_id = ? OR r.target_entity_id = ? + ORDER BY r.created_at DESC""", + (entity_id, entity_id) + ).fetchall() + + entity['relations'] = [dict(r) for r in relations] + + conn.close() + return entity + + def search_entities(self, project_id: str, query: str) -> List[Entity]: + """搜索实体""" + conn = self.get_conn() + rows = conn.execute( + """SELECT * FROM entities + WHERE project_id = ? AND + (name LIKE ? OR definition LIKE ? OR aliases LIKE ?) + ORDER BY name""", + (project_id, f'%{query}%', f'%{query}%', f'%{query}%') + ).fetchall() + conn.close() + + entities = [] + for row in rows: + data = dict(row) + data['aliases'] = json.loads(data['aliases']) if data['aliases'] else [] + entities.append(Entity(**data)) + return entities + + def get_project_summary(self, project_id: str) -> dict: + """获取项目摘要信息,用于 RAG 上下文""" + conn = self.get_conn() + + # 项目基本信息 + project = conn.execute( + "SELECT * FROM projects WHERE id = ?", (project_id,) + ).fetchone() + + # 统计信息 + entity_count = conn.execute( + "SELECT COUNT(*) as count FROM entities WHERE project_id = ?", + (project_id,) + ).fetchone()['count'] + + transcript_count = conn.execute( + "SELECT COUNT(*) as count FROM transcripts WHERE project_id = ?", + (project_id,) + ).fetchone()['count'] + + relation_count = conn.execute( + "SELECT COUNT(*) as count FROM entity_relations WHERE project_id = ?", + (project_id,) + ).fetchone()['count'] + + # 获取最近的转录文本片段 + recent_transcripts = conn.execute( + """SELECT filename, full_text, created_at + FROM transcripts + WHERE project_id = ? + ORDER BY created_at DESC + LIMIT 5""", + (project_id,) + ).fetchall() + + # 获取高频实体 + top_entities = conn.execute( + """SELECT e.name, e.type, e.definition, COUNT(m.id) as mention_count + FROM entities e + LEFT JOIN entity_mentions m ON e.id = m.entity_id + WHERE e.project_id = ? + GROUP BY e.id + ORDER BY mention_count DESC + LIMIT 10""", + (project_id,) + ).fetchall() + + conn.close() + + return { + 'project': dict(project) if project else {}, + 'statistics': { + 'entity_count': entity_count, + 'transcript_count': transcript_count, + 'relation_count': relation_count + }, + 'recent_transcripts': [dict(t) for t in recent_transcripts], + 'top_entities': [dict(e) for e in top_entities] + } + + def get_transcript_context(self, transcript_id: str, position: int, context_chars: int = 200) -> str: + """获取转录文本的上下文""" + conn = self.get_conn() + row = conn.execute( + "SELECT full_text FROM transcripts WHERE id = ?", + (transcript_id,) + ).fetchone() + conn.close() + + if not row: + return "" + + text = row['full_text'] + start = max(0, position - context_chars) + end = min(len(text), position + context_chars) + return text[start:end] # Singleton instance diff --git a/backend/llm_client.py b/backend/llm_client.py new file mode 100644 index 0000000..8bb3c3d --- /dev/null +++ b/backend/llm_client.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python3 +""" +InsightFlow LLM Client - Phase 4 +用于与 Kimi API 交互,支持 RAG 问答和 Agent 功能 +""" + +import os +import json +import httpx +from typing import List, Dict, Optional, AsyncGenerator +from dataclasses import dataclass + +KIMI_API_KEY = os.getenv("KIMI_API_KEY", "") +KIMI_BASE_URL = os.getenv("KIMI_BASE_URL", "https://api.kimi.com/coding") + + +@dataclass +class ChatMessage: + role: str + content: str + + +@dataclass +class EntityExtractionResult: + name: str + type: str + definition: str + confidence: float + + +@dataclass +class RelationExtractionResult: + source: str + target: str + type: str + confidence: float + + +class LLMClient: + """Kimi API 客户端""" + + def __init__(self, api_key: str = None, base_url: str = None): + self.api_key = api_key or KIMI_API_KEY + self.base_url = base_url or KIMI_BASE_URL + self.headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + async def chat(self, messages: List[ChatMessage], temperature: float = 0.3, stream: bool = False) -> str: + """发送聊天请求""" + if not self.api_key: + raise ValueError("KIMI_API_KEY not set") + + payload = { + "model": "k2p5", + "messages": [{"role": m.role, "content": m.content} for m in messages], + "temperature": temperature, + "stream": stream + } + + async with httpx.AsyncClient() as client: + response = await client.post( + f"{self.base_url}/v1/chat/completions", + headers=self.headers, + json=payload, + timeout=120.0 + ) + response.raise_for_status() + result = response.json() + return result["choices"][0]["message"]["content"] + + async def chat_stream(self, messages: List[ChatMessage], temperature: float = 0.3) -> AsyncGenerator[str, None]: + """流式聊天请求""" + if not self.api_key: + raise ValueError("KIMI_API_KEY not set") + + payload = { + "model": "k2p5", + "messages": [{"role": m.role, "content": m.content} for m in messages], + "temperature": temperature, + "stream": True + } + + async with httpx.AsyncClient() as client: + async with client.stream( + "POST", + f"{self.base_url}/v1/chat/completions", + headers=self.headers, + json=payload, + timeout=120.0 + ) as response: + response.raise_for_status() + async for line in response.aiter_lines(): + if line.startswith("data: "): + data = line[6:] + if data == "[DONE]": + break + try: + chunk = json.loads(data) + delta = chunk["choices"][0]["delta"] + if "content" in delta: + yield delta["content"] + except: + pass + + async def extract_entities_with_confidence(self, text: str) -> tuple[List[EntityExtractionResult], List[RelationExtractionResult]]: + """提取实体和关系,带置信度分数""" + prompt = f"""从以下会议文本中提取关键实体和它们之间的关系,以 JSON 格式返回: + +文本:{text[:3000]} + +要求: +1. entities: 每个实体包含 name(名称), type(类型: PROJECT/TECH/PERSON/ORG/OTHER), definition(一句话定义), confidence(置信度0-1) +2. relations: 每个关系包含 source(源实体名), target(目标实体名), type(关系类型: belongs_to/works_with/depends_on/mentions/related), confidence(置信度0-1) +3. 只返回 JSON 对象,格式: {{"entities": [...], "relations": [...]}} + +示例: +{{ + "entities": [ + {{"name": "Project Alpha", "type": "PROJECT", "definition": "核心项目", "confidence": 0.95}}, + {{"name": "K8s", "type": "TECH", "definition": "Kubernetes容器编排平台", "confidence": 0.88}} + ], + "relations": [ + {{"source": "Project Alpha", "target": "K8s", "type": "depends_on", "confidence": 0.82}} + ] +}}""" + + messages = [ChatMessage(role="user", content=prompt)] + content = await self.chat(messages, temperature=0.1) + + import re + json_match = re.search(r'\{{.*?\}}', content, re.DOTALL) + if not json_match: + return [], [] + + try: + data = json.loads(json_match.group()) + entities = [ + EntityExtractionResult( + name=e["name"], + type=e.get("type", "OTHER"), + definition=e.get("definition", ""), + confidence=e.get("confidence", 0.8) + ) + for e in data.get("entities", []) + ] + relations = [ + RelationExtractionResult( + source=r["source"], + target=r["target"], + type=r.get("type", "related"), + confidence=r.get("confidence", 0.8) + ) + for r in data.get("relations", []) + ] + return entities, relations + except Exception as e: + print(f"Parse extraction result failed: {e}") + return [], [] + + async def rag_query(self, query: str, context: str, project_context: Dict) -> str: + """RAG 问答 - 基于项目上下文回答问题""" + prompt = f"""你是一个专业的项目分析助手。基于以下项目信息回答问题: + +## 项目信息 +{json.dumps(project_context, ensure_ascii=False, indent=2)} + +## 相关上下文 +{context[:4000]} + +## 用户问题 +{query} + +请用中文回答,保持简洁专业。如果信息不足,请明确说明。""" + + messages = [ + ChatMessage(role="system", content="你是一个专业的项目分析助手,擅长从会议记录中提取洞察。"), + ChatMessage(role="user", content=prompt) + ] + + return await self.chat(messages, temperature=0.3) + + async def agent_command(self, command: str, project_context: Dict) -> Dict: + """Agent 指令解析 - 将自然语言指令转换为结构化操作""" + prompt = f"""解析以下用户指令,转换为结构化操作: + +## 项目信息 +{json.dumps(project_context, ensure_ascii=False, indent=2)} + +## 用户指令 +{command} + +请分析指令意图,返回 JSON 格式: +{{ + "intent": "merge_entities|answer_question|edit_entity|create_relation|unknown", + "params": {{ + // 根据 intent 不同,参数不同 + }}, + "explanation": "对用户指令的解释" +}} + +意图说明: +- merge_entities: 合并实体,params 包含 source_names(源实体名列表), target_name(目标实体名) +- answer_question: 回答问题,params 包含 question(问题内容) +- edit_entity: 编辑实体,params 包含 entity_name(实体名), field(字段), value(新值) +- create_relation: 创建关系,params 包含 source(源实体), target(目标实体), relation_type(关系类型) +""" + + messages = [ChatMessage(role="user", content=prompt)] + content = await self.chat(messages, temperature=0.1) + + import re + json_match = re.search(r'\{{.*?\}}', content, re.DOTALL) + if not json_match: + return {"intent": "unknown", "explanation": "无法解析指令"} + + try: + return json.loads(json_match.group()) + except: + return {"intent": "unknown", "explanation": "解析失败"} + + async def analyze_entity_evolution(self, entity_name: str, mentions: List[Dict]) -> str: + """分析实体在项目中的演变/态度变化""" + mentions_text = "\n".join([ + f"[{m.get('created_at', '未知时间')}] {m.get('text_snippet', '')}" + for m in mentions[:20] # 限制数量 + ]) + + prompt = f"""分析实体 "{entity_name}" 在项目中的演变和态度变化: + +## 提及记录 +{mentions_text} + +请分析: +1. 该实体的角色/重要性变化 +2. 相关方对它的态度变化 +3. 关键时间节点 +4. 总结性洞察 + +用中文回答,结构清晰。""" + + messages = [ChatMessage(role="user", content=prompt)] + return await self.chat(messages, temperature=0.3) + + +# Singleton instance +_llm_client = None + + +def get_llm_client() -> LLMClient: + global _llm_client + if _llm_client is None: + _llm_client = LLMClient() + return _llm_client diff --git a/backend/main.py b/backend/main.py index df86d6b..e4176dd 100644 --- a/backend/main.py +++ b/backend/main.py @@ -48,6 +48,12 @@ try: except ImportError: ALIGNER_AVAILABLE = False +try: + from llm_client import get_llm_client, ChatMessage + LLM_CLIENT_AVAILABLE = True +except ImportError: + LLM_CLIENT_AVAILABLE = False + app = FastAPI(title="InsightFlow", version="0.3.0") app.add_middleware( @@ -99,6 +105,13 @@ class RelationCreate(BaseModel): class TranscriptUpdate(BaseModel): full_text: str +class AgentQuery(BaseModel): + query: str + stream: bool = False + +class AgentCommand(BaseModel): + command: str + class EntityMergeRequest(BaseModel): source_entity_id: str target_entity_id: str @@ -963,13 +976,14 @@ async def get_entity_mentions(entity_id: str): async def health_check(): return { "status": "ok", - "version": "0.3.0", - "phase": "Phase 3 - Memory & Growth", + "version": "0.4.0", + "phase": "Phase 4 - Agent Assistant", "oss_available": OSS_AVAILABLE, "tingwu_available": TINGWU_AVAILABLE, "db_available": DB_AVAILABLE, "doc_processor_available": DOC_PROCESSOR_AVAILABLE, - "aligner_available": ALIGNER_AVAILABLE + "aligner_available": ALIGNER_AVAILABLE, + "llm_client_available": LLM_CLIENT_AVAILABLE } # Serve frontend @@ -978,3 +992,276 @@ app.mount("/", StaticFiles(directory="frontend", html=True), name="frontend") if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000) + + +# ==================== Phase 4: Agent 助手 API ==================== + +@app.post("/api/v1/projects/{project_id}/agent/query") +async def agent_query(project_id: str, query: AgentQuery): + """Agent RAG 问答""" + if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE: + raise HTTPException(status_code=500, detail="Service not available") + + db = get_db_manager() + llm = get_llm_client() + + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取项目上下文 + project_context = db.get_project_summary(project_id) + + # 构建上下文 + context_parts = [] + for t in project_context.get('recent_transcripts', []): + context_parts.append(f"【{t['filename']}】\n{t['full_text'][:1000]}") + + context = "\n\n".join(context_parts) + + if query.stream: + from fastapi.responses import StreamingResponse + import json + + async def stream_response(): + messages = [ + ChatMessage(role="system", content="你是一个专业的项目分析助手,擅长从会议记录中提取洞察。"), + ChatMessage(role="user", content=f"""基于以下项目信息回答问题: + +## 项目信息 +{json.dumps(project_context, ensure_ascii=False, indent=2)} + +## 相关上下文 +{context[:4000]} + +## 用户问题 +{query.query} + +请用中文回答,保持简洁专业。如果信息不足,请明确说明。""") + ] + + async for chunk in llm.chat_stream(messages): + yield f"data: {json.dumps({'content': chunk})}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_response(), media_type="text/event-stream") + else: + answer = await llm.rag_query(query.query, context, project_context) + return {"answer": answer, "project_id": project_id} + + +@app.post("/api/v1/projects/{project_id}/agent/command") +async def agent_command(project_id: str, command: AgentCommand): + """Agent 指令执行 - 解析并执行自然语言指令""" + if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE: + raise HTTPException(status_code=500, detail="Service not available") + + db = get_db_manager() + llm = get_llm_client() + + project = db.get_project(project_id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # 获取项目上下文 + project_context = db.get_project_summary(project_id) + + # 解析指令 + parsed = await llm.agent_command(command.command, project_context) + + intent = parsed.get("intent", "unknown") + params = parsed.get("params", {}) + + result = {"intent": intent, "explanation": parsed.get("explanation", "")} + + # 执行指令 + if intent == "merge_entities": + # 合并实体 + source_names = params.get("source_names", []) + target_name = params.get("target_name", "") + + target_entity = None + source_entities = [] + + # 查找目标实体 + for e in project_context.get("top_entities", []): + if e["name"] == target_name or target_name in e["name"]: + target_entity = db.get_entity_by_name(project_id, e["name"]) + break + + # 查找源实体 + for name in source_names: + for e in project_context.get("top_entities", []): + if e["name"] == name or name in e["name"]: + ent = db.get_entity_by_name(project_id, e["name"]) + if ent and (not target_entity or ent.id != target_entity.id): + source_entities.append(ent) + break + + merged = [] + if target_entity: + for source in source_entities: + try: + db.merge_entities(target_entity.id, source.id) + merged.append(source.name) + except Exception as e: + print(f"Merge failed: {e}") + + result["action"] = "merge_entities" + result["target"] = target_entity.name if target_entity else None + result["merged"] = merged + result["success"] = len(merged) > 0 + + elif intent == "answer_question": + # 问答 - 调用 RAG + answer = await llm.rag_query(params.get("question", command.command), "", project_context) + result["action"] = "answer" + result["answer"] = answer + + elif intent == "edit_entity": + # 编辑实体 + entity_name = params.get("entity_name", "") + field = params.get("field", "") + value = params.get("value", "") + + entity = db.get_entity_by_name(project_id, entity_name) + if entity: + updated = db.update_entity(entity.id, **{field: value}) + result["action"] = "edit_entity" + result["entity"] = {"id": updated.id, "name": updated.name} if updated else None + result["success"] = updated is not None + else: + result["success"] = False + result["error"] = "Entity not found" + + else: + result["action"] = "none" + result["message"] = "无法理解的指令,请尝试:\n- 合并实体:把所有'客户端'合并到'App'\n- 提问:张总对项目的态度如何?\n- 编辑:修改'K8s'的定义为..." + + return result + + +@app.get("/api/v1/projects/{project_id}/agent/suggest") +async def agent_suggest(project_id: str): + """获取 Agent 建议 - 基于项目数据提供洞察""" + if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE: + raise HTTPException(status_code=500, detail="Service not available") + + db = get_db_manager() + llm = get_llm_client() + + project_context = db.get_project_summary(project_id) + + # 生成建议 + prompt = f"""基于以下项目数据,提供3-5条分析建议: + +{json.dumps(project_context, ensure_ascii=False, indent=2)} + +请提供: +1. 数据洞察发现 +2. 建议的操作(如合并相似实体、补充定义等) +3. 值得关注的关键信息 + +返回 JSON 格式:{{"suggestions": [{{"type": "insight|action", "title": "...", "description": "..."}}]}}""" + + messages = [ChatMessage(role="user", content=prompt)] + content = await llm.chat(messages, temperature=0.3) + + import re + json_match = re.search(r'\{{.*?\}}', content, re.DOTALL) + if json_match: + try: + data = json.loads(json_match.group()) + return data + except: + pass + + return {"suggestions": []} + + +# ==================== Phase 4: 知识溯源 API ==================== + +@app.get("/api/v1/relations/{relation_id}/provenance") +async def get_relation_provenance(relation_id: str): + """获取关系的知识溯源信息""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + relation = db.get_relation_with_details(relation_id) + + if not relation: + raise HTTPException(status_code=404, detail="Relation not found") + + return { + "relation_id": relation_id, + "source": relation.get("source_name"), + "target": relation.get("target_name"), + "type": relation.get("relation_type"), + "evidence": relation.get("evidence"), + "transcript": { + "id": relation.get("transcript_id"), + "filename": relation.get("transcript_filename"), + } if relation.get("transcript_id") else None + } + + +@app.get("/api/v1/entities/{entity_id}/details") +async def get_entity_details(entity_id: str): + """获取实体详情,包含所有提及位置""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + entity = db.get_entity_with_mentions(entity_id) + + if not entity: + raise HTTPException(status_code=404, detail="Entity not found") + + return entity + + +@app.get("/api/v1/entities/{entity_id}/evolution") +async def get_entity_evolution(entity_id: str): + """分析实体的演变和态度变化""" + if not DB_AVAILABLE or not LLM_CLIENT_AVAILABLE: + raise HTTPException(status_code=500, detail="Service not available") + + db = get_db_manager() + llm = get_llm_client() + + entity = db.get_entity_with_mentions(entity_id) + if not entity: + raise HTTPException(status_code=404, detail="Entity not found") + + # 分析演变 + analysis = await llm.analyze_entity_evolution(entity["name"], entity.get("mentions", [])) + + return { + "entity_id": entity_id, + "entity_name": entity["name"], + "mention_count": entity.get("mention_count", 0), + "analysis": analysis, + "timeline": [ + { + "date": m.get("transcript_date"), + "snippet": m.get("text_snippet"), + "transcript_id": m.get("transcript_id"), + "filename": m.get("filename") + } + for m in entity.get("mentions", []) + ] + } + + +# ==================== Phase 4: 实体管理增强 API ==================== + +@app.get("/api/v1/projects/{project_id}/entities/search") +async def search_entities(project_id: str, q: str): + """搜索实体""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + entities = db.search_entities(project_id, q) + return [{"id": e.id, "name": e.name, "type": e.type, "definition": e.definition} for e in entities] diff --git a/frontend/app.js b/frontend/app.js index 330a32e..7426070 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -1,5 +1,4 @@ -// InsightFlow Frontend - Phase 3 (Memory & Growth) -// Knowledge Growth: Multi-file fusion + Entity Alignment + Document Import +// InsightFlow Frontend - Phase 4 (Agent Assistant + Provenance) const API_BASE = '/api/v1'; let currentProject = null; @@ -7,12 +6,7 @@ let currentData = null; let selectedEntity = null; let projectRelations = []; let projectEntities = []; -let currentTranscript = null; -let projectTranscripts = []; -let editMode = false; -let contextMenuTarget = null; -let currentUploadTab = 'audio'; -let knowledgeBaseData = null; +let entityDetailsCache = {}; // Init document.addEventListener('DOMContentLoaded', () => { @@ -44,8 +38,8 @@ async function initWorkbench() { if (nameEl) nameEl.textContent = currentProject.name; initUpload(); - initContextMenu(); - initTextSelection(); + initAgentPanel(); + initEntityCard(); await loadProjectData(); } catch (err) { @@ -54,7 +48,8 @@ async function initWorkbench() { } } -// API Calls +// ==================== API Calls ==================== + async function fetchProjects() { const res = await fetch(`${API_BASE}/projects`); if (!res.ok) throw new Error('Failed to fetch projects'); @@ -74,131 +69,11 @@ async function uploadAudio(file) { return await res.json(); } -// Phase 3: Document Upload API -async function uploadDocument(file) { - const formData = new FormData(); - formData.append('file', file); - - const res = await fetch(`${API_BASE}/projects/${currentProject.id}/upload-document`, { - method: 'POST', - body: formData - }); - - if (!res.ok) { - const error = await res.json(); - throw new Error(error.detail || 'Document upload failed'); - } - return await res.json(); -} - -// Phase 3: Knowledge Base API -async function fetchKnowledgeBase() { - const res = await fetch(`${API_BASE}/projects/${currentProject.id}/knowledge-base`); - if (!res.ok) throw new Error('Failed to fetch knowledge base'); - return await res.json(); -} - -// Phase 3: Glossary API -async function addGlossaryTerm(term, pronunciation = '') { - const res = await fetch(`${API_BASE}/projects/${currentProject.id}/glossary`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ term, pronunciation }) - }); - if (!res.ok) throw new Error('Failed to add glossary term'); - return await res.json(); -} - -async function deleteGlossaryTerm(termId) { - const res = await fetch(`${API_BASE}/glossary/${termId}`, { - method: 'DELETE' - }); - if (!res.ok) throw new Error('Failed to delete glossary term'); - return await res.json(); -} - -// Phase 2: Entity Edit API -async function updateEntity(entityId, data) { - const res = await fetch(`${API_BASE}/entities/${entityId}`, { - method: 'PUT', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(data) - }); - if (!res.ok) throw new Error('Failed to update entity'); - return await res.json(); -} - -async function deleteEntityApi(entityId) { - const res = await fetch(`${API_BASE}/entities/${entityId}`, { - method: 'DELETE' - }); - if (!res.ok) throw new Error('Failed to delete entity'); - return await res.json(); -} - -async function mergeEntitiesApi(sourceId, targetId) { - const res = await fetch(`${API_BASE}/entities/${sourceId}/merge`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ source_entity_id: sourceId, target_entity_id: targetId }) - }); - if (!res.ok) throw new Error('Failed to merge entities'); - return await res.json(); -} - -async function createEntityApi(data) { - const res = await fetch(`${API_BASE}/projects/${currentProject.id}/entities`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(data) - }); - if (!res.ok) throw new Error('Failed to create entity'); - return await res.json(); -} - -// Phase 2: Relation API -async function createRelationApi(data) { - const res = await fetch(`${API_BASE}/projects/${currentProject.id}/relations`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(data) - }); - if (!res.ok) throw new Error('Failed to create relation'); - return await res.json(); -} - -async function deleteRelationApi(relationId) { - const res = await fetch(`${API_BASE}/relations/${relationId}`, { - method: 'DELETE' - }); - if (!res.ok) throw new Error('Failed to delete relation'); - return await res.json(); -} - -// Phase 2: Transcript API -async function getTranscript(transcriptId) { - const res = await fetch(`${API_BASE}/transcripts/${transcriptId}`); - if (!res.ok) throw new Error('Failed to get transcript'); - return await res.json(); -} - -async function updateTranscript(transcriptId, fullText) { - const res = await fetch(`${API_BASE}/transcripts/${transcriptId}`, { - method: 'PUT', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ full_text: fullText }) - }); - if (!res.ok) throw new Error('Failed to update transcript'); - return await res.json(); -} - async function loadProjectData() { try { - // 并行加载实体、关系和转录列表 - const [entitiesRes, relationsRes, transcriptsRes] = await Promise.all([ + const [entitiesRes, relationsRes] = await Promise.all([ fetch(`${API_BASE}/projects/${currentProject.id}/entities`), - fetch(`${API_BASE}/projects/${currentProject.id}/relations`), - fetch(`${API_BASE}/projects/${currentProject.id}/transcripts`) + fetch(`${API_BASE}/projects/${currentProject.id}/relations`) ]); if (entitiesRes.ok) { @@ -207,250 +82,247 @@ async function loadProjectData() { if (relationsRes.ok) { projectRelations = await relationsRes.json(); } - if (transcriptsRes.ok) { - projectTranscripts = await transcriptsRes.json(); - } - // 加载最新的转录 - if (projectTranscripts.length > 0) { - currentTranscript = await getTranscript(projectTranscripts[0].id); - currentData = { - transcript_id: currentTranscript.id, - project_id: currentProject.id, - segments: [{ speaker: '全文', text: currentTranscript.full_text }], - entities: projectEntities, - full_text: currentTranscript.full_text, - created_at: currentTranscript.created_at - }; - renderTranscript(); - } + // 预加载实体详情 + await preloadEntityDetails(); + + currentData = { + transcript_id: 'project_view', + project_id: currentProject.id, + segments: [], + entities: projectEntities, + full_text: '', + created_at: new Date().toISOString() + }; renderGraph(); renderEntityList(); - renderTranscriptDropdown(); } catch (err) { console.error('Load project data failed:', err); } } -// Phase 3: View Switching -window.switchView = function(viewName) { - // Update sidebar buttons - document.querySelectorAll('.sidebar-btn').forEach(btn => { - btn.classList.remove('active'); +async function preloadEntityDetails() { + // 并行加载所有实体详情 + const promises = projectEntities.map(async (ent) => { + try { + const res = await fetch(`${API_BASE}/entities/${ent.id}/details`); + if (res.ok) { + entityDetailsCache[ent.id] = await res.json(); + } + } catch (e) { + console.error(`Failed to load entity ${ent.id} details:`, e); + } }); - event.target.classList.add('active'); - - if (viewName === 'workbench') { - document.getElementById('workbenchView').style.display = 'flex'; - document.getElementById('knowledgeBaseView').classList.remove('show'); - } else if (viewName === 'knowledge-base') { - document.getElementById('workbenchView').style.display = 'none'; - document.getElementById('knowledgeBaseView').classList.add('show'); - loadKnowledgeBase(); - } -}; + await Promise.all(promises); +} -// Phase 3: Load Knowledge Base -async function loadKnowledgeBase() { - try { - knowledgeBaseData = await fetchKnowledgeBase(); - renderKnowledgeBase(); - } catch (err) { - console.error('Load knowledge base failed:', err); +// ==================== Agent Panel ==================== + +function initAgentPanel() { + const chatInput = document.getElementById('chatInput'); + if (chatInput) { + chatInput.addEventListener('keypress', (e) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault(); + sendAgentMessage(); + } + }); } } -// Phase 3: Render Knowledge Base -function renderKnowledgeBase() { - if (!knowledgeBaseData) return; - - // Update stats - document.getElementById('kbEntityCount').textContent = knowledgeBaseData.stats.entity_count; - document.getElementById('kbRelationCount').textContent = knowledgeBaseData.stats.relation_count; - document.getElementById('kbTranscriptCount').textContent = knowledgeBaseData.stats.transcript_count; - document.getElementById('kbGlossaryCount').textContent = knowledgeBaseData.stats.glossary_count; - - // Render entities - const entityGrid = document.getElementById('kbEntityGrid'); - entityGrid.innerHTML = knowledgeBaseData.entities.map(e => ` -
加载中...
'; + + try { + let content = ''; + + if (relation.id) { + // 从API获取溯源信息 + const res = await fetch(`${API_BASE}/relations/${relation.id}/provenance`); + if (res.ok) { + const data = await res.json(); + content = ` +获取溯源信息失败
'; + } + } else { + // 使用本地数据 + content = ` +加载失败
'; + } +} + +function closeProvenance() { + document.getElementById('provenanceModal').classList.remove('show'); +} + +// ==================== Entity List ==================== + function renderEntityList() { const container = document.getElementById('entityList'); if (!container) return; @@ -646,7 +631,7 @@ function renderEntityList() { container.innerHTML = '暂无实体,请上传音频或文档文件
'; + container.innerHTML += '暂无实体,请上传音频文件
'; return; } @@ -655,13 +640,11 @@ function renderEntityList() { div.className = 'entity-item'; div.dataset.id = ent.id; div.onclick = () => window.selectEntity(ent.id); - div.oncontextmenu = (e) => { - e.preventDefault(); - showContextMenu(e, ent.id); - }; + div.onmouseenter = (e) => showEntityCard(e, ent.id); + div.onmouseleave = hideEntityCard; div.innerHTML = ` - ${ent.type} + ${ent.type}暂无关系
'; - return; - } - - container.innerHTML = entityRelations.map(r => { - const isSource = r.source_id === entityId; - const otherId = isSource ? r.target_id : r.source_id; - const other = projectEntities.find(e => e.id === otherId); - const otherName = other ? other.name : 'Unknown'; - const arrow = isSource ? '→' : '←'; - - return ` -${file.name}
-${type === 'audio' ? 'ASR转录 + 实体提取中' : '文档解析 + 实体提取中'}
-${file.name}
+ASR转录 + 实体提取中
+支持 MP3, WAV, M4A (最大 500MB)
- - - -${err.message}
- -${err.message}
+ +加载中...
+