diff --git a/STATUS.md b/STATUS.md index 709bc53..7d92a31 100644 --- a/STATUS.md +++ b/STATUS.md @@ -4,11 +4,13 @@ ## 当前阶段 -Phase 1: 骨架与单体分析 (MVP) - **已完成 ✅** +Phase 2: 交互与纠错工作台 - **已完成 ✅** ## 已完成 -### 后端 (backend/) +### Phase 1: 骨架与单体分析 (MVP) ✅ + +#### 后端 (backend/) - ✅ FastAPI 项目框架搭建 - ✅ SQLite 数据库设计 (schema.sql) - ✅ 数据库管理模块 (db_manager.py) @@ -26,7 +28,7 @@ Phase 1: 骨架与单体分析 (MVP) - **已完成 ✅** - ✅ entity_mentions 表数据写入 - ✅ entity_relations 表数据写入 -### 前端 (frontend/) +#### 前端 (frontend/) - ✅ 项目管理页面 (index.html) - ✅ 知识工作台页面 (workbench.html) - ✅ D3.js 知识图谱可视化 @@ -35,20 +37,34 @@ Phase 1: 骨架与单体分析 (MVP) - **已完成 ✅** - ✅ 转录文本中实体高亮显示 - ✅ 图谱与文本联动(点击实体双向高亮) -### 基础设施 -- ✅ Dockerfile -- ✅ docker-compose.yml -- ✅ Git 仓库初始化 +### Phase 2: 交互与纠错工作台 ✅ -## Phase 2 计划 (交互与纠错工作台) - **即将开始** +#### 后端 API 新增 +- ✅ 实体编辑 API (PUT /api/v1/entities/{id}) +- ✅ 实体删除 API (DELETE /api/v1/entities/{id}) +- ✅ 实体合并 API (POST /api/v1/entities/{id}/merge) +- ✅ 手动创建实体 API (POST /api/v1/projects/{id}/entities) +- ✅ 关系创建 API (POST /api/v1/projects/{id}/relations) +- ✅ 关系删除 API (DELETE /api/v1/relations/{id}) +- ✅ 转录编辑 API (PUT /api/v1/transcripts/{id}) -- 实体定义编辑功能 -- 实体合并功能 -- 关系编辑功能(添加/删除) -- 人工修正数据保存 -- 文本编辑器增强(支持编辑转录文本) +#### 前端交互功能 +- ✅ 实体编辑器模态框(名称、类型、定义、别名) +- ✅ 右键菜单(编辑实体、合并实体、标记为实体) +- ✅ 实体合并功能 +- ✅ 关系管理(添加、删除) +- ✅ 转录文本编辑模式 +- ✅ 划词创建实体 +- ✅ 文本与图谱双向联动 -## Phase 3 计划 (记忆与生长) +#### 数据库更新 +- ✅ update_entity() - 更新实体信息 +- ✅ delete_entity() - 删除实体及关联数据 +- ✅ delete_relation() - 删除关系 +- ✅ update_relation() - 更新关系 +- ✅ update_transcript() - 更新转录文本 + +## Phase 3 计划 (记忆与生长) - **即将开始** - 多文件图谱融合 - 实体对齐算法优化 diff --git a/backend/db_manager.py b/backend/db_manager.py index 519a8c7..0ef5322 100644 --- a/backend/db_manager.py +++ b/backend/db_manager.py @@ -290,6 +290,104 @@ class DatabaseManager: ).fetchall() conn.close() return [dict(r) for r in rows] + + def update_entity(self, entity_id: str, **kwargs) -> Entity: + """更新实体信息""" + conn = self.get_conn() + + # 构建更新字段 + allowed_fields = ['name', 'type', 'definition', 'canonical_name'] + updates = [] + values = [] + + for field in allowed_fields: + if field in kwargs: + updates.append(f"{field} = ?") + values.append(kwargs[field]) + + # 处理别名 + if 'aliases' in kwargs: + updates.append("aliases = ?") + values.append(json.dumps(kwargs['aliases'])) + + if not updates: + conn.close() + return self.get_entity(entity_id) + + updates.append("updated_at = ?") + values.append(datetime.now().isoformat()) + values.append(entity_id) + + query = f"UPDATE entities SET {', '.join(updates)} WHERE id = ?" + conn.execute(query, values) + conn.commit() + conn.close() + + return self.get_entity(entity_id) + + def delete_entity(self, entity_id: str): + """删除实体及其关联数据""" + conn = self.get_conn() + + # 删除提及记录 + conn.execute("DELETE FROM entity_mentions WHERE entity_id = ?", (entity_id,)) + + # 删除关系 + conn.execute("DELETE FROM entity_relations WHERE source_entity_id = ? OR target_entity_id = ?", + (entity_id, entity_id)) + + # 删除实体 + conn.execute("DELETE FROM entities WHERE id = ?", (entity_id,)) + + conn.commit() + conn.close() + + def delete_relation(self, relation_id: str): + """删除关系""" + conn = self.get_conn() + conn.execute("DELETE FROM entity_relations WHERE id = ?", (relation_id,)) + conn.commit() + conn.close() + + def update_relation(self, relation_id: str, **kwargs) -> dict: + """更新关系""" + conn = self.get_conn() + + allowed_fields = ['relation_type', 'evidence'] + updates = [] + values = [] + + for field in allowed_fields: + if field in kwargs: + updates.append(f"{field} = ?") + values.append(kwargs[field]) + + if updates: + query = f"UPDATE entity_relations SET {', '.join(updates)} WHERE id = ?" + values.append(relation_id) + conn.execute(query, values) + conn.commit() + + row = conn.execute("SELECT * FROM entity_relations WHERE id = ?", (relation_id,)).fetchone() + conn.close() + + return dict(row) if row else None + + def update_transcript(self, transcript_id: str, full_text: str) -> dict: + """更新转录文本""" + conn = self.get_conn() + now = datetime.now().isoformat() + + conn.execute( + "UPDATE transcripts SET full_text = ?, updated_at = ? WHERE id = ?", + (full_text, now, transcript_id) + ) + conn.commit() + + row = conn.execute("SELECT * FROM transcripts WHERE id = ?", (transcript_id,)).fetchone() + conn.close() + + return dict(row) if row else None # Singleton instance diff --git a/backend/main.py b/backend/main.py index 470c305..d878b3b 100644 --- a/backend/main.py +++ b/backend/main.py @@ -71,10 +71,251 @@ class ProjectCreate(BaseModel): name: str description: str = "" +class EntityUpdate(BaseModel): + name: Optional[str] = None + type: Optional[str] = None + definition: Optional[str] = None + aliases: Optional[List[str]] = None + +class RelationCreate(BaseModel): + source_entity_id: str + target_entity_id: str + relation_type: str + evidence: Optional[str] = "" + +class TranscriptUpdate(BaseModel): + full_text: str + +class EntityMergeRequest(BaseModel): + source_entity_id: str + target_entity_id: str + # API Keys KIMI_API_KEY = os.getenv("KIMI_API_KEY", "") KIMI_BASE_URL = "https://api.kimi.com/coding" +# Phase 2: Entity Edit API +@app.put("/api/v1/entities/{entity_id}") +async def update_entity(entity_id: str, update: EntityUpdate): + """更新实体信息(名称、类型、定义、别名)""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + entity = db.get_entity(entity_id) + if not entity: + raise HTTPException(status_code=404, detail="Entity not found") + + # 更新字段 + update_data = {k: v for k, v in update.dict().items() if v is not None} + updated = db.update_entity(entity_id, **update_data) + + return { + "id": updated.id, + "name": updated.name, + "type": updated.type, + "definition": updated.definition, + "aliases": updated.aliases + } + +@app.delete("/api/v1/entities/{entity_id}") +async def delete_entity(entity_id: str): + """删除实体""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + entity = db.get_entity(entity_id) + if not entity: + raise HTTPException(status_code=404, detail="Entity not found") + + db.delete_entity(entity_id) + return {"success": True, "message": f"Entity {entity_id} deleted"} + +@app.post("/api/v1/entities/{entity_id}/merge") +async def merge_entities_endpoint(entity_id: str, merge_req: EntityMergeRequest): + """合并两个实体""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + + # 验证两个实体都存在 + source = db.get_entity(merge_req.source_entity_id) + target = db.get_entity(merge_req.target_entity_id) + + if not source or not target: + raise HTTPException(status_code=404, detail="Entity not found") + + result = db.merge_entities(merge_req.target_entity_id, merge_req.source_entity_id) + return { + "success": True, + "merged_entity": { + "id": result.id, + "name": result.name, + "type": result.type, + "definition": result.definition, + "aliases": result.aliases + } + } + +# Phase 2: Relation Edit API +@app.post("/api/v1/projects/{project_id}/relations") +async def create_relation_endpoint(project_id: str, relation: RelationCreate): + """创建新的实体关系""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + + # 验证实体存在 + source = db.get_entity(relation.source_entity_id) + target = db.get_entity(relation.target_entity_id) + + if not source or not target: + raise HTTPException(status_code=404, detail="Source or target entity not found") + + relation_id = db.create_relation( + project_id=project_id, + source_entity_id=relation.source_entity_id, + target_entity_id=relation.target_entity_id, + relation_type=relation.relation_type, + evidence=relation.evidence + ) + + return { + "id": relation_id, + "source_id": relation.source_entity_id, + "target_id": relation.target_entity_id, + "type": relation.relation_type, + "success": True + } + +@app.delete("/api/v1/relations/{relation_id}") +async def delete_relation(relation_id: str): + """删除关系""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + db.delete_relation(relation_id) + return {"success": True, "message": f"Relation {relation_id} deleted"} + +@app.put("/api/v1/relations/{relation_id}") +async def update_relation(relation_id: str, relation: RelationCreate): + """更新关系""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + updated = db.update_relation( + relation_id=relation_id, + relation_type=relation.relation_type, + evidence=relation.evidence + ) + + return { + "id": relation_id, + "type": updated["relation_type"], + "evidence": updated["evidence"], + "success": True + } + +# Phase 2: Transcript Edit API +@app.get("/api/v1/transcripts/{transcript_id}") +async def get_transcript(transcript_id: str): + """获取转录详情""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + transcript = db.get_transcript(transcript_id) + + if not transcript: + raise HTTPException(status_code=404, detail="Transcript not found") + + return transcript + +@app.put("/api/v1/transcripts/{transcript_id}") +async def update_transcript(transcript_id: str, update: TranscriptUpdate): + """更新转录文本(人工修正)""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + transcript = db.get_transcript(transcript_id) + + if not transcript: + raise HTTPException(status_code=404, detail="Transcript not found") + + updated = db.update_transcript(transcript_id, update.full_text) + return { + "id": transcript_id, + "full_text": updated["full_text"], + "updated_at": updated["updated_at"], + "success": True + } + +# Phase 2: Manual Entity Creation +class ManualEntityCreate(BaseModel): + name: str + type: str = "OTHER" + definition: str = "" + transcript_id: Optional[str] = None + start_pos: Optional[int] = None + end_pos: Optional[int] = None + +@app.post("/api/v1/projects/{project_id}/entities") +async def create_manual_entity(project_id: str, entity: ManualEntityCreate): + """手动创建实体(划词新建)""" + if not DB_AVAILABLE: + raise HTTPException(status_code=500, detail="Database not available") + + db = get_db_manager() + + # 检查是否已存在 + existing = db.get_entity_by_name(project_id, entity.name) + if existing: + return { + "id": existing.id, + "name": existing.name, + "type": existing.type, + "existed": True + } + + entity_id = str(uuid.uuid4())[:8] + new_entity = db.create_entity(Entity( + id=entity_id, + project_id=project_id, + name=entity.name, + type=entity.type, + definition=entity.definition + )) + + # 如果有提及位置信息,保存提及 + if entity.transcript_id and entity.start_pos is not None and entity.end_pos is not None: + transcript = db.get_transcript(entity.transcript_id) + if transcript: + text = transcript["full_text"] + mention = EntityMention( + id=str(uuid.uuid4())[:8], + entity_id=entity_id, + transcript_id=entity.transcript_id, + start_pos=entity.start_pos, + end_pos=entity.end_pos, + text_snippet=text[max(0, entity.start_pos-20):min(len(text), entity.end_pos+20)], + confidence=1.0 + ) + db.add_mention(mention) + + return { + "id": new_entity.id, + "name": new_entity.name, + "type": new_entity.type, + "definition": new_entity.definition, + "success": True + } + def transcribe_audio(audio_data: bytes, filename: str) -> dict: """转录音频:OSS上传 + 听悟转录""" @@ -379,14 +620,31 @@ async def get_entity_mentions(entity_id: str): } for m in mentions] @app.post("/api/v1/entities/{entity_id}/merge") -async def merge_entities(entity_id: str, target_entity_id: str): +async def merge_entities_endpoint(entity_id: str, merge_req: EntityMergeRequest): """合并两个实体""" if not DB_AVAILABLE: raise HTTPException(status_code=500, detail="Database not available") db = get_db_manager() - result = db.merge_entities(target_entity_id, entity_id) - return {"success": True, "merged_entity": {"id": result.id, "name": result.name}} + + # 验证两个实体都存在 + source = db.get_entity(merge_req.source_entity_id) + target = db.get_entity(merge_req.target_entity_id) + + if not source or not target: + raise HTTPException(status_code=404, detail="Entity not found") + + result = db.merge_entities(merge_req.target_entity_id, merge_req.source_entity_id) + return { + "success": True, + "merged_entity": { + "id": result.id, + "name": result.name, + "type": result.type, + "definition": result.definition, + "aliases": result.aliases + } + } # Health check @app.get("/health") diff --git a/frontend/app.js b/frontend/app.js index dc872a2..e8dc04f 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -1,4 +1,4 @@ -// InsightFlow Frontend - Production Version +// InsightFlow Frontend - Phase 2 (Interactive Workbench) const API_BASE = '/api/v1'; let currentProject = null; @@ -6,6 +6,9 @@ let currentData = null; let selectedEntity = null; let projectRelations = []; let projectEntities = []; +let currentTranscript = null; +let editMode = false; +let contextMenuTarget = null; // Init document.addEventListener('DOMContentLoaded', () => { @@ -37,6 +40,8 @@ async function initWorkbench() { if (nameEl) nameEl.textContent = currentProject.name; initUpload(); + initContextMenu(); + initTextSelection(); await loadProjectData(); } catch (err) { @@ -65,12 +70,88 @@ async function uploadAudio(file) { return await res.json(); } +// Phase 2: Entity Edit API +async function updateEntity(entityId, data) { + const res = await fetch(`${API_BASE}/entities/${entityId}`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(data) + }); + if (!res.ok) throw new Error('Failed to update entity'); + return await res.json(); +} + +async function deleteEntityApi(entityId) { + const res = await fetch(`${API_BASE}/entities/${entityId}`, { + method: 'DELETE' + }); + if (!res.ok) throw new Error('Failed to delete entity'); + return await res.json(); +} + +async function mergeEntitiesApi(sourceId, targetId) { + const res = await fetch(`${API_BASE}/entities/${sourceId}/merge`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ source_entity_id: sourceId, target_entity_id: targetId }) + }); + if (!res.ok) throw new Error('Failed to merge entities'); + return await res.json(); +} + +async function createEntityApi(data) { + const res = await fetch(`${API_BASE}/projects/${currentProject.id}/entities`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(data) + }); + if (!res.ok) throw new Error('Failed to create entity'); + return await res.json(); +} + +// Phase 2: Relation API +async function createRelationApi(data) { + const res = await fetch(`${API_BASE}/projects/${currentProject.id}/relations`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(data) + }); + if (!res.ok) throw new Error('Failed to create relation'); + return await res.json(); +} + +async function deleteRelationApi(relationId) { + const res = await fetch(`${API_BASE}/relations/${relationId}`, { + method: 'DELETE' + }); + if (!res.ok) throw new Error('Failed to delete relation'); + return await res.json(); +} + +// Phase 2: Transcript API +async function getTranscript(transcriptId) { + const res = await fetch(`${API_BASE}/transcripts/${transcriptId}`); + if (!res.ok) throw new Error('Failed to get transcript'); + return await res.json(); +} + +async function updateTranscript(transcriptId, fullText) { + const res = await fetch(`${API_BASE}/transcripts/${transcriptId}`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ full_text: fullText }) + }); + if (!res.ok) throw new Error('Failed to update transcript'); + return await res.json(); +} + async function loadProjectData() { try { // 并行加载实体和关系 - const [entitiesRes, relationsRes] = await Promise.all([ + const [entitiesRes, relationsRes, transcriptsRes] = await Promise.all([ fetch(`${API_BASE}/projects/${currentProject.id}/entities`), - fetch(`${API_BASE}/projects/${currentProject.id}/relations`) + fetch(`${API_BASE}/projects/${currentProject.id}/relations`), + fetch(`${API_BASE}/projects/${currentProject.id}/transcripts`) ]); if (entitiesRes.ok) { @@ -80,14 +161,22 @@ async function loadProjectData() { projectRelations = await relationsRes.json(); } - currentData = { - transcript_id: 'project_view', - project_id: currentProject.id, - segments: [], - entities: projectEntities, - full_text: '', - created_at: new Date().toISOString() - }; + // 加载最新的转录 + if (transcriptsRes.ok) { + const transcripts = await transcriptsRes.json(); + if (transcripts.length > 0) { + currentTranscript = await getTranscript(transcripts[0].id); + currentData = { + transcript_id: currentTranscript.id, + project_id: currentProject.id, + segments: [{ speaker: '全文', text: currentTranscript.full_text }], + entities: projectEntities, + full_text: currentTranscript.full_text, + created_at: currentTranscript.created_at + }; + renderTranscript(); + } + } renderGraph(); renderEntityList(); @@ -97,39 +186,80 @@ async function loadProjectData() { } } +// Phase 2: Transcript Edit Mode +window.toggleEditMode = function() { + editMode = !editMode; + const editBtn = document.getElementById('editBtn'); + const saveBtn = document.getElementById('saveBtn'); + const content = document.getElementById('transcriptContent'); + + if (editMode) { + editBtn.style.display = 'none'; + saveBtn.style.display = 'inline-block'; + content.contentEditable = 'true'; + content.style.background = '#0f0f0f'; + content.style.border = '1px solid #00d4ff'; + content.focus(); + } else { + editBtn.style.display = 'inline-block'; + saveBtn.style.display = 'none'; + content.contentEditable = 'false'; + content.style.background = ''; + content.style.border = ''; + } +}; + +window.saveTranscript = async function() { + if (!currentTranscript) return; + + const content = document.getElementById('transcriptContent'); + const fullText = content.innerText; + + try { + await updateTranscript(currentTranscript.id, fullText); + currentTranscript.full_text = fullText; + toggleEditMode(); + alert('转录文本已保存'); + } catch (err) { + console.error('Save failed:', err); + alert('保存失败: ' + err.message); + } +}; + // Render transcript with entity highlighting function renderTranscript() { const container = document.getElementById('transcriptContent'); - if (!container || !currentData || !currentData.segments) return; + if (!container || !currentData) return; container.innerHTML = ''; - currentData.segments.forEach((seg, idx) => { - const div = document.createElement('div'); - div.className = 'segment'; - div.dataset.index = idx; - - // 高亮实体 - let text = seg.text; - const entities = findEntitiesInText(seg.text); - - // 按位置倒序替换,避免位置偏移 - entities.sort((a, b) => b.start - a.start); - - entities.forEach(ent => { - const before = text.slice(0, ent.start); - const name = text.slice(ent.start, ent.end); - const after = text.slice(ent.end); - text = before + `${name}` + after; - }); - - div.innerHTML = ` -
暂无关系
'; + return; + } + + container.innerHTML = entityRelations.map(r => { + const isSource = r.source_id === entityId; + const otherId = isSource ? r.target_id : r.source_id; + const other = projectEntities.find(e => e.id === otherId); + const otherName = other ? other.name : 'Unknown'; + const arrow = isSource ? '→' : '←'; + + return ` +