feat: Phase 1 MVP 完成
- 实现实体和关系同时提取(LLM)
- 添加 transcripts/mentions/relations 数据持久化
- 新增 API: 关系列表、转录列表、实体提及位置
- 前端实体高亮显示和图谱联动
- 添加 STATUS.md 跟踪开发进度
This commit is contained in:
69
STATUS.md
Normal file
69
STATUS.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# InsightFlow 开发状态
|
||||||
|
|
||||||
|
**最后更新**: 2026-02-18
|
||||||
|
|
||||||
|
## 当前阶段
|
||||||
|
|
||||||
|
Phase 1: 骨架与单体分析 (MVP) - **已完成 ✅**
|
||||||
|
|
||||||
|
## 已完成
|
||||||
|
|
||||||
|
### 后端 (backend/)
|
||||||
|
- ✅ FastAPI 项目框架搭建
|
||||||
|
- ✅ SQLite 数据库设计 (schema.sql)
|
||||||
|
- ✅ 数据库管理模块 (db_manager.py)
|
||||||
|
- ✅ 阿里云听悟 ASR 集成 (tingwu_client.py)
|
||||||
|
- ✅ OSS 上传模块 (oss_uploader.py)
|
||||||
|
- ✅ 实体提取与对齐逻辑
|
||||||
|
- ✅ 关系提取(LLM 同时提取实体和关系)
|
||||||
|
- ✅ 项目 CRUD API
|
||||||
|
- ✅ 音频上传与分析 API
|
||||||
|
- ✅ 实体列表 API
|
||||||
|
- ✅ 关系列表 API
|
||||||
|
- ✅ 转录列表 API
|
||||||
|
- ✅ 实体提及位置 API
|
||||||
|
- ✅ transcripts 表数据写入
|
||||||
|
- ✅ entity_mentions 表数据写入
|
||||||
|
- ✅ entity_relations 表数据写入
|
||||||
|
|
||||||
|
### 前端 (frontend/)
|
||||||
|
- ✅ 项目管理页面 (index.html)
|
||||||
|
- ✅ 知识工作台页面 (workbench.html)
|
||||||
|
- ✅ D3.js 知识图谱可视化
|
||||||
|
- ✅ 音频上传 UI
|
||||||
|
- ✅ 实体列表展示
|
||||||
|
- ✅ 转录文本中实体高亮显示
|
||||||
|
- ✅ 图谱与文本联动(点击实体双向高亮)
|
||||||
|
|
||||||
|
### 基础设施
|
||||||
|
- ✅ Dockerfile
|
||||||
|
- ✅ docker-compose.yml
|
||||||
|
- ✅ Git 仓库初始化
|
||||||
|
|
||||||
|
## Phase 2 计划 (交互与纠错工作台) - **即将开始**
|
||||||
|
|
||||||
|
- 实体定义编辑功能
|
||||||
|
- 实体合并功能
|
||||||
|
- 关系编辑功能(添加/删除)
|
||||||
|
- 人工修正数据保存
|
||||||
|
- 文本编辑器增强(支持编辑转录文本)
|
||||||
|
|
||||||
|
## Phase 3 计划 (记忆与生长)
|
||||||
|
|
||||||
|
- 多文件图谱融合
|
||||||
|
- 实体对齐算法优化
|
||||||
|
- PDF/DOCX 文档导入
|
||||||
|
- 项目知识库面板
|
||||||
|
|
||||||
|
## 技术债务
|
||||||
|
|
||||||
|
- 听悟 SDK fallback 到 mock 需要更好的错误处理
|
||||||
|
- 实体相似度匹配目前只是简单字符串包含,需要 embedding 方案
|
||||||
|
- 前端需要状态管理(目前使用全局变量)
|
||||||
|
- 需要添加 API 文档 (OpenAPI/Swagger)
|
||||||
|
|
||||||
|
## 部署信息
|
||||||
|
|
||||||
|
- 服务器: 122.51.127.111
|
||||||
|
- 项目路径: /opt/projects/insightflow
|
||||||
|
- 端口: 18000
|
||||||
@@ -221,9 +221,81 @@ class DatabaseManager:
|
|||||||
conn.close()
|
conn.close()
|
||||||
return [EntityMention(**dict(r)) for r in rows]
|
return [EntityMention(**dict(r)) for r in rows]
|
||||||
|
|
||||||
|
# Transcript operations
|
||||||
|
def save_transcript(self, transcript_id: str, project_id: str, filename: str, full_text: str) -> None:
    """Insert one row into the ``transcripts`` table.

    Args:
        transcript_id: Value stored in the ``id`` column.
        project_id: Identifier of the project the transcript belongs to.
        filename: Name of the uploaded source file.
        full_text: Complete transcribed text.

    ``created_at`` is filled with ``datetime.now().isoformat()``.
    Any database error propagates to the caller; the connection is closed
    in every case.
    """
    conn = self.get_conn()
    try:
        now = datetime.now().isoformat()
        conn.execute(
            "INSERT INTO transcripts (id, project_id, filename, full_text, created_at) VALUES (?, ?, ?, ?, ?)",
            (transcript_id, project_id, filename, full_text, now),
        )
        conn.commit()
    finally:
        # Close even when the INSERT or commit raises, so the connection
        # is not leaked on the error path.
        conn.close()
|
||||||
|
|
||||||
|
def get_transcript(self, transcript_id: str) -> Optional[dict]:
    """Fetch a single transcript row by its id.

    Args:
        transcript_id: Value to match against the ``id`` column.

    Returns:
        The row converted to a ``dict`` keyed by column name, or ``None``
        when no row matches.
    """
    conn = self.get_conn()
    try:
        row = conn.execute("SELECT * FROM transcripts WHERE id = ?", (transcript_id,)).fetchone()
    finally:
        # Close even when the query raises, so the connection is not leaked.
        conn.close()
    return dict(row) if row else None
|
||||||
|
|
||||||
|
def list_project_transcripts(self, project_id: str) -> List[dict]:
    """List all transcripts of a project, newest first.

    Args:
        project_id: Value to match against the ``project_id`` column.

    Returns:
        One ``dict`` per row, ordered by ``created_at`` descending; an
        empty list when the project has no transcripts.
    """
    conn = self.get_conn()
    try:
        rows = conn.execute(
            "SELECT * FROM transcripts WHERE project_id = ? ORDER BY created_at DESC",
            (project_id,),
        ).fetchall()
    finally:
        # Close even when the query raises, so the connection is not leaked.
        conn.close()
    return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
# Relation operations
|
||||||
|
def create_relation(self, project_id: str, source_entity_id: str, target_entity_id: str,
                    relation_type: str = "related", evidence: str = "", transcript_id: str = ""):
    """Insert a relation between two entities and return its generated id.

    Args:
        project_id: Identifier of the owning project.
        source_entity_id: Id of the entity the relation starts from.
        target_entity_id: Id of the entity the relation points to.
        relation_type: Label of the relation; defaults to ``"related"``.
        evidence: Supporting text stored with the relation.
        transcript_id: Id of the transcript the relation was extracted from.

    Returns:
        The new relation id: the first 8 characters of a random UUID4 string.

    Any database error propagates to the caller; the connection is closed
    in every case.
    """
    conn = self.get_conn()
    try:
        relation_id = str(uuid.uuid4())[:8]
        now = datetime.now().isoformat()
        conn.execute(
            """INSERT INTO entity_relations
            (id, project_id, source_entity_id, target_entity_id, relation_type, evidence, transcript_id, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
            (relation_id, project_id, source_entity_id, target_entity_id, relation_type, evidence, transcript_id, now),
        )
        conn.commit()
    finally:
        # Close even when the INSERT or commit raises, so the connection
        # is not leaked on the error path.
        conn.close()
    return relation_id
|
||||||
|
|
||||||
|
def get_entity_relations(self, entity_id: str) -> List[dict]:
    """List every relation in which an entity takes part, newest first.

    Args:
        entity_id: Entity id matched against both ``source_entity_id``
            and ``target_entity_id``.

    Returns:
        One ``dict`` per matching row, ordered by ``created_at``
        descending; an empty list when nothing matches.
    """
    conn = self.get_conn()
    try:
        rows = conn.execute(
            """SELECT * FROM entity_relations
            WHERE source_entity_id = ? OR target_entity_id = ?
            ORDER BY created_at DESC""",
            (entity_id, entity_id),
        ).fetchall()
    finally:
        # Close even when the query raises, so the connection is not leaked.
        conn.close()
    return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
def list_project_relations(self, project_id: str) -> List[dict]:
    """List all entity relations of a project, newest first.

    Args:
        project_id: Value to match against the ``project_id`` column.

    Returns:
        One ``dict`` per row, ordered by ``created_at`` descending; an
        empty list when the project has no relations.
    """
    conn = self.get_conn()
    try:
        rows = conn.execute(
            "SELECT * FROM entity_relations WHERE project_id = ? ORDER BY created_at DESC",
            (project_id,),
        ).fetchall()
    finally:
        # Close even when the query raises, so the connection is not leaked.
        conn.close()
    return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
# Singleton instance
|
# Singleton instance
|
||||||
_db_manager = None
|
_db_manager = None
|
||||||
|
|
||||||
|
|
||||||
def get_db_manager() -> DatabaseManager:
|
def get_db_manager() -> DatabaseManager:
|
||||||
global _db_manager
|
global _db_manager
|
||||||
if _db_manager is None:
|
if _db_manager is None:
|
||||||
|
|||||||
152
backend/main.py
152
backend/main.py
@@ -114,20 +114,34 @@ def mock_transcribe() -> dict:
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
def extract_entities_with_llm(text: str) -> List[dict]:
|
def extract_entities_with_llm(text: str) -> tuple[List[dict], List[dict]]:
|
||||||
"""使用 Kimi API 提取实体"""
|
"""使用 Kimi API 提取实体和关系
|
||||||
if not KIMI_API_KEY or not text:
|
|
||||||
return []
|
|
||||||
|
|
||||||
prompt = f"""从以下会议文本中提取关键实体,以 JSON 格式返回:
|
Returns:
|
||||||
|
(entities, relations): 实体列表和关系列表
|
||||||
|
"""
|
||||||
|
if not KIMI_API_KEY or not text:
|
||||||
|
return [], []
|
||||||
|
|
||||||
|
prompt = f"""从以下会议文本中提取关键实体和它们之间的关系,以 JSON 格式返回:
|
||||||
|
|
||||||
文本:{text[:3000]}
|
文本:{text[:3000]}
|
||||||
|
|
||||||
要求:
|
要求:
|
||||||
1. 每个实体包含:name(名称), type(类型: PROJECT/TECH/PERSON/ORG/OTHER), definition(一句话定义)
|
1. entities: 每个实体包含 name(名称), type(类型: PROJECT/TECH/PERSON/ORG/OTHER), definition(一句话定义)
|
||||||
2. 只返回 JSON 数组
|
2. relations: 每个关系包含 source(源实体名), target(目标实体名), type(关系类型: belongs_to/works_with/depends_on/mentions/related)
|
||||||
|
3. 只返回 JSON 对象,格式: {{"entities": [...], "relations": [...]}}
|
||||||
|
|
||||||
示例:[{{"name": "Project Alpha", "type": "PROJECT", "definition": "核心项目"}}]
|
示例:
|
||||||
|
{{
|
||||||
|
"entities": [
|
||||||
|
{{"name": "Project Alpha", "type": "PROJECT", "definition": "核心项目"}},
|
||||||
|
{{"name": "K8s", "type": "TECH", "definition": "Kubernetes容器编排平台"}}
|
||||||
|
],
|
||||||
|
"relations": [
|
||||||
|
{{"source": "Project Alpha", "target": "K8s", "type": "depends_on"}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -142,13 +156,14 @@ def extract_entities_with_llm(text: str) -> List[dict]:
|
|||||||
content = result["choices"][0]["message"]["content"]
|
content = result["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
import re
|
import re
|
||||||
json_match = re.search(r'\[.*?\]', content, re.DOTALL)
|
json_match = re.search(r'\{{.*?\}}', content, re.DOTALL)
|
||||||
if json_match:
|
if json_match:
|
||||||
return json.loads(json_match.group())
|
data = json.loads(json_match.group())
|
||||||
|
return data.get("entities", []), data.get("relations", [])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"LLM extraction failed: {e}")
|
print(f"LLM extraction failed: {e}")
|
||||||
|
|
||||||
return []
|
return [], []
|
||||||
|
|
||||||
def align_entity(project_id: str, name: str, db) -> Optional[Entity]:
|
def align_entity(project_id: str, name: str, db) -> Optional[Entity]:
|
||||||
"""实体对齐"""
|
"""实体对齐"""
|
||||||
@@ -202,12 +217,23 @@ async def upload_audio(project_id: str, file: UploadFile = File(...)):
|
|||||||
print(f"Processing: {file.filename}")
|
print(f"Processing: {file.filename}")
|
||||||
tw_result = transcribe_audio(content, file.filename)
|
tw_result = transcribe_audio(content, file.filename)
|
||||||
|
|
||||||
# 提取实体
|
# 提取实体和关系
|
||||||
print("Extracting entities...")
|
print("Extracting entities and relations...")
|
||||||
raw_entities = extract_entities_with_llm(tw_result["full_text"])
|
raw_entities, raw_relations = extract_entities_with_llm(tw_result["full_text"])
|
||||||
|
|
||||||
# 实体对齐
|
# 保存转录记录
|
||||||
|
transcript_id = str(uuid.uuid4())[:8]
|
||||||
|
db.save_transcript(
|
||||||
|
transcript_id=transcript_id,
|
||||||
|
project_id=project_id,
|
||||||
|
filename=file.filename,
|
||||||
|
full_text=tw_result["full_text"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# 实体对齐并保存
|
||||||
aligned_entities = []
|
aligned_entities = []
|
||||||
|
entity_name_to_id = {} # 用于关系映射
|
||||||
|
|
||||||
for raw_ent in raw_entities:
|
for raw_ent in raw_entities:
|
||||||
existing = align_entity(project_id, raw_ent["name"], db)
|
existing = align_entity(project_id, raw_ent["name"], db)
|
||||||
|
|
||||||
@@ -219,6 +245,7 @@ async def upload_audio(project_id: str, file: UploadFile = File(...)):
|
|||||||
definition=existing.definition,
|
definition=existing.definition,
|
||||||
aliases=existing.aliases
|
aliases=existing.aliases
|
||||||
)
|
)
|
||||||
|
entity_name_to_id[raw_ent["name"]] = existing.id
|
||||||
else:
|
else:
|
||||||
new_ent = db.create_entity(Entity(
|
new_ent = db.create_entity(Entity(
|
||||||
id=str(uuid.uuid4())[:8],
|
id=str(uuid.uuid4())[:8],
|
||||||
@@ -233,14 +260,47 @@ async def upload_audio(project_id: str, file: UploadFile = File(...)):
|
|||||||
type=new_ent.type,
|
type=new_ent.type,
|
||||||
definition=new_ent.definition
|
definition=new_ent.definition
|
||||||
)
|
)
|
||||||
|
entity_name_to_id[raw_ent["name"]] = new_ent.id
|
||||||
|
|
||||||
aligned_entities.append(ent_model)
|
aligned_entities.append(ent_model)
|
||||||
|
|
||||||
|
# 保存实体提及位置
|
||||||
|
full_text = tw_result["full_text"]
|
||||||
|
name = raw_ent["name"]
|
||||||
|
start_pos = 0
|
||||||
|
while True:
|
||||||
|
pos = full_text.find(name, start_pos)
|
||||||
|
if pos == -1:
|
||||||
|
break
|
||||||
|
mention = EntityMention(
|
||||||
|
id=str(uuid.uuid4())[:8],
|
||||||
|
entity_id=entity_name_to_id[name],
|
||||||
|
transcript_id=transcript_id,
|
||||||
|
start_pos=pos,
|
||||||
|
end_pos=pos + len(name),
|
||||||
|
text_snippet=full_text[max(0, pos-20):min(len(full_text), pos+len(name)+20)],
|
||||||
|
confidence=1.0
|
||||||
|
)
|
||||||
|
db.add_mention(mention)
|
||||||
|
start_pos = pos + 1
|
||||||
|
|
||||||
|
# 保存关系
|
||||||
|
for rel in raw_relations:
|
||||||
|
source_id = entity_name_to_id.get(rel.get("source", ""))
|
||||||
|
target_id = entity_name_to_id.get(rel.get("target", ""))
|
||||||
|
if source_id and target_id:
|
||||||
|
db.create_relation(
|
||||||
|
project_id=project_id,
|
||||||
|
source_entity_id=source_id,
|
||||||
|
target_entity_id=target_id,
|
||||||
|
relation_type=rel.get("type", "related"),
|
||||||
|
evidence=tw_result["full_text"][:200],
|
||||||
|
transcript_id=transcript_id
|
||||||
|
)
|
||||||
|
|
||||||
# 构建片段
|
# 构建片段
|
||||||
segments = [TranscriptSegment(**seg) for seg in tw_result["segments"]]
|
segments = [TranscriptSegment(**seg) for seg in tw_result["segments"]]
|
||||||
|
|
||||||
transcript_id = str(uuid.uuid4())[:8]
|
|
||||||
|
|
||||||
return AnalysisResult(
|
return AnalysisResult(
|
||||||
transcript_id=transcript_id,
|
transcript_id=transcript_id,
|
||||||
project_id=project_id,
|
project_id=project_id,
|
||||||
@@ -260,6 +320,64 @@ async def get_project_entities(project_id: str):
|
|||||||
entities = db.list_project_entities(project_id)
|
entities = db.list_project_entities(project_id)
|
||||||
return [{"id": e.id, "name": e.name, "type": e.type, "definition": e.definition} for e in entities]
|
return [{"id": e.id, "name": e.name, "type": e.type, "definition": e.definition} for e in entities]
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/v1/projects/{project_id}/relations")
async def get_project_relations(project_id: str):
    """Return the relations of a project with entity names resolved.

    Responds with an empty list when the database layer is unavailable.
    Otherwise each item carries the relation id, the source and target
    entity ids and names, the relation type and the stored evidence. An
    entity id missing from the project's entity list is reported under
    the name ``"Unknown"``.
    """
    if not DB_AVAILABLE:
        return []

    db = get_db_manager()
    relations = db.list_project_relations(project_id)

    # Build the id -> name lookup once rather than searching per relation.
    name_by_id = {}
    for entity in db.list_project_entities(project_id):
        name_by_id[entity.id] = entity.name

    payload = []
    for rel in relations:
        payload.append({
            "id": rel["id"],
            "source_id": rel["source_entity_id"],
            "source_name": name_by_id.get(rel["source_entity_id"], "Unknown"),
            "target_id": rel["target_entity_id"],
            "target_name": name_by_id.get(rel["target_entity_id"], "Unknown"),
            "type": rel["relation_type"],
            "evidence": rel["evidence"],
        })
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/v1/projects/{project_id}/transcripts")
async def get_project_transcripts(project_id: str):
    """Return a summary of every transcript stored for a project.

    Responds with an empty list when the database layer is unavailable.
    Otherwise each item holds the transcript id, filename, creation
    timestamp and a preview: the first 100 characters of the text followed
    by ``"..."`` when the text is longer than 100 characters, else the
    whole text.
    """
    if not DB_AVAILABLE:
        return []

    summaries = []
    for record in get_db_manager().list_project_transcripts(project_id):
        summary = {
            "id": record["id"],
            "filename": record["filename"],
            "created_at": record["created_at"],
        }
        if len(record["full_text"]) > 100:
            summary["preview"] = record["full_text"][:100] + "..."
        else:
            summary["preview"] = record["full_text"]
        summaries.append(summary)
    return summaries
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/v1/entities/{entity_id}/mentions")
async def get_entity_mentions(entity_id: str):
    """Return every recorded mention position of an entity.

    Responds with an empty list when the database layer is unavailable.
    Otherwise each item exposes the mention id, the transcript it belongs
    to, its start/end offsets, the surrounding text snippet and the
    confidence value.
    """
    if not DB_AVAILABLE:
        return []

    exported = ("id", "transcript_id", "start_pos", "end_pos", "text_snippet", "confidence")
    result = []
    for mention in get_db_manager().get_entity_mentions(entity_id):
        # Attributes are read in the same order as the response keys.
        result.append({field: getattr(mention, field) for field in exported})
    return result
|
||||||
|
|
||||||
@app.post("/api/v1/entities/{entity_id}/merge")
|
@app.post("/api/v1/entities/{entity_id}/merge")
|
||||||
async def merge_entities(entity_id: str, target_entity_id: str):
|
async def merge_entities(entity_id: str, target_entity_id: str):
|
||||||
"""合并两个实体"""
|
"""合并两个实体"""
|
||||||
|
|||||||
21
data/aws-learning-progress.json
Normal file
21
data/aws-learning-progress.json
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"current_domain": "复杂工作负载设计",
|
||||||
|
"domain_index": 0,
|
||||||
|
"domains": [
|
||||||
|
"复杂工作负载设计",
|
||||||
|
"高可用与容错",
|
||||||
|
"安全设计",
|
||||||
|
"成本优化",
|
||||||
|
"迁移与现代化",
|
||||||
|
"组织复杂性管理"
|
||||||
|
],
|
||||||
|
"recent_topics": [
|
||||||
|
"Multi-AZ vs Multi-Region",
|
||||||
|
"ASG Health Check",
|
||||||
|
"Storage Gateway",
|
||||||
|
"Transit Gateway",
|
||||||
|
"Transit Gateway 路由表"
|
||||||
|
],
|
||||||
|
"total_learned": 5,
|
||||||
|
"last_updated": "2026-02-17T20:00:00+08:00"
|
||||||
|
}
|
||||||
206
frontend/app.js
206
frontend/app.js
@@ -4,6 +4,8 @@ const API_BASE = '/api/v1';
|
|||||||
let currentProject = null;
|
let currentProject = null;
|
||||||
let currentData = null;
|
let currentData = null;
|
||||||
let selectedEntity = null;
|
let selectedEntity = null;
|
||||||
|
let projectRelations = [];
|
||||||
|
let projectEntities = [];
|
||||||
|
|
||||||
// Init
|
// Init
|
||||||
document.addEventListener('DOMContentLoaded', () => {
|
document.addEventListener('DOMContentLoaded', () => {
|
||||||
@@ -35,7 +37,7 @@ async function initWorkbench() {
|
|||||||
if (nameEl) nameEl.textContent = currentProject.name;
|
if (nameEl) nameEl.textContent = currentProject.name;
|
||||||
|
|
||||||
initUpload();
|
initUpload();
|
||||||
await loadProjectEntities();
|
await loadProjectData();
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('Init failed:', err);
|
console.error('Init failed:', err);
|
||||||
@@ -63,22 +65,26 @@ async function uploadAudio(file) {
|
|||||||
return await res.json();
|
return await res.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function loadProjectEntities() {
|
async function loadProjectData() {
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${API_BASE}/projects/${currentProject.id}/entities`);
|
// 并行加载实体和关系
|
||||||
if (!res.ok) return;
|
const [entitiesRes, relationsRes] = await Promise.all([
|
||||||
const entities = await res.json();
|
fetch(`${API_BASE}/projects/${currentProject.id}/entities`),
|
||||||
|
fetch(`${API_BASE}/projects/${currentProject.id}/relations`)
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (entitiesRes.ok) {
|
||||||
|
projectEntities = await entitiesRes.json();
|
||||||
|
}
|
||||||
|
if (relationsRes.ok) {
|
||||||
|
projectRelations = await relationsRes.json();
|
||||||
|
}
|
||||||
|
|
||||||
currentData = {
|
currentData = {
|
||||||
transcript_id: 'project_view',
|
transcript_id: 'project_view',
|
||||||
project_id: currentProject.id,
|
project_id: currentProject.id,
|
||||||
segments: [],
|
segments: [],
|
||||||
entities: entities.map(e => ({
|
entities: projectEntities,
|
||||||
id: e.id,
|
|
||||||
name: e.name,
|
|
||||||
type: e.type,
|
|
||||||
definition: e.definition || ''
|
|
||||||
})),
|
|
||||||
full_text: '',
|
full_text: '',
|
||||||
created_at: new Date().toISOString()
|
created_at: new Date().toISOString()
|
||||||
};
|
};
|
||||||
@@ -87,11 +93,11 @@ async function loadProjectEntities() {
|
|||||||
renderEntityList();
|
renderEntityList();
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('Load entities failed:', err);
|
console.error('Load project data failed:', err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Render transcript
|
// Render transcript with entity highlighting
|
||||||
function renderTranscript() {
|
function renderTranscript() {
|
||||||
const container = document.getElementById('transcriptContent');
|
const container = document.getElementById('transcriptContent');
|
||||||
if (!container || !currentData || !currentData.segments) return;
|
if (!container || !currentData || !currentData.segments) return;
|
||||||
@@ -103,8 +109,11 @@ function renderTranscript() {
|
|||||||
div.className = 'segment';
|
div.className = 'segment';
|
||||||
div.dataset.index = idx;
|
div.dataset.index = idx;
|
||||||
|
|
||||||
|
// 高亮实体
|
||||||
let text = seg.text;
|
let text = seg.text;
|
||||||
const entities = findEntitiesInSegment(seg, idx);
|
const entities = findEntitiesInText(seg.text);
|
||||||
|
|
||||||
|
// 按位置倒序替换,避免位置偏移
|
||||||
entities.sort((a, b) => b.start - a.start);
|
entities.sort((a, b) => b.start - a.start);
|
||||||
|
|
||||||
entities.forEach(ent => {
|
entities.forEach(ent => {
|
||||||
@@ -123,29 +132,50 @@ function renderTranscript() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Locate every occurrence of each project entity in `text`.
//
// Each entity in the global `projectEntities` is searched by its `name`
// and, when present, by each entry of its `aliases`. Returns an array of
// { id, name, start, end } records where `name` is the matched term and
// `start`/`end` are character offsets into `text` (end exclusive).
// Returns [] when there are no entities or `text` is not a non-empty string.
function findEntitiesInText(text) {
    if (!projectEntities || projectEntities.length === 0) return [];
    if (typeof text !== 'string' || text.length === 0) return [];

    const found = [];

    // Record all non-overlapping occurrences of one search term.
    const collect = (entityId, term) => {
        // indexOf('') matches at every offset and never returns -1, so an
        // empty (or non-string) term would make the loop below run forever.
        if (typeof term !== 'string' || term.length === 0) return;

        let pos = 0;
        while ((pos = text.indexOf(term, pos)) !== -1) {
            found.push({
                id: entityId,
                name: term,
                start: pos,
                end: pos + term.length
            });
            // Step past the whole match: stepping by one would report
            // self-overlapping hits (e.g. "aa" twice in "aaa"), which
            // cannot be highlighted by position-based replacement.
            pos += term.length;
        }
    };

    projectEntities.forEach(ent => {
        collect(ent.id, ent.name);

        // Aliases are searched too, reported under the alias text.
        if (ent.aliases && ent.aliases.length > 0) {
            ent.aliases.forEach(alias => collect(ent.id, alias));
        }
    });

    return found;
}
|
||||||
|
|
||||||
// Render D3 graph
|
// Render D3 graph with relations
|
||||||
function renderGraph() {
|
function renderGraph() {
|
||||||
const svg = d3.select('#graph-svg');
|
const svg = d3.select('#graph-svg');
|
||||||
svg.selectAll('*').remove();
|
svg.selectAll('*').remove();
|
||||||
|
|
||||||
if (!currentData || !currentData.entities || currentData.entities.length === 0) {
|
if (!projectEntities || projectEntities.length === 0) {
|
||||||
svg.append('text')
|
svg.append('text')
|
||||||
.attr('x', '50%')
|
.attr('x', '50%')
|
||||||
.attr('y', '50%')
|
.attr('y', '50%')
|
||||||
@@ -155,21 +185,32 @@ function renderGraph() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const width = svg.node().parentElement.clientWidth;
|
const container = svg.node().parentElement;
|
||||||
const height = svg.node().parentElement.clientHeight - 200;
|
const width = container.clientWidth;
|
||||||
|
const height = container.clientHeight - 200;
|
||||||
|
|
||||||
svg.attr('width', width).attr('height', height);
|
svg.attr('width', width).attr('height', height);
|
||||||
|
|
||||||
const nodes = currentData.entities.map(e => ({
|
const nodes = projectEntities.map(e => ({
|
||||||
id: e.id,
|
id: e.id,
|
||||||
name: e.name,
|
name: e.name,
|
||||||
type: e.type,
|
type: e.type,
|
||||||
|
definition: e.definition,
|
||||||
...e
|
...e
|
||||||
}));
|
}));
|
||||||
|
|
||||||
const links = [];
|
// 使用数据库中的关系
|
||||||
for (let i = 0; i < nodes.length - 1; i++) {
|
const links = projectRelations.map(r => ({
|
||||||
links.push({ source: nodes[i].id, target: nodes[i + 1].id });
|
source: r.source_id,
|
||||||
|
target: r.target_id,
|
||||||
|
type: r.type
|
||||||
|
})).filter(r => r.source && r.target);
|
||||||
|
|
||||||
|
// 如果没有关系,创建默认连接
|
||||||
|
if (links.length === 0 && nodes.length > 1) {
|
||||||
|
for (let i = 0; i < Math.min(nodes.length - 1, 5); i++) {
|
||||||
|
links.push({ source: nodes[0].id, target: nodes[i + 1].id, type: 'related' });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const colorMap = {
|
const colorMap = {
|
||||||
@@ -181,18 +222,31 @@ function renderGraph() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const simulation = d3.forceSimulation(nodes)
|
const simulation = d3.forceSimulation(nodes)
|
||||||
.force('link', d3.forceLink(links).id(d => d.id).distance(100))
|
.force('link', d3.forceLink(links).id(d => d.id).distance(120))
|
||||||
.force('charge', d3.forceManyBody().strength(-300))
|
.force('charge', d3.forceManyBody().strength(-400))
|
||||||
.force('center', d3.forceCenter(width / 2, height / 2))
|
.force('center', d3.forceCenter(width / 2, height / 2))
|
||||||
.force('collision', d3.forceCollide().radius(40));
|
.force('collision', d3.forceCollide().radius(50));
|
||||||
|
|
||||||
|
// 关系连线
|
||||||
const link = svg.append('g')
|
const link = svg.append('g')
|
||||||
.selectAll('line')
|
.selectAll('line')
|
||||||
.data(links)
|
.data(links)
|
||||||
.enter().append('line')
|
.enter().append('line')
|
||||||
.attr('stroke', '#333')
|
.attr('stroke', '#444')
|
||||||
.attr('stroke-width', 1);
|
.attr('stroke-width', 1.5)
|
||||||
|
.attr('stroke-opacity', 0.6);
|
||||||
|
|
||||||
|
// 关系标签
|
||||||
|
const linkLabel = svg.append('g')
|
||||||
|
.selectAll('text')
|
||||||
|
.data(links)
|
||||||
|
.enter().append('text')
|
||||||
|
.attr('font-size', '10px')
|
||||||
|
.attr('fill', '#666')
|
||||||
|
.attr('text-anchor', 'middle')
|
||||||
|
.text(d => d.type);
|
||||||
|
|
||||||
|
// 节点组
|
||||||
const node = svg.append('g')
|
const node = svg.append('g')
|
||||||
.selectAll('g')
|
.selectAll('g')
|
||||||
.data(nodes)
|
.data(nodes)
|
||||||
@@ -204,18 +258,30 @@ function renderGraph() {
|
|||||||
.on('end', dragended))
|
.on('end', dragended))
|
||||||
.on('click', (e, d) => window.selectEntity(d.id));
|
.on('click', (e, d) => window.selectEntity(d.id));
|
||||||
|
|
||||||
|
// 节点圆圈
|
||||||
node.append('circle')
|
node.append('circle')
|
||||||
.attr('r', 30)
|
.attr('r', 35)
|
||||||
.attr('fill', d => colorMap[d.type] || '#666')
|
.attr('fill', d => colorMap[d.type] || '#666')
|
||||||
.attr('stroke', '#fff')
|
.attr('stroke', '#fff')
|
||||||
.attr('stroke-width', 2);
|
.attr('stroke-width', 2)
|
||||||
|
.attr('class', 'node-circle');
|
||||||
|
|
||||||
|
// 节点文字
|
||||||
node.append('text')
|
node.append('text')
|
||||||
.text(d => d.name.length > 8 ? d.name.slice(0, 6) + '...' : d.name)
|
.text(d => d.name.length > 6 ? d.name.slice(0, 5) + '...' : d.name)
|
||||||
.attr('text-anchor', 'middle')
|
.attr('text-anchor', 'middle')
|
||||||
.attr('dy', 5)
|
.attr('dy', 5)
|
||||||
.attr('fill', '#fff')
|
.attr('fill', '#fff')
|
||||||
.attr('font-size', '11px');
|
.attr('font-size', '11px')
|
||||||
|
.attr('font-weight', '500');
|
||||||
|
|
||||||
|
// 节点类型图标
|
||||||
|
node.append('text')
|
||||||
|
.attr('dy', -45)
|
||||||
|
.attr('text-anchor', 'middle')
|
||||||
|
.attr('fill', d => colorMap[d.type] || '#666')
|
||||||
|
.attr('font-size', '10px')
|
||||||
|
.text(d => d.type);
|
||||||
|
|
||||||
simulation.on('tick', () => {
|
simulation.on('tick', () => {
|
||||||
link
|
link
|
||||||
@@ -224,6 +290,10 @@ function renderGraph() {
|
|||||||
.attr('x2', d => d.target.x)
|
.attr('x2', d => d.target.x)
|
||||||
.attr('y2', d => d.target.y);
|
.attr('y2', d => d.target.y);
|
||||||
|
|
||||||
|
linkLabel
|
||||||
|
.attr('x', d => (d.source.x + d.target.x) / 2)
|
||||||
|
.attr('y', d => (d.source.y + d.target.y) / 2);
|
||||||
|
|
||||||
node.attr('transform', d => `translate(${d.x},${d.y})`);
|
node.attr('transform', d => `translate(${d.x},${d.y})`);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -252,14 +322,15 @@ function renderEntityList() {
|
|||||||
|
|
||||||
container.innerHTML = '<h3 style="margin-bottom:12px;color:#888;font-size:0.9rem;">项目实体</h3>';
|
container.innerHTML = '<h3 style="margin-bottom:12px;color:#888;font-size:0.9rem;">项目实体</h3>';
|
||||||
|
|
||||||
if (!currentData || !currentData.entities || currentData.entities.length === 0) {
|
if (!projectEntities || projectEntities.length === 0) {
|
||||||
container.innerHTML += '<p style="color:#666;font-size:0.85rem;">暂无实体,请上传音频文件</p>';
|
container.innerHTML += '<p style="color:#666;font-size:0.85rem;">暂无实体,请上传音频文件</p>';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
currentData.entities.forEach(ent => {
|
projectEntities.forEach(ent => {
|
||||||
const div = document.createElement('div');
|
const div = document.createElement('div');
|
||||||
div.className = 'entity-item';
|
div.className = 'entity-item';
|
||||||
|
div.dataset.id = ent.id;
|
||||||
div.onclick = () => window.selectEntity(ent.id);
|
div.onclick = () => window.selectEntity(ent.id);
|
||||||
|
|
||||||
div.innerHTML = `
|
div.innerHTML = `
|
||||||
@@ -274,21 +345,41 @@ function renderEntityList() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select entity
|
// Select entity - 联动高亮
|
||||||
window.selectEntity = function(entityId) {
    // Remember the selection, then mirror it in the transcript, the graph
    // and the entity list. Unknown ids leave the display untouched.
    selectedEntity = entityId;

    const entity = projectEntities.find(candidate => candidate.id === entityId);
    if (!entity) return;

    // Transcript: paint the selected entity's spans, reset all others.
    for (const span of document.querySelectorAll('.entity')) {
        const active = span.dataset.id === entityId;
        span.style.background = active ? '#ff6b6b' : '';
        span.style.color = active ? '#fff' : '';
    }

    // Graph: thicker red outline and larger radius on the selected node.
    const isSelected = d => d.id === entityId;
    d3.selectAll('.node-circle')
        .attr('stroke', d => (isSelected(d) ? '#ff6b6b' : '#fff'))
        .attr('stroke-width', d => (isSelected(d) ? 4 : 2))
        .attr('r', d => (isSelected(d) ? 40 : 35));

    // Entity list: mark the selected row, reset all others.
    for (const item of document.querySelectorAll('.entity-item')) {
        const active = item.dataset.id === entityId;
        item.style.background = active ? '#2a2a2a' : '';
        item.style.borderLeft = active ? '3px solid #ff6b6b' : '';
    }

    console.log('Selected:', entity.name, entity.definition);
};
|
||||||
|
|
||||||
// Show/hide upload
|
// Show/hide upload
|
||||||
@@ -318,17 +409,24 @@ function initUpload() {
|
|||||||
<div style="text-align:center;">
|
<div style="text-align:center;">
|
||||||
<h2>正在分析...</h2>
|
<h2>正在分析...</h2>
|
||||||
<p style="color:#666;margin-top:10px;">${file.name}</p>
|
<p style="color:#666;margin-top:10px;">${file.name}</p>
|
||||||
|
<p style="color:#888;margin-top:20px;font-size:0.9rem;">ASR转录 + 实体提取中</p>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await uploadAudio(file);
|
const result = await uploadAudio(file);
|
||||||
|
|
||||||
|
// 更新当前数据
|
||||||
currentData = result;
|
currentData = result;
|
||||||
|
|
||||||
|
// 重新加载项目数据(包含新实体和关系)
|
||||||
|
await loadProjectData();
|
||||||
|
|
||||||
|
// 渲染转录文本
|
||||||
|
if (result.segments && result.segments.length > 0) {
|
||||||
renderTranscript();
|
renderTranscript();
|
||||||
renderGraph();
|
}
|
||||||
renderEntityList();
|
|
||||||
|
|
||||||
if (overlay) overlay.classList.remove('show');
|
if (overlay) overlay.classList.remove('show');
|
||||||
|
|
||||||
@@ -339,7 +437,7 @@ function initUpload() {
|
|||||||
<div style="text-align:center;">
|
<div style="text-align:center;">
|
||||||
<h2 style="color:#ff6b6b;">分析失败</h2>
|
<h2 style="color:#ff6b6b;">分析失败</h2>
|
||||||
<p style="color:#666;margin-top:10px;">${err.message}</p>
|
<p style="color:#666;margin-top:10px;">${err.message}</p>
|
||||||
<button class="btn" onclick="location.reload()">重试</button>
|
<button class="btn" onclick="location.reload()" style="margin-top:20px;">重试</button>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user