Phase 3: Memory & Growth - Multi-file fusion, Entity alignment with embedding, Document import, Knowledge base panel

This commit is contained in:
OpenClaw Bot
2026-02-18 12:12:39 +08:00
parent 643fe46780
commit da8a4db985
11 changed files with 1842 additions and 167 deletions

View File

@@ -1,7 +1,8 @@
#!/usr/bin/env python3
"""
InsightFlow Database Manager
InsightFlow Database Manager - Phase 3
处理项目、实体、关系的持久化
支持文档类型和多文件融合
"""
import os
@@ -166,6 +167,18 @@ class DatabaseManager:
(target_id, source_id)
)
# 更新关系 - source 作为 source_entity_id
conn.execute(
"UPDATE entity_relations SET source_entity_id = ? WHERE source_entity_id = ?",
(target_id, source_id)
)
# 更新关系 - source 作为 target_entity_id
conn.execute(
"UPDATE entity_relations SET target_entity_id = ? WHERE target_entity_id = ?",
(target_id, source_id)
)
# 删除源实体
conn.execute("DELETE FROM entities WHERE id = ?", (source_id,))
@@ -222,13 +235,13 @@ class DatabaseManager:
return [EntityMention(**dict(r)) for r in rows]
# Transcript operations
def save_transcript(self, transcript_id: str, project_id: str, filename: str, full_text: str):
def save_transcript(self, transcript_id: str, project_id: str, filename: str, full_text: str, transcript_type: str = "audio"):
"""保存转录记录"""
conn = self.get_conn()
now = datetime.now().isoformat()
conn.execute(
"INSERT INTO transcripts (id, project_id, filename, full_text, created_at) VALUES (?, ?, ?, ?, ?)",
(transcript_id, project_id, filename, full_text, now)
"INSERT INTO transcripts (id, project_id, filename, full_text, type, created_at) VALUES (?, ?, ?, ?, ?, ?)",
(transcript_id, project_id, filename, full_text, transcript_type, now)
)
conn.commit()
conn.close()
@@ -388,6 +401,58 @@ class DatabaseManager:
conn.close()
return dict(row) if row else None
# Phase 3: Glossary operations
def add_glossary_term(self, project_id: str, term: str, pronunciation: str = "") -> str:
"""添加术语到术语表"""
conn = self.get_conn()
# 检查是否已存在
existing = conn.execute(
"SELECT * FROM glossary WHERE project_id = ? AND term = ?",
(project_id, term)
).fetchone()
if existing:
# 更新频率
conn.execute(
"UPDATE glossary SET frequency = frequency + 1 WHERE id = ?",
(existing['id'],)
)
conn.commit()
conn.close()
return existing['id']
term_id = str(uuid.uuid4())[:8]
conn.execute(
"INSERT INTO glossary (id, project_id, term, pronunciation, frequency) VALUES (?, ?, ?, ?, ?)",
(term_id, project_id, term, pronunciation, 1)
)
conn.commit()
conn.close()
return term_id
def list_glossary(self, project_id: str) -> List[dict]:
"""列出项目术语表"""
conn = self.get_conn()
rows = conn.execute(
"SELECT * FROM glossary WHERE project_id = ? ORDER BY frequency DESC",
(project_id,)
).fetchall()
conn.close()
return [dict(r) for r in rows]
def delete_glossary_term(self, term_id: str):
"""删除术语"""
conn = self.get_conn()
conn.execute("DELETE FROM glossary WHERE id = ?", (term_id,))
conn.commit()
conn.close()
# Phase 3: Get all entities for embedding
def get_all_entities_for_embedding(self, project_id: str) -> List[Entity]:
"""获取所有实体用于 embedding 计算"""
return self.list_project_entities(project_id)
# Singleton instance