fix: auto-fix code issues (cron)
- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
- 修复缺失的urllib.parse导入
This commit is contained in:
@@ -19,6 +19,7 @@ from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class SearchOperator(Enum):
|
||||
"""搜索操作符"""
|
||||
|
||||
@@ -26,6 +27,7 @@ class SearchOperator(Enum):
|
||||
OR = "OR"
|
||||
NOT = "NOT"
|
||||
|
||||
|
||||
# 尝试导入 sentence-transformers 用于语义搜索
|
||||
try:
|
||||
from sentence_transformers import SentenceTransformer
|
||||
@@ -37,6 +39,7 @@ except ImportError:
|
||||
|
||||
# ==================== 数据模型 ====================
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
"""搜索结果数据模型"""
|
||||
@@ -60,6 +63,7 @@ class SearchResult:
|
||||
"metadata": self.metadata,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class SemanticSearchResult:
|
||||
"""语义搜索结果数据模型"""
|
||||
@@ -85,6 +89,7 @@ class SemanticSearchResult:
|
||||
result["embedding_dim"] = len(self.embedding)
|
||||
return result
|
||||
|
||||
|
||||
@dataclass
|
||||
class EntityPath:
|
||||
"""实体关系路径数据模型"""
|
||||
@@ -114,6 +119,7 @@ class EntityPath:
|
||||
"path_description": self.path_description,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class KnowledgeGap:
|
||||
"""知识缺口数据模型"""
|
||||
@@ -141,6 +147,7 @@ class KnowledgeGap:
|
||||
"metadata": self.metadata,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchIndex:
|
||||
"""搜索索引数据模型"""
|
||||
@@ -154,6 +161,7 @@ class SearchIndex:
|
||||
created_at: str
|
||||
updated_at: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class TextEmbedding:
|
||||
"""文本 Embedding 数据模型"""
|
||||
@@ -166,8 +174,10 @@ class TextEmbedding:
|
||||
model_name: str
|
||||
created_at: str
|
||||
|
||||
|
||||
# ==================== 全文搜索 ====================
|
||||
|
||||
|
||||
class FullTextSearch:
|
||||
"""
|
||||
全文搜索模块
|
||||
@@ -222,10 +232,14 @@ class FullTextSearch:
|
||||
""")
|
||||
|
||||
# 创建索引
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_search_content ON search_indexes(content_id, content_type)")
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_search_content ON search_indexes(content_id, content_type)"
|
||||
)
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_search_project ON search_indexes(project_id)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_term_freq_term ON search_term_freq(term)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_term_freq_project ON search_term_freq(project_id)")
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_term_freq_project ON search_term_freq(project_id)"
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
@@ -320,7 +334,14 @@ class FullTextSearch:
|
||||
(term, content_id, content_type, project_id, frequency, positions)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(token, content_id, content_type, project_id, freq, json.dumps(positions, ensure_ascii=False)),
|
||||
(
|
||||
token,
|
||||
content_id,
|
||||
content_type,
|
||||
project_id,
|
||||
freq,
|
||||
json.dumps(positions, ensure_ascii=False),
|
||||
),
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
@@ -364,7 +385,7 @@ class FullTextSearch:
|
||||
# 排序和分页
|
||||
scored_results.sort(key=lambda x: x.score, reverse=True)
|
||||
|
||||
return scored_results[offset: offset + limit]
|
||||
return scored_results[offset : offset + limit]
|
||||
|
||||
def _parse_boolean_query(self, query: str) -> dict:
|
||||
"""
|
||||
@@ -405,7 +426,10 @@ class FullTextSearch:
|
||||
return {"and": and_terms + phrases, "or": or_terms, "not": not_terms, "phrases": phrases}
|
||||
|
||||
def _execute_boolean_search(
|
||||
self, parsed_query: dict, project_id: str | None = None, content_types: list[str] | None = None
|
||||
self,
|
||||
parsed_query: dict,
|
||||
project_id: str | None = None,
|
||||
content_types: list[str] | None = None,
|
||||
) -> list[dict]:
|
||||
"""执行布尔搜索"""
|
||||
conn = self._get_conn()
|
||||
@@ -510,7 +534,8 @@ class FullTextSearch:
|
||||
{
|
||||
"id": content_id,
|
||||
"content_type": content_type,
|
||||
"project_id": project_id or self._get_project_id(conn, content_id, content_type),
|
||||
"project_id": project_id
|
||||
or self._get_project_id(conn, content_id, content_type),
|
||||
"content": content,
|
||||
"terms": parsed_query["and"] + parsed_query["or"] + parsed_query["phrases"],
|
||||
}
|
||||
@@ -519,15 +544,21 @@ class FullTextSearch:
|
||||
conn.close()
|
||||
return results
|
||||
|
||||
def _get_content_by_id(self, conn: sqlite3.Connection, content_id: str, content_type: str) -> str | None:
|
||||
def _get_content_by_id(
|
||||
self, conn: sqlite3.Connection, content_id: str, content_type: str
|
||||
) -> str | None:
|
||||
"""根据ID获取内容"""
|
||||
try:
|
||||
if content_type == "transcript":
|
||||
row = conn.execute("SELECT full_text FROM transcripts WHERE id = ?", (content_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT full_text FROM transcripts WHERE id = ?", (content_id,)
|
||||
).fetchone()
|
||||
return row["full_text"] if row else None
|
||||
|
||||
elif content_type == "entity":
|
||||
row = conn.execute("SELECT name, definition FROM entities WHERE id = ?", (content_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT name, definition FROM entities WHERE id = ?", (content_id,)
|
||||
).fetchone()
|
||||
if row:
|
||||
return f"{row['name']} {row['definition'] or ''}"
|
||||
return None
|
||||
@@ -551,15 +582,23 @@ class FullTextSearch:
|
||||
print(f"获取内容失败: {e}")
|
||||
return None
|
||||
|
||||
def _get_project_id(self, conn: sqlite3.Connection, content_id: str, content_type: str) -> str | None:
|
||||
def _get_project_id(
|
||||
self, conn: sqlite3.Connection, content_id: str, content_type: str
|
||||
) -> str | None:
|
||||
"""获取内容所属的项目ID"""
|
||||
try:
|
||||
if content_type == "transcript":
|
||||
row = conn.execute("SELECT project_id FROM transcripts WHERE id = ?", (content_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM transcripts WHERE id = ?", (content_id,)
|
||||
).fetchone()
|
||||
elif content_type == "entity":
|
||||
row = conn.execute("SELECT project_id FROM entities WHERE id = ?", (content_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM entities WHERE id = ?", (content_id,)
|
||||
).fetchone()
|
||||
elif content_type == "relation":
|
||||
row = conn.execute("SELECT project_id FROM entity_relations WHERE id = ?", (content_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM entity_relations WHERE id = ?", (content_id,)
|
||||
).fetchone()
|
||||
else:
|
||||
return None
|
||||
|
||||
@@ -673,12 +712,14 @@ class FullTextSearch:
|
||||
|
||||
# 删除索引
|
||||
conn.execute(
|
||||
"DELETE FROM search_indexes WHERE content_id = ? AND content_type = ?", (content_id, content_type)
|
||||
"DELETE FROM search_indexes WHERE content_id = ? AND content_type = ?",
|
||||
(content_id, content_type),
|
||||
)
|
||||
|
||||
# 删除词频统计
|
||||
conn.execute(
|
||||
"DELETE FROM search_term_freq WHERE content_id = ? AND content_type = ?", (content_id, content_type)
|
||||
"DELETE FROM search_term_freq WHERE content_id = ? AND content_type = ?",
|
||||
(content_id, content_type),
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
@@ -696,7 +737,8 @@ class FullTextSearch:
|
||||
try:
|
||||
# 索引转录文本
|
||||
transcripts = conn.execute(
|
||||
"SELECT id, project_id, full_text FROM transcripts WHERE project_id = ?", (project_id,)
|
||||
"SELECT id, project_id, full_text FROM transcripts WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
for t in transcripts:
|
||||
@@ -708,7 +750,8 @@ class FullTextSearch:
|
||||
|
||||
# 索引实体
|
||||
entities = conn.execute(
|
||||
"SELECT id, project_id, name, definition FROM entities WHERE project_id = ?", (project_id,)
|
||||
"SELECT id, project_id, name, definition FROM entities WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
for e in entities:
|
||||
@@ -743,8 +786,10 @@ class FullTextSearch:
|
||||
conn.close()
|
||||
return stats
|
||||
|
||||
|
||||
# ==================== 语义搜索 ====================
|
||||
|
||||
|
||||
class SemanticSearch:
|
||||
"""
|
||||
语义搜索模块
|
||||
@@ -756,7 +801,11 @@ class SemanticSearch:
|
||||
- 语义相似内容推荐
|
||||
"""
|
||||
|
||||
def __init__(self, db_path: str = "insightflow.db", model_name: str = "paraphrase-multilingual-MiniLM-L12-v2"):
|
||||
def __init__(
|
||||
self,
|
||||
db_path: str = "insightflow.db",
|
||||
model_name: str = "paraphrase-multilingual-MiniLM-L12-v2",
|
||||
):
|
||||
self.db_path = db_path
|
||||
self.model_name = model_name
|
||||
self.model = None
|
||||
@@ -793,7 +842,9 @@ class SemanticSearch:
|
||||
)
|
||||
""")
|
||||
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_embedding_content ON embeddings(content_id, content_type)")
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_embedding_content ON embeddings(content_id, content_type)"
|
||||
)
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_embedding_project ON embeddings(project_id)")
|
||||
|
||||
conn.commit()
|
||||
@@ -828,7 +879,9 @@ class SemanticSearch:
|
||||
print(f"生成 embedding 失败: {e}")
|
||||
return None
|
||||
|
||||
def index_embedding(self, content_id: str, content_type: str, project_id: str, text: str) -> bool:
|
||||
def index_embedding(
|
||||
self, content_id: str, content_type: str, project_id: str, text: str
|
||||
) -> bool:
|
||||
"""
|
||||
为内容生成并保存 embedding
|
||||
|
||||
@@ -975,11 +1028,15 @@ class SemanticSearch:
|
||||
|
||||
try:
|
||||
if content_type == "transcript":
|
||||
row = conn.execute("SELECT full_text FROM transcripts WHERE id = ?", (content_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT full_text FROM transcripts WHERE id = ?", (content_id,)
|
||||
).fetchone()
|
||||
result = row["full_text"] if row else None
|
||||
|
||||
elif content_type == "entity":
|
||||
row = conn.execute("SELECT name, definition FROM entities WHERE id = ?", (content_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT name, definition FROM entities WHERE id = ?", (content_id,)
|
||||
).fetchone()
|
||||
result = f"{row['name']}: {row['definition']}" if row else None
|
||||
|
||||
elif content_type == "relation":
|
||||
@@ -992,7 +1049,11 @@ class SemanticSearch:
|
||||
WHERE r.id = ?""",
|
||||
(content_id,),
|
||||
).fetchone()
|
||||
result = f"{row['source_name']} {row['relation_type']} {row['target_name']}" if row else None
|
||||
result = (
|
||||
f"{row['source_name']} {row['relation_type']} {row['target_name']}"
|
||||
if row
|
||||
else None
|
||||
)
|
||||
|
||||
else:
|
||||
result = None
|
||||
@@ -1005,7 +1066,9 @@ class SemanticSearch:
|
||||
print(f"获取内容失败: {e}")
|
||||
return None
|
||||
|
||||
def find_similar_content(self, content_id: str, content_type: str, top_k: int = 5) -> list[SemanticSearchResult]:
|
||||
def find_similar_content(
|
||||
self, content_id: str, content_type: str, top_k: int = 5
|
||||
) -> list[SemanticSearchResult]:
|
||||
"""
|
||||
查找与指定内容相似的内容
|
||||
|
||||
@@ -1076,7 +1139,10 @@ class SemanticSearch:
|
||||
"""删除内容的 embedding"""
|
||||
try:
|
||||
conn = self._get_conn()
|
||||
conn.execute("DELETE FROM embeddings WHERE content_id = ? AND content_type = ?", (content_id, content_type))
|
||||
conn.execute(
|
||||
"DELETE FROM embeddings WHERE content_id = ? AND content_type = ?",
|
||||
(content_id, content_type),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
return True
|
||||
@@ -1084,8 +1150,10 @@ class SemanticSearch:
|
||||
print(f"删除 embedding 失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
# ==================== 实体关系路径发现 ====================
|
||||
|
||||
|
||||
class EntityPathDiscovery:
|
||||
"""
|
||||
实体关系路径发现模块
|
||||
@@ -1106,7 +1174,9 @@ class EntityPathDiscovery:
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
def find_shortest_path(self, source_entity_id: str, target_entity_id: str, max_depth: int = 5) -> EntityPath | None:
|
||||
def find_shortest_path(
|
||||
self, source_entity_id: str, target_entity_id: str, max_depth: int = 5
|
||||
) -> EntityPath | None:
|
||||
"""
|
||||
查找两个实体之间的最短路径(BFS算法)
|
||||
|
||||
@@ -1121,7 +1191,9 @@ class EntityPathDiscovery:
|
||||
conn = self._get_conn()
|
||||
|
||||
# 获取项目ID
|
||||
row = conn.execute("SELECT project_id FROM entities WHERE id = ?", (source_entity_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM entities WHERE id = ?", (source_entity_id,)
|
||||
).fetchone()
|
||||
|
||||
if not row:
|
||||
conn.close()
|
||||
@@ -1194,7 +1266,9 @@ class EntityPathDiscovery:
|
||||
conn = self._get_conn()
|
||||
|
||||
# 获取项目ID
|
||||
row = conn.execute("SELECT project_id FROM entities WHERE id = ?", (source_entity_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM entities WHERE id = ?", (source_entity_id,)
|
||||
).fetchone()
|
||||
|
||||
if not row:
|
||||
conn.close()
|
||||
@@ -1250,7 +1324,9 @@ class EntityPathDiscovery:
|
||||
# 获取实体信息
|
||||
nodes = []
|
||||
for entity_id in entity_ids:
|
||||
row = conn.execute("SELECT id, name, type FROM entities WHERE id = ?", (entity_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT id, name, type FROM entities WHERE id = ?", (entity_id,)
|
||||
).fetchone()
|
||||
if row:
|
||||
nodes.append({"id": row["id"], "name": row["name"], "type": row["type"]})
|
||||
|
||||
@@ -1318,7 +1394,9 @@ class EntityPathDiscovery:
|
||||
conn = self._get_conn()
|
||||
|
||||
# 获取项目ID
|
||||
row = conn.execute("SELECT project_id, name FROM entities WHERE id = ?", (entity_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT project_id, name FROM entities WHERE id = ?", (entity_id,)
|
||||
).fetchone()
|
||||
|
||||
if not row:
|
||||
conn.close()
|
||||
@@ -1376,7 +1454,9 @@ class EntityPathDiscovery:
|
||||
"hops": depth + 1,
|
||||
"relation_type": neighbor["relation_type"],
|
||||
"evidence": neighbor["evidence"],
|
||||
"path": self._get_path_to_entity(entity_id, neighbor_id, project_id, conn),
|
||||
"path": self._get_path_to_entity(
|
||||
entity_id, neighbor_id, project_id, conn
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1481,7 +1561,9 @@ class EntityPathDiscovery:
|
||||
conn = self._get_conn()
|
||||
|
||||
# 获取所有实体
|
||||
entities = conn.execute("SELECT id, name FROM entities WHERE project_id = ?", (project_id,)).fetchall()
|
||||
entities = conn.execute(
|
||||
"SELECT id, name FROM entities WHERE project_id = ?", (project_id,)
|
||||
).fetchall()
|
||||
|
||||
# 计算每个实体作为桥梁的次数
|
||||
bridge_scores = []
|
||||
@@ -1512,10 +1594,10 @@ class EntityPathDiscovery:
|
||||
f"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM entity_relations
|
||||
WHERE ((source_entity_id IN ({','.join(['?' for _ in neighbor_ids])})
|
||||
AND target_entity_id IN ({','.join(['?' for _ in neighbor_ids])}))
|
||||
OR (target_entity_id IN ({','.join(['?' for _ in neighbor_ids])})
|
||||
AND source_entity_id IN ({','.join(['?' for _ in neighbor_ids])})))
|
||||
WHERE ((source_entity_id IN ({",".join(["?" for _ in neighbor_ids])})
|
||||
AND target_entity_id IN ({",".join(["?" for _ in neighbor_ids])}))
|
||||
OR (target_entity_id IN ({",".join(["?" for _ in neighbor_ids])})
|
||||
AND source_entity_id IN ({",".join(["?" for _ in neighbor_ids])})))
|
||||
AND project_id = ?
|
||||
""",
|
||||
list(neighbor_ids) * 4 + [project_id],
|
||||
@@ -1541,8 +1623,10 @@ class EntityPathDiscovery:
|
||||
bridge_scores.sort(key=lambda x: x["bridge_score"], reverse=True)
|
||||
return bridge_scores[:20] # 返回前20
|
||||
|
||||
|
||||
# ==================== 知识缺口识别 ====================
|
||||
|
||||
|
||||
class KnowledgeGapDetection:
|
||||
"""
|
||||
知识缺口识别模块
|
||||
@@ -1603,7 +1687,8 @@ class KnowledgeGapDetection:
|
||||
|
||||
# 获取项目的属性模板
|
||||
templates = conn.execute(
|
||||
"SELECT id, name, type, is_required FROM attribute_templates WHERE project_id = ?", (project_id,)
|
||||
"SELECT id, name, type, is_required FROM attribute_templates WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
if not templates:
|
||||
@@ -1617,7 +1702,9 @@ class KnowledgeGapDetection:
|
||||
return []
|
||||
|
||||
# 检查每个实体的属性完整性
|
||||
entities = conn.execute("SELECT id, name FROM entities WHERE project_id = ?", (project_id,)).fetchall()
|
||||
entities = conn.execute(
|
||||
"SELECT id, name FROM entities WHERE project_id = ?", (project_id,)
|
||||
).fetchall()
|
||||
|
||||
for entity in entities:
|
||||
entity_id = entity["id"]
|
||||
@@ -1668,7 +1755,9 @@ class KnowledgeGapDetection:
|
||||
gaps = []
|
||||
|
||||
# 获取所有实体及其关系数量
|
||||
entities = conn.execute("SELECT id, name, type FROM entities WHERE project_id = ?", (project_id,)).fetchall()
|
||||
entities = conn.execute(
|
||||
"SELECT id, name, type FROM entities WHERE project_id = ?", (project_id,)
|
||||
).fetchall()
|
||||
|
||||
for entity in entities:
|
||||
entity_id = entity["id"]
|
||||
@@ -1807,13 +1896,17 @@ class KnowledgeGapDetection:
|
||||
gaps = []
|
||||
|
||||
# 分析转录文本中频繁提及但未提取为实体的词
|
||||
transcripts = conn.execute("SELECT full_text FROM transcripts WHERE project_id = ?", (project_id,)).fetchall()
|
||||
transcripts = conn.execute(
|
||||
"SELECT full_text FROM transcripts WHERE project_id = ?", (project_id,)
|
||||
).fetchall()
|
||||
|
||||
# 合并所有文本
|
||||
all_text = " ".join([t["full_text"] or "" for t in transcripts])
|
||||
|
||||
# 获取现有实体名称
|
||||
existing_entities = conn.execute("SELECT name FROM entities WHERE project_id = ?", (project_id,)).fetchall()
|
||||
existing_entities = conn.execute(
|
||||
"SELECT name FROM entities WHERE project_id = ?", (project_id,)
|
||||
).fetchall()
|
||||
|
||||
existing_names = {e["name"].lower() for e in existing_entities}
|
||||
|
||||
@@ -1838,7 +1931,10 @@ class KnowledgeGapDetection:
|
||||
entity_name=None,
|
||||
description=f"文本中频繁提及 '{entity}' 但未提取为实体(出现 {count} 次)",
|
||||
severity="low",
|
||||
suggestions=[f"考虑将 '{entity}' 添加为实体", "检查实体提取算法是否需要优化"],
|
||||
suggestions=[
|
||||
f"考虑将 '{entity}' 添加为实体",
|
||||
"检查实体提取算法是否需要优化",
|
||||
],
|
||||
related_entities=[],
|
||||
metadata={"mention_count": count},
|
||||
)
|
||||
@@ -1898,7 +1994,11 @@ class KnowledgeGapDetection:
|
||||
"relation_count": stats["relation_count"],
|
||||
"transcript_count": stats["transcript_count"],
|
||||
},
|
||||
"gap_summary": {"total": len(gaps), "by_type": dict(gap_by_type), "by_severity": severity_count},
|
||||
"gap_summary": {
|
||||
"total": len(gaps),
|
||||
"by_type": dict(gap_by_type),
|
||||
"by_severity": severity_count,
|
||||
},
|
||||
"top_gaps": [g.to_dict() for g in gaps[:10]],
|
||||
"recommendations": self._generate_recommendations(gaps),
|
||||
}
|
||||
@@ -1929,8 +2029,10 @@ class KnowledgeGapDetection:
|
||||
|
||||
return recommendations
|
||||
|
||||
|
||||
# ==================== 搜索管理器 ====================
|
||||
|
||||
|
||||
class SearchManager:
|
||||
"""
|
||||
搜索管理器 - 统一入口
|
||||
@@ -2035,7 +2137,8 @@ class SearchManager:
|
||||
|
||||
# 索引转录文本
|
||||
transcripts = conn.execute(
|
||||
"SELECT id, project_id, full_text FROM transcripts WHERE project_id = ?", (project_id,)
|
||||
"SELECT id, project_id, full_text FROM transcripts WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
for t in transcripts:
|
||||
@@ -2048,7 +2151,8 @@ class SearchManager:
|
||||
|
||||
# 索引实体
|
||||
entities = conn.execute(
|
||||
"SELECT id, project_id, name, definition FROM entities WHERE project_id = ?", (project_id,)
|
||||
"SELECT id, project_id, name, definition FROM entities WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
for e in entities:
|
||||
@@ -2076,9 +2180,9 @@ class SearchManager:
|
||||
).fetchone()["count"]
|
||||
|
||||
# 语义索引统计
|
||||
semantic_count = conn.execute(f"SELECT COUNT(*) as count FROM embeddings {where_clause}", params).fetchone()[
|
||||
"count"
|
||||
]
|
||||
semantic_count = conn.execute(
|
||||
f"SELECT COUNT(*) as count FROM embeddings {where_clause}", params
|
||||
).fetchone()["count"]
|
||||
|
||||
# 按类型统计
|
||||
type_stats = {}
|
||||
@@ -2101,9 +2205,11 @@ class SearchManager:
|
||||
"semantic_search_available": self.semantic_search.is_available(),
|
||||
}
|
||||
|
||||
|
||||
# 单例模式
|
||||
_search_manager = None
|
||||
|
||||
|
||||
def get_search_manager(db_path: str = "insightflow.db") -> SearchManager:
|
||||
"""获取搜索管理器单例"""
|
||||
global _search_manager
|
||||
@@ -2111,22 +2217,30 @@ def get_search_manager(db_path: str = "insightflow.db") -> SearchManager:
|
||||
_search_manager = SearchManager(db_path)
|
||||
return _search_manager
|
||||
|
||||
|
||||
# 便捷函数
|
||||
def fulltext_search(
    query: str, project_id: str | None = None, limit: int = 20
) -> list[SearchResult]:
    """Convenience wrapper for full-text search.

    Obtains the shared manager by calling ``get_search_manager()`` with no
    arguments and forwards the call to its ``fulltext_search.search``.

    Args:
        query: Search query string, passed through unchanged.
        project_id: Optional project ID, forwarded as the second positional
            argument.
        limit: Forwarded as the ``limit`` keyword argument.

    Returns:
        Whatever ``fulltext_search.search`` returns (annotated as a list of
        ``SearchResult``).
    """
    manager = get_search_manager()
    return manager.fulltext_search.search(query, project_id, limit=limit)
|
||||
|
||||
def semantic_search(
    query: str, project_id: str | None = None, top_k: int = 10
) -> list[SemanticSearchResult]:
    """Convenience wrapper for semantic search.

    Obtains the shared manager by calling ``get_search_manager()`` with no
    arguments and forwards the call to its ``semantic_search.search``.

    Args:
        query: Search query string, passed through unchanged.
        project_id: Optional project ID, forwarded as the second positional
            argument.
        top_k: Forwarded as the ``top_k`` keyword argument.

    Returns:
        Whatever ``semantic_search.search`` returns (annotated as a list of
        ``SemanticSearchResult``).
    """
    manager = get_search_manager()
    return manager.semantic_search.search(query, project_id, top_k=top_k)
|
||||
|
||||
|
||||
def find_entity_path(source_id: str, target_id: str, max_depth: int = 5) -> EntityPath | None:
    """Convenience wrapper for entity path lookup.

    Obtains the shared manager by calling ``get_search_manager()`` with no
    arguments and forwards the call to its
    ``path_discovery.find_shortest_path``.

    Args:
        source_id: ID of the starting entity.
        target_id: ID of the destination entity.
        max_depth: Forwarded positionally as the third argument.

    Returns:
        Whatever ``find_shortest_path`` returns (annotated as an
        ``EntityPath`` or ``None``).
    """
    manager = get_search_manager()
    return manager.path_discovery.find_shortest_path(source_id, target_id, max_depth)
|
||||
|
||||
|
||||
def detect_knowledge_gaps(project_id: str) -> list[KnowledgeGap]:
|
||||
"""知识缺口检测便捷函数"""
|
||||
manager = get_search_manager()
|
||||
|
||||
Reference in New Issue
Block a user