fix: auto-fix code issues (cron)
- 修复隐式 Optional 类型注解 (RUF013)
- 修复不必要的赋值后返回 (RET504)
- 优化列表推导式 (PERF401)
- 修复未使用的参数 (ARG002)
- 清理重复导入
- 优化异常处理
This commit is contained in:
@@ -385,7 +385,7 @@ class FullTextSearch:
|
||||
# 排序和分页
|
||||
scored_results.sort(key=lambda x: x.score, reverse=True)
|
||||
|
||||
return scored_results[offset: offset + limit]
|
||||
return scored_results[offset : offset + limit]
|
||||
|
||||
def _parse_boolean_query(self, query: str) -> dict:
|
||||
"""
|
||||
@@ -545,19 +545,24 @@ class FullTextSearch:
|
||||
return results
|
||||
|
||||
def _get_content_by_id(
|
||||
self, conn: sqlite3.Connection, content_id: str, content_type: str,
|
||||
self,
|
||||
conn: sqlite3.Connection,
|
||||
content_id: str,
|
||||
content_type: str,
|
||||
) -> str | None:
|
||||
"""根据ID获取内容"""
|
||||
try:
|
||||
if content_type == "transcript":
|
||||
row = conn.execute(
|
||||
"SELECT full_text FROM transcripts WHERE id = ?", (content_id,),
|
||||
"SELECT full_text FROM transcripts WHERE id = ?",
|
||||
(content_id,),
|
||||
).fetchone()
|
||||
return row["full_text"] if row else None
|
||||
|
||||
elif content_type == "entity":
|
||||
row = conn.execute(
|
||||
"SELECT name, definition FROM entities WHERE id = ?", (content_id,),
|
||||
"SELECT name, definition FROM entities WHERE id = ?",
|
||||
(content_id,),
|
||||
).fetchone()
|
||||
if row:
|
||||
return f"{row['name']} {row['definition'] or ''}"
|
||||
@@ -583,21 +588,27 @@ class FullTextSearch:
|
||||
return None
|
||||
|
||||
def _get_project_id(
|
||||
self, conn: sqlite3.Connection, content_id: str, content_type: str,
|
||||
self,
|
||||
conn: sqlite3.Connection,
|
||||
content_id: str,
|
||||
content_type: str,
|
||||
) -> str | None:
|
||||
"""获取内容所属的项目ID"""
|
||||
try:
|
||||
if content_type == "transcript":
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM transcripts WHERE id = ?", (content_id,),
|
||||
"SELECT project_id FROM transcripts WHERE id = ?",
|
||||
(content_id,),
|
||||
).fetchone()
|
||||
elif content_type == "entity":
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM entities WHERE id = ?", (content_id,),
|
||||
"SELECT project_id FROM entities WHERE id = ?",
|
||||
(content_id,),
|
||||
).fetchone()
|
||||
elif content_type == "relation":
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM entity_relations WHERE id = ?", (content_id,),
|
||||
"SELECT project_id FROM entity_relations WHERE id = ?",
|
||||
(content_id,),
|
||||
).fetchone()
|
||||
else:
|
||||
return None
|
||||
@@ -880,7 +891,11 @@ class SemanticSearch:
|
||||
return None
|
||||
|
||||
def index_embedding(
|
||||
self, content_id: str, content_type: str, project_id: str, text: str,
|
||||
self,
|
||||
content_id: str,
|
||||
content_type: str,
|
||||
project_id: str,
|
||||
text: str,
|
||||
) -> bool:
|
||||
"""
|
||||
为内容生成并保存 embedding
|
||||
@@ -1029,13 +1044,15 @@ class SemanticSearch:
|
||||
try:
|
||||
if content_type == "transcript":
|
||||
row = conn.execute(
|
||||
"SELECT full_text FROM transcripts WHERE id = ?", (content_id,),
|
||||
"SELECT full_text FROM transcripts WHERE id = ?",
|
||||
(content_id,),
|
||||
).fetchone()
|
||||
result = row["full_text"] if row else None
|
||||
|
||||
elif content_type == "entity":
|
||||
row = conn.execute(
|
||||
"SELECT name, definition FROM entities WHERE id = ?", (content_id,),
|
||||
"SELECT name, definition FROM entities WHERE id = ?",
|
||||
(content_id,),
|
||||
).fetchone()
|
||||
result = f"{row['name']}: {row['definition']}" if row else None
|
||||
|
||||
@@ -1067,7 +1084,10 @@ class SemanticSearch:
|
||||
return None
|
||||
|
||||
def find_similar_content(
|
||||
self, content_id: str, content_type: str, top_k: int = 5,
|
||||
self,
|
||||
content_id: str,
|
||||
content_type: str,
|
||||
top_k: int = 5,
|
||||
) -> list[SemanticSearchResult]:
|
||||
"""
|
||||
查找与指定内容相似的内容
|
||||
@@ -1175,7 +1195,10 @@ class EntityPathDiscovery:
|
||||
return conn
|
||||
|
||||
def find_shortest_path(
|
||||
self, source_entity_id: str, target_entity_id: str, max_depth: int = 5,
|
||||
self,
|
||||
source_entity_id: str,
|
||||
target_entity_id: str,
|
||||
max_depth: int = 5,
|
||||
) -> EntityPath | None:
|
||||
"""
|
||||
查找两个实体之间的最短路径(BFS算法)
|
||||
@@ -1192,7 +1215,8 @@ class EntityPathDiscovery:
|
||||
|
||||
# 获取项目ID
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM entities WHERE id = ?", (source_entity_id,),
|
||||
"SELECT project_id FROM entities WHERE id = ?",
|
||||
(source_entity_id,),
|
||||
).fetchone()
|
||||
|
||||
if not row:
|
||||
@@ -1250,7 +1274,11 @@ class EntityPathDiscovery:
|
||||
return None
|
||||
|
||||
def find_all_paths(
|
||||
self, source_entity_id: str, target_entity_id: str, max_depth: int = 4, max_paths: int = 10,
|
||||
self,
|
||||
source_entity_id: str,
|
||||
target_entity_id: str,
|
||||
max_depth: int = 4,
|
||||
max_paths: int = 10,
|
||||
) -> list[EntityPath]:
|
||||
"""
|
||||
查找两个实体之间的所有路径(限制数量和深度)
|
||||
@@ -1268,7 +1296,8 @@ class EntityPathDiscovery:
|
||||
|
||||
# 获取项目ID
|
||||
row = conn.execute(
|
||||
"SELECT project_id FROM entities WHERE id = ?", (source_entity_id,),
|
||||
"SELECT project_id FROM entities WHERE id = ?",
|
||||
(source_entity_id,),
|
||||
).fetchone()
|
||||
|
||||
if not row:
|
||||
@@ -1280,7 +1309,11 @@ class EntityPathDiscovery:
|
||||
paths = []
|
||||
|
||||
def dfs(
|
||||
current_id: str, target_id: str, path: list[str], visited: set[str], depth: int,
|
||||
current_id: str,
|
||||
target_id: str,
|
||||
path: list[str],
|
||||
visited: set[str],
|
||||
depth: int,
|
||||
) -> None:
|
||||
if depth > max_depth:
|
||||
return
|
||||
@@ -1328,7 +1361,8 @@ class EntityPathDiscovery:
|
||||
nodes = []
|
||||
for entity_id in entity_ids:
|
||||
row = conn.execute(
|
||||
"SELECT id, name, type FROM entities WHERE id = ?", (entity_id,),
|
||||
"SELECT id, name, type FROM entities WHERE id = ?",
|
||||
(entity_id,),
|
||||
).fetchone()
|
||||
if row:
|
||||
nodes.append({"id": row["id"], "name": row["name"], "type": row["type"]})
|
||||
@@ -1398,7 +1432,8 @@ class EntityPathDiscovery:
|
||||
|
||||
# 获取项目ID
|
||||
row = conn.execute(
|
||||
"SELECT project_id, name FROM entities WHERE id = ?", (entity_id,),
|
||||
"SELECT project_id, name FROM entities WHERE id = ?",
|
||||
(entity_id,),
|
||||
).fetchone()
|
||||
|
||||
if not row:
|
||||
@@ -1445,7 +1480,8 @@ class EntityPathDiscovery:
|
||||
|
||||
# 获取邻居信息
|
||||
neighbor_info = conn.execute(
|
||||
"SELECT name, type FROM entities WHERE id = ?", (neighbor_id,),
|
||||
"SELECT name, type FROM entities WHERE id = ?",
|
||||
(neighbor_id,),
|
||||
).fetchone()
|
||||
|
||||
if neighbor_info:
|
||||
@@ -1458,7 +1494,10 @@ class EntityPathDiscovery:
|
||||
"relation_type": neighbor["relation_type"],
|
||||
"evidence": neighbor["evidence"],
|
||||
"path": self._get_path_to_entity(
|
||||
entity_id, neighbor_id, project_id, conn,
|
||||
entity_id,
|
||||
neighbor_id,
|
||||
project_id,
|
||||
conn,
|
||||
),
|
||||
},
|
||||
)
|
||||
@@ -1470,7 +1509,11 @@ class EntityPathDiscovery:
|
||||
return relations
|
||||
|
||||
def _get_path_to_entity(
|
||||
self, source_id: str, target_id: str, project_id: str, conn: sqlite3.Connection,
|
||||
self,
|
||||
source_id: str,
|
||||
target_id: str,
|
||||
project_id: str,
|
||||
conn: sqlite3.Connection,
|
||||
) -> list[str]:
|
||||
"""获取从源实体到目标实体的路径(简化版)"""
|
||||
# BFS 找路径
|
||||
@@ -1565,7 +1608,8 @@ class EntityPathDiscovery:
|
||||
|
||||
# 获取所有实体
|
||||
entities = conn.execute(
|
||||
"SELECT id, name FROM entities WHERE project_id = ?", (project_id,),
|
||||
"SELECT id, name FROM entities WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
# 计算每个实体作为桥梁的次数
|
||||
@@ -1706,7 +1750,8 @@ class KnowledgeGapDetection:
|
||||
|
||||
# 检查每个实体的属性完整性
|
||||
entities = conn.execute(
|
||||
"SELECT id, name FROM entities WHERE project_id = ?", (project_id,),
|
||||
"SELECT id, name FROM entities WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
for entity in entities:
|
||||
@@ -1714,7 +1759,8 @@ class KnowledgeGapDetection:
|
||||
|
||||
# 获取实体已有的属性
|
||||
existing_attrs = conn.execute(
|
||||
"SELECT template_id FROM entity_attributes WHERE entity_id = ?", (entity_id,),
|
||||
"SELECT template_id FROM entity_attributes WHERE entity_id = ?",
|
||||
(entity_id,),
|
||||
).fetchall()
|
||||
|
||||
existing_template_ids = {a["template_id"] for a in existing_attrs}
|
||||
@@ -1726,7 +1772,8 @@ class KnowledgeGapDetection:
|
||||
missing_names = []
|
||||
for template_id in missing_templates:
|
||||
template = conn.execute(
|
||||
"SELECT name FROM attribute_templates WHERE id = ?", (template_id,),
|
||||
"SELECT name FROM attribute_templates WHERE id = ?",
|
||||
(template_id,),
|
||||
).fetchone()
|
||||
if template:
|
||||
missing_names.append(template["name"])
|
||||
@@ -1759,7 +1806,8 @@ class KnowledgeGapDetection:
|
||||
|
||||
# 获取所有实体及其关系数量
|
||||
entities = conn.execute(
|
||||
"SELECT id, name, type FROM entities WHERE project_id = ?", (project_id,),
|
||||
"SELECT id, name, type FROM entities WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
for entity in entities:
|
||||
@@ -1900,7 +1948,8 @@ class KnowledgeGapDetection:
|
||||
|
||||
# 分析转录文本中频繁提及但未提取为实体的词
|
||||
transcripts = conn.execute(
|
||||
"SELECT full_text FROM transcripts WHERE project_id = ?", (project_id,),
|
||||
"SELECT full_text FROM transcripts WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
# 合并所有文本
|
||||
@@ -1908,7 +1957,8 @@ class KnowledgeGapDetection:
|
||||
|
||||
# 获取现有实体名称
|
||||
existing_entities = conn.execute(
|
||||
"SELECT name FROM entities WHERE project_id = ?", (project_id,),
|
||||
"SELECT name FROM entities WHERE project_id = ?",
|
||||
(project_id,),
|
||||
).fetchall()
|
||||
|
||||
existing_names = {e["name"].lower() for e in existing_entities}
|
||||
@@ -2146,7 +2196,10 @@ class SearchManager:
|
||||
|
||||
for t in transcripts:
|
||||
if t["full_text"] and self.semantic_search.index_embedding(
|
||||
t["id"], "transcript", t["project_id"], t["full_text"],
|
||||
t["id"],
|
||||
"transcript",
|
||||
t["project_id"],
|
||||
t["full_text"],
|
||||
):
|
||||
semantic_stats["indexed"] += 1
|
||||
else:
|
||||
@@ -2179,12 +2232,14 @@ class SearchManager:
|
||||
|
||||
# 全文索引统计
|
||||
fulltext_count = conn.execute(
|
||||
f"SELECT COUNT(*) as count FROM search_indexes {where_clause}", params,
|
||||
f"SELECT COUNT(*) as count FROM search_indexes {where_clause}",
|
||||
params,
|
||||
).fetchone()["count"]
|
||||
|
||||
# 语义索引统计
|
||||
semantic_count = conn.execute(
|
||||
f"SELECT COUNT(*) as count FROM embeddings {where_clause}", params,
|
||||
f"SELECT COUNT(*) as count FROM embeddings {where_clause}",
|
||||
params,
|
||||
).fetchone()["count"]
|
||||
|
||||
# 按类型统计
|
||||
@@ -2225,7 +2280,9 @@ def get_search_manager(db_path: str = "insightflow.db") -> SearchManager:
|
||||
|
||||
|
||||
def fulltext_search(
|
||||
query: str, project_id: str | None = None, limit: int = 20,
|
||||
query: str,
|
||||
project_id: str | None = None,
|
||||
limit: int = 20,
|
||||
) -> list[SearchResult]:
|
||||
"""全文搜索便捷函数"""
|
||||
manager = get_search_manager()
|
||||
@@ -2233,7 +2290,9 @@ def fulltext_search(
|
||||
|
||||
|
||||
def semantic_search(
|
||||
query: str, project_id: str | None = None, top_k: int = 10,
|
||||
query: str,
|
||||
project_id: str | None = None,
|
||||
top_k: int = 10,
|
||||
) -> list[SemanticSearchResult]:
|
||||
"""语义搜索便捷函数"""
|
||||
manager = get_search_manager()
|
||||
|
||||
Reference in New Issue
Block a user