fix: auto-fix code issues (cron)

- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
This commit is contained in:
AutoFix Bot
2026-03-03 06:03:38 +08:00
parent 2a0ed6af4d
commit 9fd1da8fb7
41 changed files with 901 additions and 768 deletions

View File

@@ -233,12 +233,12 @@ class FullTextSearch:
# 创建索引
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_search_content ON search_indexes(content_id, content_type)"
"CREATE INDEX IF NOT EXISTS idx_search_content ON search_indexes(content_id, content_type)",
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_search_project ON search_indexes(project_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_term_freq_term ON search_term_freq(term)")
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_term_freq_project ON search_term_freq(project_id)"
"CREATE INDEX IF NOT EXISTS idx_term_freq_project ON search_term_freq(project_id)",
)
conn.commit()
@@ -538,26 +538,26 @@ class FullTextSearch:
or self._get_project_id(conn, content_id, content_type),
"content": content,
"terms": parsed_query["and"] + parsed_query["or"] + parsed_query["phrases"],
}
},
)
conn.close()
return results
def _get_content_by_id(
self, conn: sqlite3.Connection, content_id: str, content_type: str
self, conn: sqlite3.Connection, content_id: str, content_type: str,
) -> str | None:
"""根据ID获取内容"""
try:
if content_type == "transcript":
row = conn.execute(
"SELECT full_text FROM transcripts WHERE id = ?", (content_id,)
"SELECT full_text FROM transcripts WHERE id = ?", (content_id,),
).fetchone()
return row["full_text"] if row else None
elif content_type == "entity":
row = conn.execute(
"SELECT name, definition FROM entities WHERE id = ?", (content_id,)
"SELECT name, definition FROM entities WHERE id = ?", (content_id,),
).fetchone()
if row:
return f"{row['name']} {row['definition'] or ''}"
@@ -583,21 +583,21 @@ class FullTextSearch:
return None
def _get_project_id(
self, conn: sqlite3.Connection, content_id: str, content_type: str
self, conn: sqlite3.Connection, content_id: str, content_type: str,
) -> str | None:
"""获取内容所属的项目ID"""
try:
if content_type == "transcript":
row = conn.execute(
"SELECT project_id FROM transcripts WHERE id = ?", (content_id,)
"SELECT project_id FROM transcripts WHERE id = ?", (content_id,),
).fetchone()
elif content_type == "entity":
row = conn.execute(
"SELECT project_id FROM entities WHERE id = ?", (content_id,)
"SELECT project_id FROM entities WHERE id = ?", (content_id,),
).fetchone()
elif content_type == "relation":
row = conn.execute(
"SELECT project_id FROM entity_relations WHERE id = ?", (content_id,)
"SELECT project_id FROM entity_relations WHERE id = ?", (content_id,),
).fetchone()
else:
return None
@@ -661,7 +661,7 @@ class FullTextSearch:
score=round(score, 4),
highlights=highlights[:10], # 限制高亮数量
metadata={},
)
),
)
return scored
@@ -843,7 +843,7 @@ class SemanticSearch:
""")
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_embedding_content ON embeddings(content_id, content_type)"
"CREATE INDEX IF NOT EXISTS idx_embedding_content ON embeddings(content_id, content_type)",
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_embedding_project ON embeddings(project_id)")
@@ -880,7 +880,7 @@ class SemanticSearch:
return None
def index_embedding(
self, content_id: str, content_type: str, project_id: str, text: str
self, content_id: str, content_type: str, project_id: str, text: str,
) -> bool:
"""
为内容生成并保存 embedding
@@ -1012,7 +1012,7 @@ class SemanticSearch:
similarity=float(similarity),
embedding=None, # 不返回 embedding 以节省带宽
metadata={},
)
),
)
except Exception as e:
print(f"计算相似度失败: {e}")
@@ -1029,13 +1029,13 @@ class SemanticSearch:
try:
if content_type == "transcript":
row = conn.execute(
"SELECT full_text FROM transcripts WHERE id = ?", (content_id,)
"SELECT full_text FROM transcripts WHERE id = ?", (content_id,),
).fetchone()
result = row["full_text"] if row else None
elif content_type == "entity":
row = conn.execute(
"SELECT name, definition FROM entities WHERE id = ?", (content_id,)
"SELECT name, definition FROM entities WHERE id = ?", (content_id,),
).fetchone()
result = f"{row['name']}: {row['definition']}" if row else None
@@ -1067,7 +1067,7 @@ class SemanticSearch:
return None
def find_similar_content(
self, content_id: str, content_type: str, top_k: int = 5
self, content_id: str, content_type: str, top_k: int = 5,
) -> list[SemanticSearchResult]:
"""
查找与指定内容相似的内容
@@ -1127,7 +1127,7 @@ class SemanticSearch:
project_id=row["project_id"],
similarity=float(similarity),
metadata={},
)
),
)
except (KeyError, ValueError):
continue
@@ -1175,7 +1175,7 @@ class EntityPathDiscovery:
return conn
def find_shortest_path(
self, source_entity_id: str, target_entity_id: str, max_depth: int = 5
self, source_entity_id: str, target_entity_id: str, max_depth: int = 5,
) -> EntityPath | None:
"""
查找两个实体之间的最短路径BFS算法
@@ -1192,7 +1192,7 @@ class EntityPathDiscovery:
# 获取项目ID
row = conn.execute(
"SELECT project_id FROM entities WHERE id = ?", (source_entity_id,)
"SELECT project_id FROM entities WHERE id = ?", (source_entity_id,),
).fetchone()
if not row:
@@ -1250,7 +1250,7 @@ class EntityPathDiscovery:
return None
def find_all_paths(
self, source_entity_id: str, target_entity_id: str, max_depth: int = 4, max_paths: int = 10
self, source_entity_id: str, target_entity_id: str, max_depth: int = 4, max_paths: int = 10,
) -> list[EntityPath]:
"""
查找两个实体之间的所有路径(限制数量和深度)
@@ -1268,7 +1268,7 @@ class EntityPathDiscovery:
# 获取项目ID
row = conn.execute(
"SELECT project_id FROM entities WHERE id = ?", (source_entity_id,)
"SELECT project_id FROM entities WHERE id = ?", (source_entity_id,),
).fetchone()
if not row:
@@ -1280,7 +1280,7 @@ class EntityPathDiscovery:
paths = []
def dfs(
current_id: str, target_id: str, path: list[str], visited: set[str], depth: int
current_id: str, target_id: str, path: list[str], visited: set[str], depth: int,
) -> None:
if depth > max_depth:
return
@@ -1328,7 +1328,7 @@ class EntityPathDiscovery:
nodes = []
for entity_id in entity_ids:
row = conn.execute(
"SELECT id, name, type FROM entities WHERE id = ?", (entity_id,)
"SELECT id, name, type FROM entities WHERE id = ?", (entity_id,),
).fetchone()
if row:
nodes.append({"id": row["id"], "name": row["name"], "type": row["type"]})
@@ -1358,7 +1358,7 @@ class EntityPathDiscovery:
"target": target_id,
"relation_type": row["relation_type"],
"evidence": row["evidence"],
}
},
)
conn.close()
@@ -1398,7 +1398,7 @@ class EntityPathDiscovery:
# 获取项目ID
row = conn.execute(
"SELECT project_id, name FROM entities WHERE id = ?", (entity_id,)
"SELECT project_id, name FROM entities WHERE id = ?", (entity_id,),
).fetchone()
if not row:
@@ -1445,7 +1445,7 @@ class EntityPathDiscovery:
# 获取邻居信息
neighbor_info = conn.execute(
"SELECT name, type FROM entities WHERE id = ?", (neighbor_id,)
"SELECT name, type FROM entities WHERE id = ?", (neighbor_id,),
).fetchone()
if neighbor_info:
@@ -1458,9 +1458,9 @@ class EntityPathDiscovery:
"relation_type": neighbor["relation_type"],
"evidence": neighbor["evidence"],
"path": self._get_path_to_entity(
entity_id, neighbor_id, project_id, conn
entity_id, neighbor_id, project_id, conn,
),
}
},
)
conn.close()
@@ -1470,7 +1470,7 @@ class EntityPathDiscovery:
return relations
def _get_path_to_entity(
self, source_id: str, target_id: str, project_id: str, conn: sqlite3.Connection
self, source_id: str, target_id: str, project_id: str, conn: sqlite3.Connection,
) -> list[str]:
"""获取从源实体到目标实体的路径(简化版)"""
# BFS 找路径
@@ -1528,7 +1528,7 @@ class EntityPathDiscovery:
"type": node["type"],
"is_source": node["id"] == path.source_entity_id,
"is_target": node["id"] == path.target_entity_id,
}
},
)
# 边数据
@@ -1540,7 +1540,7 @@ class EntityPathDiscovery:
"target": edge["target"],
"relation_type": edge["relation_type"],
"evidence": edge["evidence"],
}
},
)
return {
@@ -1565,7 +1565,7 @@ class EntityPathDiscovery:
# 获取所有实体
entities = conn.execute(
"SELECT id, name FROM entities WHERE project_id = ?", (project_id,)
"SELECT id, name FROM entities WHERE project_id = ?", (project_id,),
).fetchall()
# 计算每个实体作为桥梁的次数
@@ -1617,7 +1617,7 @@ class EntityPathDiscovery:
"entity_name": entity["name"],
"neighbor_count": len(neighbor_ids),
"bridge_score": round(bridge_score, 4),
}
},
)
conn.close()
@@ -1706,7 +1706,7 @@ class KnowledgeGapDetection:
# 检查每个实体的属性完整性
entities = conn.execute(
"SELECT id, name FROM entities WHERE project_id = ?", (project_id,)
"SELECT id, name FROM entities WHERE project_id = ?", (project_id,),
).fetchall()
for entity in entities:
@@ -1714,7 +1714,7 @@ class KnowledgeGapDetection:
# 获取实体已有的属性
existing_attrs = conn.execute(
"SELECT template_id FROM entity_attributes WHERE entity_id = ?", (entity_id,)
"SELECT template_id FROM entity_attributes WHERE entity_id = ?", (entity_id,),
).fetchall()
existing_template_ids = {a["template_id"] for a in existing_attrs}
@@ -1726,7 +1726,7 @@ class KnowledgeGapDetection:
missing_names = []
for template_id in missing_templates:
template = conn.execute(
"SELECT name FROM attribute_templates WHERE id = ?", (template_id,)
"SELECT name FROM attribute_templates WHERE id = ?", (template_id,),
).fetchone()
if template:
missing_names.append(template["name"])
@@ -1746,7 +1746,7 @@ class KnowledgeGapDetection:
],
related_entities=[],
metadata={"missing_attributes": missing_names},
)
),
)
conn.close()
@@ -1759,7 +1759,7 @@ class KnowledgeGapDetection:
# 获取所有实体及其关系数量
entities = conn.execute(
"SELECT id, name, type FROM entities WHERE project_id = ?", (project_id,)
"SELECT id, name, type FROM entities WHERE project_id = ?", (project_id,),
).fetchall()
for entity in entities:
@@ -1812,7 +1812,7 @@ class KnowledgeGapDetection:
"relation_count": relation_count,
"potential_related": [r["name"] for r in potential_related],
},
)
),
)
conn.close()
@@ -1853,7 +1853,7 @@ class KnowledgeGapDetection:
],
related_entities=[],
metadata={"entity_type": entity["type"]},
)
),
)
conn.close()
@@ -1887,7 +1887,7 @@ class KnowledgeGapDetection:
suggestions=[f"'{entity['name']}' 添加定义", "从转录文本中提取定义信息"],
related_entities=[],
metadata={"entity_type": entity["type"]},
)
),
)
conn.close()
@@ -1900,7 +1900,7 @@ class KnowledgeGapDetection:
# 分析转录文本中频繁提及但未提取为实体的词
transcripts = conn.execute(
"SELECT full_text FROM transcripts WHERE project_id = ?", (project_id,)
"SELECT full_text FROM transcripts WHERE project_id = ?", (project_id,),
).fetchall()
# 合并所有文本
@@ -1908,7 +1908,7 @@ class KnowledgeGapDetection:
# 获取现有实体名称
existing_entities = conn.execute(
"SELECT name FROM entities WHERE project_id = ?", (project_id,)
"SELECT name FROM entities WHERE project_id = ?", (project_id,),
).fetchall()
existing_names = {e["name"].lower() for e in existing_entities}
@@ -1940,7 +1940,7 @@ class KnowledgeGapDetection:
],
related_entities=[],
metadata={"mention_count": count},
)
),
)
conn.close()
@@ -2146,7 +2146,7 @@ class SearchManager:
for t in transcripts:
if t["full_text"] and self.semantic_search.index_embedding(
t["id"], "transcript", t["project_id"], t["full_text"]
t["id"], "transcript", t["project_id"], t["full_text"],
):
semantic_stats["indexed"] += 1
else:
@@ -2179,12 +2179,12 @@ class SearchManager:
# 全文索引统计
fulltext_count = conn.execute(
f"SELECT COUNT(*) as count FROM search_indexes {where_clause}", params
f"SELECT COUNT(*) as count FROM search_indexes {where_clause}", params,
).fetchone()["count"]
# 语义索引统计
semantic_count = conn.execute(
f"SELECT COUNT(*) as count FROM embeddings {where_clause}", params
f"SELECT COUNT(*) as count FROM embeddings {where_clause}", params,
).fetchone()["count"]
# 按类型统计
@@ -2225,7 +2225,7 @@ def get_search_manager(db_path: str = "insightflow.db") -> SearchManager:
def fulltext_search(
query: str, project_id: str | None = None, limit: int = 20
query: str, project_id: str | None = None, limit: int = 20,
) -> list[SearchResult]:
"""全文搜索便捷函数"""
manager = get_search_manager()
@@ -2233,7 +2233,7 @@ def fulltext_search(
def semantic_search(
query: str, project_id: str | None = None, top_k: int = 10
query: str, project_id: str | None = None, top_k: int = 10,
) -> list[SemanticSearchResult]:
"""语义搜索便捷函数"""
manager = get_search_manager()