fix: auto-fix code issues (cron)

- 修复隐式 Optional 类型注解 (RUF013)
- 修复不必要的赋值后返回 (RET504)
- 优化列表推导式 (PERF401)
- 修复未使用的参数 (ARG002)
- 清理重复导入
- 优化异常处理
2026-03-03 21:11:47 +08:00
parent d17a58ceae
commit 259f2c90d0
36 changed files with 1651 additions and 863 deletions
--- a/backend/search_manager.py
+++ b/backend/search_manager.py
@@ -385,7 +385,7 @@ class FullTextSearch:
        # 排序和分页
        scored_results.sort(key=lambda x: x.score, reverse=True)

-        return scored_results[offset: offset + limit]
+        return scored_results[offset : offset + limit]

    def _parse_boolean_query(self, query: str) -> dict:
        """
@@ -545,19 +545,24 @@ class FullTextSearch:
        return results

    def _get_content_by_id(
-        self, conn: sqlite3.Connection, content_id: str, content_type: str,
+        self,
+        conn: sqlite3.Connection,
+        content_id: str,
+        content_type: str,
    ) -> str | None:
        """根据ID获取内容"""
        try:
            if content_type == "transcript":
                row = conn.execute(
-                    "SELECT full_text FROM transcripts WHERE id = ?", (content_id,),
+                    "SELECT full_text FROM transcripts WHERE id = ?",
+                    (content_id,),
                ).fetchone()
                return row["full_text"] if row else None

            elif content_type == "entity":
                row = conn.execute(
-                    "SELECT name, definition FROM entities WHERE id = ?", (content_id,),
+                    "SELECT name, definition FROM entities WHERE id = ?",
+                    (content_id,),
                ).fetchone()
                if row:
                    return f"{row['name']} {row['definition'] or ''}"
@@ -583,21 +588,27 @@ class FullTextSearch:
            return None

    def _get_project_id(
-        self, conn: sqlite3.Connection, content_id: str, content_type: str,
+        self,
+        conn: sqlite3.Connection,
+        content_id: str,
+        content_type: str,
    ) -> str | None:
        """获取内容所属的项目ID"""
        try:
            if content_type == "transcript":
                row = conn.execute(
-                    "SELECT project_id FROM transcripts WHERE id = ?", (content_id,),
+                    "SELECT project_id FROM transcripts WHERE id = ?",
+                    (content_id,),
                ).fetchone()
            elif content_type == "entity":
                row = conn.execute(
-                    "SELECT project_id FROM entities WHERE id = ?", (content_id,),
+                    "SELECT project_id FROM entities WHERE id = ?",
+                    (content_id,),
                ).fetchone()
            elif content_type == "relation":
                row = conn.execute(
-                    "SELECT project_id FROM entity_relations WHERE id = ?", (content_id,),
+                    "SELECT project_id FROM entity_relations WHERE id = ?",
+                    (content_id,),
                ).fetchone()
            else:
                return None
@@ -880,7 +891,11 @@ class SemanticSearch:
            return None

    def index_embedding(
-        self, content_id: str, content_type: str, project_id: str, text: str,
+        self,
+        content_id: str,
+        content_type: str,
+        project_id: str,
+        text: str,
    ) -> bool:
        """
        为内容生成并保存 embedding
@@ -1029,13 +1044,15 @@ class SemanticSearch:
        try:
            if content_type == "transcript":
                row = conn.execute(
-                    "SELECT full_text FROM transcripts WHERE id = ?", (content_id,),
+                    "SELECT full_text FROM transcripts WHERE id = ?",
+                    (content_id,),
                ).fetchone()
                result = row["full_text"] if row else None

            elif content_type == "entity":
                row = conn.execute(
-                    "SELECT name, definition FROM entities WHERE id = ?", (content_id,),
+                    "SELECT name, definition FROM entities WHERE id = ?",
+                    (content_id,),
                ).fetchone()
                result = f"{row['name']}: {row['definition']}" if row else None

@@ -1067,7 +1084,10 @@ class SemanticSearch:
            return None

    def find_similar_content(
-        self, content_id: str, content_type: str, top_k: int = 5,
+        self,
+        content_id: str,
+        content_type: str,
+        top_k: int = 5,
    ) -> list[SemanticSearchResult]:
        """
        查找与指定内容相似的内容
@@ -1175,7 +1195,10 @@ class EntityPathDiscovery:
        return conn

    def find_shortest_path(
-        self, source_entity_id: str, target_entity_id: str, max_depth: int = 5,
+        self,
+        source_entity_id: str,
+        target_entity_id: str,
+        max_depth: int = 5,
    ) -> EntityPath | None:
        """
        查找两个实体之间的最短路径（BFS算法）
@@ -1192,7 +1215,8 @@ class EntityPathDiscovery:

        # 获取项目ID
        row = conn.execute(
-            "SELECT project_id FROM entities WHERE id = ?", (source_entity_id,),
+            "SELECT project_id FROM entities WHERE id = ?",
+            (source_entity_id,),
        ).fetchone()

        if not row:
@@ -1250,7 +1274,11 @@ class EntityPathDiscovery:
        return None

    def find_all_paths(
-        self, source_entity_id: str, target_entity_id: str, max_depth: int = 4, max_paths: int = 10,
+        self,
+        source_entity_id: str,
+        target_entity_id: str,
+        max_depth: int = 4,
+        max_paths: int = 10,
    ) -> list[EntityPath]:
        """
        查找两个实体之间的所有路径（限制数量和深度）
@@ -1268,7 +1296,8 @@ class EntityPathDiscovery:

        # 获取项目ID
        row = conn.execute(
-            "SELECT project_id FROM entities WHERE id = ?", (source_entity_id,),
+            "SELECT project_id FROM entities WHERE id = ?",
+            (source_entity_id,),
        ).fetchone()

        if not row:
@@ -1280,7 +1309,11 @@ class EntityPathDiscovery:
        paths = []

        def dfs(
-            current_id: str, target_id: str, path: list[str], visited: set[str], depth: int,
+            current_id: str,
+            target_id: str,
+            path: list[str],
+            visited: set[str],
+            depth: int,
        ) -> None:
            if depth > max_depth:
                return
@@ -1328,7 +1361,8 @@ class EntityPathDiscovery:
        nodes = []
        for entity_id in entity_ids:
            row = conn.execute(
-                "SELECT id, name, type FROM entities WHERE id = ?", (entity_id,),
+                "SELECT id, name, type FROM entities WHERE id = ?",
+                (entity_id,),
            ).fetchone()
            if row:
                nodes.append({"id": row["id"], "name": row["name"], "type": row["type"]})
@@ -1398,7 +1432,8 @@ class EntityPathDiscovery:

        # 获取项目ID
        row = conn.execute(
-            "SELECT project_id, name FROM entities WHERE id = ?", (entity_id,),
+            "SELECT project_id, name FROM entities WHERE id = ?",
+            (entity_id,),
        ).fetchone()

        if not row:
@@ -1445,7 +1480,8 @@ class EntityPathDiscovery:

                    # 获取邻居信息
                    neighbor_info = conn.execute(
-                        "SELECT name, type FROM entities WHERE id = ?", (neighbor_id,),
+                        "SELECT name, type FROM entities WHERE id = ?",
+                        (neighbor_id,),
                    ).fetchone()

                    if neighbor_info:
@@ -1458,7 +1494,10 @@ class EntityPathDiscovery:
                                "relation_type": neighbor["relation_type"],
                                "evidence": neighbor["evidence"],
                                "path": self._get_path_to_entity(
-                                    entity_id, neighbor_id, project_id, conn,
+                                    entity_id,
+                                    neighbor_id,
+                                    project_id,
+                                    conn,
                                ),
                            },
                        )
@@ -1470,7 +1509,11 @@ class EntityPathDiscovery:
        return relations

    def _get_path_to_entity(
-        self, source_id: str, target_id: str, project_id: str, conn: sqlite3.Connection,
+        self,
+        source_id: str,
+        target_id: str,
+        project_id: str,
+        conn: sqlite3.Connection,
    ) -> list[str]:
        """获取从源实体到目标实体的路径（简化版）"""
        # BFS 找路径
@@ -1565,7 +1608,8 @@ class EntityPathDiscovery:

        # 获取所有实体
        entities = conn.execute(
-            "SELECT id, name FROM entities WHERE project_id = ?", (project_id,),
+            "SELECT id, name FROM entities WHERE project_id = ?",
+            (project_id,),
        ).fetchall()

        # 计算每个实体作为桥梁的次数
@@ -1706,7 +1750,8 @@ class KnowledgeGapDetection:

        # 检查每个实体的属性完整性
        entities = conn.execute(
-            "SELECT id, name FROM entities WHERE project_id = ?", (project_id,),
+            "SELECT id, name FROM entities WHERE project_id = ?",
+            (project_id,),
        ).fetchall()

        for entity in entities:
@@ -1714,7 +1759,8 @@ class KnowledgeGapDetection:

            # 获取实体已有的属性
            existing_attrs = conn.execute(
-                "SELECT template_id FROM entity_attributes WHERE entity_id = ?", (entity_id,),
+                "SELECT template_id FROM entity_attributes WHERE entity_id = ?",
+                (entity_id,),
            ).fetchall()

            existing_template_ids = {a["template_id"] for a in existing_attrs}
@@ -1726,7 +1772,8 @@ class KnowledgeGapDetection:
                missing_names = []
                for template_id in missing_templates:
                    template = conn.execute(
-                        "SELECT name FROM attribute_templates WHERE id = ?", (template_id,),
+                        "SELECT name FROM attribute_templates WHERE id = ?",
+                        (template_id,),
                    ).fetchone()
                    if template:
                        missing_names.append(template["name"])
@@ -1759,7 +1806,8 @@ class KnowledgeGapDetection:

        # 获取所有实体及其关系数量
        entities = conn.execute(
-            "SELECT id, name, type FROM entities WHERE project_id = ?", (project_id,),
+            "SELECT id, name, type FROM entities WHERE project_id = ?",
+            (project_id,),
        ).fetchall()

        for entity in entities:
@@ -1900,7 +1948,8 @@ class KnowledgeGapDetection:

        # 分析转录文本中频繁提及但未提取为实体的词
        transcripts = conn.execute(
-            "SELECT full_text FROM transcripts WHERE project_id = ?", (project_id,),
+            "SELECT full_text FROM transcripts WHERE project_id = ?",
+            (project_id,),
        ).fetchall()

        # 合并所有文本
@@ -1908,7 +1957,8 @@ class KnowledgeGapDetection:

        # 获取现有实体名称
        existing_entities = conn.execute(
-            "SELECT name FROM entities WHERE project_id = ?", (project_id,),
+            "SELECT name FROM entities WHERE project_id = ?",
+            (project_id,),
        ).fetchall()

        existing_names = {e["name"].lower() for e in existing_entities}
@@ -2146,7 +2196,10 @@ class SearchManager:

            for t in transcripts:
                if t["full_text"] and self.semantic_search.index_embedding(
-                    t["id"], "transcript", t["project_id"], t["full_text"],
+                    t["id"],
+                    "transcript",
+                    t["project_id"],
+                    t["full_text"],
                ):
                    semantic_stats["indexed"] += 1
                else:
@@ -2179,12 +2232,14 @@ class SearchManager:

        # 全文索引统计
        fulltext_count = conn.execute(
-            f"SELECT COUNT(*) as count FROM search_indexes {where_clause}", params,
+            f"SELECT COUNT(*) as count FROM search_indexes {where_clause}",
+            params,
        ).fetchone()["count"]

        # 语义索引统计
        semantic_count = conn.execute(
-            f"SELECT COUNT(*) as count FROM embeddings {where_clause}", params,
+            f"SELECT COUNT(*) as count FROM embeddings {where_clause}",
+            params,
        ).fetchone()["count"]

        # 按类型统计
@@ -2225,7 +2280,9 @@ def get_search_manager(db_path: str = "insightflow.db") -> SearchManager:


 def fulltext_search(
-    query: str, project_id: str | None = None, limit: int = 20,
+    query: str,
+    project_id: str | None = None,
+    limit: int = 20,
 ) -> list[SearchResult]:
    """全文搜索便捷函数"""
    manager = get_search_manager()
@@ -2233,7 +2290,9 @@ def fulltext_search(


 def semantic_search(
-    query: str, project_id: str | None = None, top_k: int = 10,
+    query: str,
+    project_id: str | None = None,
+    top_k: int = 10,
 ) -> list[SemanticSearchResult]:
    """语义搜索便捷函数"""
    manager = get_search_manager()