fix: auto-fix code issues (cron)

- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
2026-02-27 18:09:24 +08:00
parent 646b64daf7
commit 17bda3dbce
38 changed files with 1993 additions and 1972 deletions
--- a/backend/search_manager.py
+++ b/backend/search_manager.py
@@ -9,15 +9,14 @@ Phase 7 Task 6: Advanced Search & Discovery
 4. KnowledgeGapDetection - 知识缺口识别
 """

-import re
+import hashlib
 import json
 import math
+import re
 import sqlite3
-import hashlib
-from dataclasses import dataclass, field
-from typing import List, Dict, Optional, Tuple, Set
-from datetime import datetime
 from collections import defaultdict
+from dataclasses import dataclass, field
+from datetime import datetime
 from enum import Enum


@@ -46,10 +45,10 @@ class SearchResult:
    content_type: str  # transcript, entity, relation
    project_id: str
    score: float
-    highlights: List[Tuple[int, int]] = field(default_factory=list)  # 高亮位置
-    metadata: Dict = field(default_factory=dict)
+    highlights: list[tuple[int, int]] = field(default_factory=list)  # 高亮位置
+    metadata: dict = field(default_factory=dict)

-    def to_dict(self) -> Dict:
+    def to_dict(self) -> dict:
        return {
            "id": self.id,
            "content": self.content,
@@ -69,10 +68,10 @@ class SemanticSearchResult:
    content_type: str
    project_id: str
    similarity: float
-    embedding: Optional[List[float]] = None
-    metadata: Dict = field(default_factory=dict)
+    embedding: list[float] | None = None
+    metadata: dict = field(default_factory=dict)

-    def to_dict(self) -> Dict:
+    def to_dict(self) -> dict:
        result = {
            "id": self.id,
            "content": self.content[:500] + "..." if len(self.content) > 500 else self.content,
@@ -95,12 +94,12 @@ class EntityPath:
    target_entity_id: str
    target_entity_name: str
    path_length: int
-    nodes: List[Dict]  # 路径上的节点
-    edges: List[Dict]  # 路径上的边
+    nodes: list[dict]  # 路径上的节点
+    edges: list[dict]  # 路径上的边
    confidence: float
    path_description: str

-    def to_dict(self) -> Dict:
+    def to_dict(self) -> dict:
        return {
            "path_id": self.path_id,
            "source_entity_id": self.source_entity_id,
@@ -120,15 +119,15 @@ class KnowledgeGap:
    """知识缺口数据模型"""
    gap_id: str
    gap_type: str  # missing_attribute, sparse_relation, isolated_entity, incomplete_entity
-    entity_id: Optional[str]
-    entity_name: Optional[str]
+    entity_id: str | None
+    entity_name: str | None
    description: str
    severity: str  # high, medium, low
-    suggestions: List[str]
-    related_entities: List[str]
-    metadata: Dict = field(default_factory=dict)
+    suggestions: list[str]
+    related_entities: list[str]
+    metadata: dict = field(default_factory=dict)

-    def to_dict(self) -> Dict:
+    def to_dict(self) -> dict:
        return {
            "gap_id": self.gap_id,
            "gap_type": self.gap_type,
@@ -149,8 +148,8 @@ class SearchIndex:
    content_id: str
    content_type: str
    project_id: str
-    tokens: List[str]
-    token_positions: Dict[str, List[int]]  # 词 -> 位置列表
+    tokens: list[str]
+    token_positions: dict[str, list[int]]  # 词 -> 位置列表
    created_at: str
    updated_at: str

@@ -162,7 +161,7 @@ class TextEmbedding:
    content_id: str
    content_type: str
    project_id: str
-    embedding: List[float]
+    embedding: list[float]
    model_name: str
    created_at: str

@@ -231,7 +230,7 @@ class FullTextSearch:
        conn.commit()
        conn.close()

-    def _tokenize(self, text: str) -> List[str]:
+    def _tokenize(self, text: str) -> list[str]:
        """
        中文分词（简化版）

@@ -243,7 +242,7 @@ class FullTextSearch:
        tokens = re.findall(r'[\u4e00-\u9fa5]+|[a-z]+|\d+', text)
        return tokens

-    def _extract_positions(self, text: str, tokens: List[str]) -> Dict[str, List[int]]:
+    def _extract_positions(self, text: str, tokens: list[str]) -> dict[str, list[int]]:
        """提取每个词在文本中的位置"""
        positions = defaultdict(list)
        text_lower = text.lower()
@@ -326,9 +325,9 @@ class FullTextSearch:
            print(f"索引创建失败: {e}")
            return False

-    def search(self, query: str, project_id: Optional[str] = None,
-               content_types: Optional[List[str]] = None,
-               limit: int = 20, offset: int = 0) -> List[SearchResult]:
+    def search(self, query: str, project_id: str | None = None,
+               content_types: list[str] | None = None,
+               limit: int = 20, offset: int = 0) -> list[SearchResult]:
        """
        全文搜索

@@ -358,7 +357,7 @@ class FullTextSearch:

        return scored_results[offset:offset + limit]

-    def _parse_boolean_query(self, query: str) -> Dict:
+    def _parse_boolean_query(self, query: str) -> dict:
        """
        解析布尔查询

@@ -401,9 +400,9 @@ class FullTextSearch:
            "phrases": phrases
        }

-    def _execute_boolean_search(self, parsed_query: Dict,
-                                project_id: Optional[str] = None,
-                                content_types: Optional[List[str]] = None) -> List[Dict]:
+    def _execute_boolean_search(self, parsed_query: dict,
+                                project_id: str | None = None,
+                                content_types: list[str] | None = None) -> list[dict]:
        """执行布尔搜索"""
        conn = self._get_conn()

@@ -503,7 +502,7 @@ class FullTextSearch:
        return results

    def _get_content_by_id(self, conn: sqlite3.Connection,
-                           content_id: str, content_type: str) -> Optional[str]:
+                           content_id: str, content_type: str) -> str | None:
        """根据ID获取内容"""
        try:
            if content_type == "transcript":
@@ -542,7 +541,7 @@ class FullTextSearch:
            return None

    def _get_project_id(self, conn: sqlite3.Connection,
-                        content_id: str, content_type: str) -> Optional[str]:
+                        content_id: str, content_type: str) -> str | None:
        """获取内容所属的项目ID"""
        try:
            if content_type == "transcript":
@@ -567,7 +566,7 @@ class FullTextSearch:
        except Exception:
            return None

-    def _score_results(self, results: List[Dict], parsed_query: Dict) -> List[SearchResult]:
+    def _score_results(self, results: list[dict], parsed_query: dict) -> list[SearchResult]:
        """计算搜索结果的相关性分数"""
        scored = []
        all_terms = parsed_query["and"] + parsed_query["or"] + parsed_query["phrases"]
@@ -689,7 +688,7 @@ class FullTextSearch:
            print(f"删除索引失败: {e}")
            return False

-    def reindex_project(self, project_id: str) -> Dict:
+    def reindex_project(self, project_id: str) -> dict:
        """重新索引整个项目"""
        conn = self._get_conn()
        stats = {"transcripts": 0, "entities": 0, "relations": 0, "errors": 0}
@@ -808,7 +807,7 @@ class SemanticSearch:
        """检查语义搜索是否可用"""
        return self.model is not None and SENTENCE_TRANSFORMERS_AVAILABLE

-    def generate_embedding(self, text: str) -> Optional[List[float]]:
+    def generate_embedding(self, text: str) -> list[float] | None:
        """
        生成文本的 embedding 向量

@@ -878,9 +877,9 @@ class SemanticSearch:
            print(f"索引 embedding 失败: {e}")
            return False

-    def search(self, query: str, project_id: Optional[str] = None,
-               content_types: Optional[List[str]] = None,
-               top_k: int = 10, threshold: float = 0.5) -> List[SemanticSearchResult]:
+    def search(self, query: str, project_id: str | None = None,
+               content_types: list[str] | None = None,
+               top_k: int = 10, threshold: float = 0.5) -> list[SemanticSearchResult]:
        """
        语义搜索

@@ -959,7 +958,7 @@ class SemanticSearch:
        results.sort(key=lambda x: x.similarity, reverse=True)
        return results[:top_k]

-    def _get_content_text(self, content_id: str, content_type: str) -> Optional[str]:
+    def _get_content_text(self, content_id: str, content_type: str) -> str | None:
        """获取内容文本"""
        conn = self._get_conn()

@@ -1002,7 +1001,7 @@ class SemanticSearch:
            return None

    def find_similar_content(self, content_id: str, content_type: str,
-                             top_k: int = 5) -> List[SemanticSearchResult]:
+                             top_k: int = 5) -> list[SemanticSearchResult]:
        """
        查找与指定内容相似的内容

@@ -1107,7 +1106,7 @@ class EntityPathDiscovery:

    def find_shortest_path(self, source_entity_id: str,
                           target_entity_id: str,
-                           max_depth: int = 5) -> Optional[EntityPath]:
+                           max_depth: int = 5) -> EntityPath | None:
        """
        查找两个实体之间的最短路径（BFS算法）

@@ -1181,7 +1180,7 @@ class EntityPathDiscovery:
    def find_all_paths(self, source_entity_id: str,
                       target_entity_id: str,
                       max_depth: int = 4,
-                       max_paths: int = 10) -> List[EntityPath]:
+                       max_paths: int = 10) -> list[EntityPath]:
        """
        查找两个实体之间的所有路径（限制数量和深度）

@@ -1211,7 +1210,7 @@ class EntityPathDiscovery:
        paths = []

        def dfs(current_id: str, target_id: str,
-                path: List[str], visited: Set[str], depth: int):
+                path: list[str], visited: set[str], depth: int):
            if depth > max_depth:
                return

@@ -1247,7 +1246,7 @@ class EntityPathDiscovery:
        # 构建路径对象
        return [self._build_path_object(path, project_id) for path in paths]

-    def _build_path_object(self, entity_ids: List[str],
+    def _build_path_object(self, entity_ids: list[str],
                           project_id: str) -> EntityPath:
        """构建路径对象"""
        conn = self._get_conn()
@@ -1312,7 +1311,7 @@ class EntityPathDiscovery:
        )

    def find_multi_hop_relations(self, entity_id: str,
-                                 max_hops: int = 3) -> List[Dict]:
+                                 max_hops: int = 3) -> list[dict]:
        """
        查找实体的多跳关系

@@ -1394,7 +1393,7 @@ class EntityPathDiscovery:
        return relations

    def _get_path_to_entity(self, source_id: str, target_id: str,
-                            project_id: str, conn: sqlite3.Connection) -> List[str]:
+                            project_id: str, conn: sqlite3.Connection) -> list[str]:
        """获取从源实体到目标实体的路径（简化版）"""
        # BFS 找路径
        visited = {source_id}
@@ -1428,7 +1427,7 @@ class EntityPathDiscovery:

        return []

-    def generate_path_visualization(self, path: EntityPath) -> Dict:
+    def generate_path_visualization(self, path: EntityPath) -> dict:
        """
        生成路径可视化数据

@@ -1467,7 +1466,7 @@ class EntityPathDiscovery:
            "confidence": path.confidence
        }

-    def analyze_path_centrality(self, project_id: str) -> List[Dict]:
+    def analyze_path_centrality(self, project_id: str) -> list[dict]:
        """
        分析项目中实体的路径中心性（桥接程度）

@@ -1558,7 +1557,7 @@ class KnowledgeGapDetection:
        conn.row_factory = sqlite3.Row
        return conn

-    def analyze_project(self, project_id: str) -> List[KnowledgeGap]:
+    def analyze_project(self, project_id: str) -> list[KnowledgeGap]:
        """
        分析项目中的知识缺口

@@ -1591,7 +1590,7 @@ class KnowledgeGapDetection:

        return gaps

-    def _check_entity_attribute_completeness(self, project_id: str) -> List[KnowledgeGap]:
+    def _check_entity_attribute_completeness(self, project_id: str) -> list[KnowledgeGap]:
        """检查实体属性完整性"""
        conn = self._get_conn()
        gaps = []
@@ -1661,7 +1660,7 @@ class KnowledgeGapDetection:
        conn.close()
        return gaps

-    def _check_relation_sparsity(self, project_id: str) -> List[KnowledgeGap]:
+    def _check_relation_sparsity(self, project_id: str) -> list[KnowledgeGap]:
        """检查关系稀疏度"""
        conn = self._get_conn()
        gaps = []
@@ -1720,7 +1719,7 @@ class KnowledgeGapDetection:
        conn.close()
        return gaps

-    def _check_isolated_entities(self, project_id: str) -> List[KnowledgeGap]:
+    def _check_isolated_entities(self, project_id: str) -> list[KnowledgeGap]:
        """检查孤立实体（没有任何关系）"""
        conn = self._get_conn()
        gaps = []
@@ -1756,7 +1755,7 @@ class KnowledgeGapDetection:
        conn.close()
        return gaps

-    def _check_incomplete_entities(self, project_id: str) -> List[KnowledgeGap]:
+    def _check_incomplete_entities(self, project_id: str) -> list[KnowledgeGap]:
        """检查不完整实体（缺少名称、类型或定义）"""
        conn = self._get_conn()
        gaps = []
@@ -1788,7 +1787,7 @@ class KnowledgeGapDetection:
        conn.close()
        return gaps

-    def _check_missing_key_entities(self, project_id: str) -> List[KnowledgeGap]:
+    def _check_missing_key_entities(self, project_id: str) -> list[KnowledgeGap]:
        """检查可能缺失的关键实体"""
        conn = self._get_conn()
        gaps = []
@@ -1841,7 +1840,7 @@ class KnowledgeGapDetection:
        conn.close()
        return gaps[:10]  # 限制数量

-    def generate_completeness_report(self, project_id: str) -> Dict:
+    def generate_completeness_report(self, project_id: str) -> dict:
        """
        生成知识完整性报告

@@ -1898,7 +1897,7 @@ class KnowledgeGapDetection:
            "recommendations": self._generate_recommendations(gaps)
        }

-    def _generate_recommendations(self, gaps: List[KnowledgeGap]) -> List[str]:
+    def _generate_recommendations(self, gaps: list[KnowledgeGap]) -> list[str]:
        """生成改进建议"""
        recommendations = []

@@ -1941,8 +1940,8 @@ class SearchManager:
        self.path_discovery = EntityPathDiscovery(db_path)
        self.gap_detection = KnowledgeGapDetection(db_path)

-    def hybrid_search(self, query: str, project_id: Optional[str] = None,
-                      limit: int = 20) -> Dict:
+    def hybrid_search(self, query: str, project_id: str | None = None,
+                      limit: int = 20) -> dict:
        """
        混合搜索（全文 + 语义）

@@ -2014,7 +2013,7 @@ class SearchManager:
            "results": results[:limit]
        }

-    def index_project(self, project_id: str) -> Dict:
+    def index_project(self, project_id: str) -> dict:
        """
        为项目建立所有索引

@@ -2071,7 +2070,7 @@ class SearchManager:
            "semantic": semantic_stats
        }

-    def get_search_stats(self, project_id: Optional[str] = None) -> Dict:
+    def get_search_stats(self, project_id: str | None = None) -> dict:
        """获取搜索统计信息"""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
@@ -2126,28 +2125,28 @@ def get_search_manager(db_path: str = "insightflow.db") -> SearchManager:


 # 便捷函数
-def fulltext_search(query: str, project_id: Optional[str] = None,
-                    limit: int = 20) -> List[SearchResult]:
+def fulltext_search(query: str, project_id: str | None = None,
+                    limit: int = 20) -> list[SearchResult]:
    """全文搜索便捷函数"""
    manager = get_search_manager()
    return manager.fulltext_search.search(query, project_id, limit=limit)


-def semantic_search(query: str, project_id: Optional[str] = None,
-                    top_k: int = 10) -> List[SemanticSearchResult]:
+def semantic_search(query: str, project_id: str | None = None,
+                    top_k: int = 10) -> list[SemanticSearchResult]:
    """语义搜索便捷函数"""
    manager = get_search_manager()
    return manager.semantic_search.search(query, project_id, top_k=top_k)


 def find_entity_path(source_id: str, target_id: str,
-                     max_depth: int = 5) -> Optional[EntityPath]:
+                     max_depth: int = 5) -> EntityPath | None:
    """查找实体路径便捷函数"""
    manager = get_search_manager()
    return manager.path_discovery.find_shortest_path(source_id, target_id, max_depth)


-def detect_knowledge_gaps(project_id: str) -> List[KnowledgeGap]:
+def detect_knowledge_gaps(project_id: str) -> list[KnowledgeGap]:
    """知识缺口检测便捷函数"""
    manager = get_search_manager()
    return manager.gap_detection.analyze_project(project_id)