fix: auto-fix code issues (cron)

- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
This commit is contained in:
OpenClaw Bot
2026-02-27 18:09:24 +08:00
parent 646b64daf7
commit 17bda3dbce
38 changed files with 1993 additions and 1972 deletions

View File

@@ -9,15 +9,14 @@ Phase 7 Task 6: Advanced Search & Discovery
4. KnowledgeGapDetection - 知识缺口识别
"""
import re
import hashlib
import json
import math
import re
import sqlite3
import hashlib
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Tuple, Set
from datetime import datetime
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
@@ -46,10 +45,10 @@ class SearchResult:
content_type: str # transcript, entity, relation
project_id: str
score: float
highlights: List[Tuple[int, int]] = field(default_factory=list) # 高亮位置
metadata: Dict = field(default_factory=dict)
highlights: list[tuple[int, int]] = field(default_factory=list) # 高亮位置
metadata: dict = field(default_factory=dict)
def to_dict(self) -> Dict:
def to_dict(self) -> dict:
return {
"id": self.id,
"content": self.content,
@@ -69,10 +68,10 @@ class SemanticSearchResult:
content_type: str
project_id: str
similarity: float
embedding: Optional[List[float]] = None
metadata: Dict = field(default_factory=dict)
embedding: list[float] | None = None
metadata: dict = field(default_factory=dict)
def to_dict(self) -> Dict:
def to_dict(self) -> dict:
result = {
"id": self.id,
"content": self.content[:500] + "..." if len(self.content) > 500 else self.content,
@@ -95,12 +94,12 @@ class EntityPath:
target_entity_id: str
target_entity_name: str
path_length: int
nodes: List[Dict] # 路径上的节点
edges: List[Dict] # 路径上的边
nodes: list[dict] # 路径上的节点
edges: list[dict] # 路径上的边
confidence: float
path_description: str
def to_dict(self) -> Dict:
def to_dict(self) -> dict:
return {
"path_id": self.path_id,
"source_entity_id": self.source_entity_id,
@@ -120,15 +119,15 @@ class KnowledgeGap:
"""知识缺口数据模型"""
gap_id: str
gap_type: str # missing_attribute, sparse_relation, isolated_entity, incomplete_entity
entity_id: Optional[str]
entity_name: Optional[str]
entity_id: str | None
entity_name: str | None
description: str
severity: str # high, medium, low
suggestions: List[str]
related_entities: List[str]
metadata: Dict = field(default_factory=dict)
suggestions: list[str]
related_entities: list[str]
metadata: dict = field(default_factory=dict)
def to_dict(self) -> Dict:
def to_dict(self) -> dict:
return {
"gap_id": self.gap_id,
"gap_type": self.gap_type,
@@ -149,8 +148,8 @@ class SearchIndex:
content_id: str
content_type: str
project_id: str
tokens: List[str]
token_positions: Dict[str, List[int]] # 词 -> 位置列表
tokens: list[str]
token_positions: dict[str, list[int]] # 词 -> 位置列表
created_at: str
updated_at: str
@@ -162,7 +161,7 @@ class TextEmbedding:
content_id: str
content_type: str
project_id: str
embedding: List[float]
embedding: list[float]
model_name: str
created_at: str
@@ -231,7 +230,7 @@ class FullTextSearch:
conn.commit()
conn.close()
def _tokenize(self, text: str) -> List[str]:
def _tokenize(self, text: str) -> list[str]:
"""
中文分词(简化版)
@@ -243,7 +242,7 @@ class FullTextSearch:
tokens = re.findall(r'[\u4e00-\u9fa5]+|[a-z]+|\d+', text)
return tokens
def _extract_positions(self, text: str, tokens: List[str]) -> Dict[str, List[int]]:
def _extract_positions(self, text: str, tokens: list[str]) -> dict[str, list[int]]:
"""提取每个词在文本中的位置"""
positions = defaultdict(list)
text_lower = text.lower()
@@ -326,9 +325,9 @@ class FullTextSearch:
print(f"索引创建失败: {e}")
return False
def search(self, query: str, project_id: Optional[str] = None,
content_types: Optional[List[str]] = None,
limit: int = 20, offset: int = 0) -> List[SearchResult]:
def search(self, query: str, project_id: str | None = None,
content_types: list[str] | None = None,
limit: int = 20, offset: int = 0) -> list[SearchResult]:
"""
全文搜索
@@ -358,7 +357,7 @@ class FullTextSearch:
return scored_results[offset:offset + limit]
def _parse_boolean_query(self, query: str) -> Dict:
def _parse_boolean_query(self, query: str) -> dict:
"""
解析布尔查询
@@ -401,9 +400,9 @@ class FullTextSearch:
"phrases": phrases
}
def _execute_boolean_search(self, parsed_query: Dict,
project_id: Optional[str] = None,
content_types: Optional[List[str]] = None) -> List[Dict]:
def _execute_boolean_search(self, parsed_query: dict,
project_id: str | None = None,
content_types: list[str] | None = None) -> list[dict]:
"""执行布尔搜索"""
conn = self._get_conn()
@@ -503,7 +502,7 @@ class FullTextSearch:
return results
def _get_content_by_id(self, conn: sqlite3.Connection,
content_id: str, content_type: str) -> Optional[str]:
content_id: str, content_type: str) -> str | None:
"""根据ID获取内容"""
try:
if content_type == "transcript":
@@ -542,7 +541,7 @@ class FullTextSearch:
return None
def _get_project_id(self, conn: sqlite3.Connection,
content_id: str, content_type: str) -> Optional[str]:
content_id: str, content_type: str) -> str | None:
"""获取内容所属的项目ID"""
try:
if content_type == "transcript":
@@ -567,7 +566,7 @@ class FullTextSearch:
except Exception:
return None
def _score_results(self, results: List[Dict], parsed_query: Dict) -> List[SearchResult]:
def _score_results(self, results: list[dict], parsed_query: dict) -> list[SearchResult]:
"""计算搜索结果的相关性分数"""
scored = []
all_terms = parsed_query["and"] + parsed_query["or"] + parsed_query["phrases"]
@@ -689,7 +688,7 @@ class FullTextSearch:
print(f"删除索引失败: {e}")
return False
def reindex_project(self, project_id: str) -> Dict:
def reindex_project(self, project_id: str) -> dict:
"""重新索引整个项目"""
conn = self._get_conn()
stats = {"transcripts": 0, "entities": 0, "relations": 0, "errors": 0}
@@ -808,7 +807,7 @@ class SemanticSearch:
"""检查语义搜索是否可用"""
return self.model is not None and SENTENCE_TRANSFORMERS_AVAILABLE
def generate_embedding(self, text: str) -> Optional[List[float]]:
def generate_embedding(self, text: str) -> list[float] | None:
"""
生成文本的 embedding 向量
@@ -878,9 +877,9 @@ class SemanticSearch:
print(f"索引 embedding 失败: {e}")
return False
def search(self, query: str, project_id: Optional[str] = None,
content_types: Optional[List[str]] = None,
top_k: int = 10, threshold: float = 0.5) -> List[SemanticSearchResult]:
def search(self, query: str, project_id: str | None = None,
content_types: list[str] | None = None,
top_k: int = 10, threshold: float = 0.5) -> list[SemanticSearchResult]:
"""
语义搜索
@@ -959,7 +958,7 @@ class SemanticSearch:
results.sort(key=lambda x: x.similarity, reverse=True)
return results[:top_k]
def _get_content_text(self, content_id: str, content_type: str) -> Optional[str]:
def _get_content_text(self, content_id: str, content_type: str) -> str | None:
"""获取内容文本"""
conn = self._get_conn()
@@ -1002,7 +1001,7 @@ class SemanticSearch:
return None
def find_similar_content(self, content_id: str, content_type: str,
top_k: int = 5) -> List[SemanticSearchResult]:
top_k: int = 5) -> list[SemanticSearchResult]:
"""
查找与指定内容相似的内容
@@ -1107,7 +1106,7 @@ class EntityPathDiscovery:
def find_shortest_path(self, source_entity_id: str,
target_entity_id: str,
max_depth: int = 5) -> Optional[EntityPath]:
max_depth: int = 5) -> EntityPath | None:
"""
查找两个实体之间的最短路径(BFS算法)
@@ -1181,7 +1180,7 @@ class EntityPathDiscovery:
def find_all_paths(self, source_entity_id: str,
target_entity_id: str,
max_depth: int = 4,
max_paths: int = 10) -> List[EntityPath]:
max_paths: int = 10) -> list[EntityPath]:
"""
查找两个实体之间的所有路径(限制数量和深度)
@@ -1211,7 +1210,7 @@ class EntityPathDiscovery:
paths = []
def dfs(current_id: str, target_id: str,
path: List[str], visited: Set[str], depth: int):
path: list[str], visited: set[str], depth: int):
if depth > max_depth:
return
@@ -1247,7 +1246,7 @@ class EntityPathDiscovery:
# 构建路径对象
return [self._build_path_object(path, project_id) for path in paths]
def _build_path_object(self, entity_ids: List[str],
def _build_path_object(self, entity_ids: list[str],
project_id: str) -> EntityPath:
"""构建路径对象"""
conn = self._get_conn()
@@ -1312,7 +1311,7 @@ class EntityPathDiscovery:
)
def find_multi_hop_relations(self, entity_id: str,
max_hops: int = 3) -> List[Dict]:
max_hops: int = 3) -> list[dict]:
"""
查找实体的多跳关系
@@ -1394,7 +1393,7 @@ class EntityPathDiscovery:
return relations
def _get_path_to_entity(self, source_id: str, target_id: str,
project_id: str, conn: sqlite3.Connection) -> List[str]:
project_id: str, conn: sqlite3.Connection) -> list[str]:
"""获取从源实体到目标实体的路径(简化版)"""
# BFS 找路径
visited = {source_id}
@@ -1428,7 +1427,7 @@ class EntityPathDiscovery:
return []
def generate_path_visualization(self, path: EntityPath) -> Dict:
def generate_path_visualization(self, path: EntityPath) -> dict:
"""
生成路径可视化数据
@@ -1467,7 +1466,7 @@ class EntityPathDiscovery:
"confidence": path.confidence
}
def analyze_path_centrality(self, project_id: str) -> List[Dict]:
def analyze_path_centrality(self, project_id: str) -> list[dict]:
"""
分析项目中实体的路径中心性(桥接程度)
@@ -1558,7 +1557,7 @@ class KnowledgeGapDetection:
conn.row_factory = sqlite3.Row
return conn
def analyze_project(self, project_id: str) -> List[KnowledgeGap]:
def analyze_project(self, project_id: str) -> list[KnowledgeGap]:
"""
分析项目中的知识缺口
@@ -1591,7 +1590,7 @@ class KnowledgeGapDetection:
return gaps
def _check_entity_attribute_completeness(self, project_id: str) -> List[KnowledgeGap]:
def _check_entity_attribute_completeness(self, project_id: str) -> list[KnowledgeGap]:
"""检查实体属性完整性"""
conn = self._get_conn()
gaps = []
@@ -1661,7 +1660,7 @@ class KnowledgeGapDetection:
conn.close()
return gaps
def _check_relation_sparsity(self, project_id: str) -> List[KnowledgeGap]:
def _check_relation_sparsity(self, project_id: str) -> list[KnowledgeGap]:
"""检查关系稀疏度"""
conn = self._get_conn()
gaps = []
@@ -1720,7 +1719,7 @@ class KnowledgeGapDetection:
conn.close()
return gaps
def _check_isolated_entities(self, project_id: str) -> List[KnowledgeGap]:
def _check_isolated_entities(self, project_id: str) -> list[KnowledgeGap]:
"""检查孤立实体(没有任何关系)"""
conn = self._get_conn()
gaps = []
@@ -1756,7 +1755,7 @@ class KnowledgeGapDetection:
conn.close()
return gaps
def _check_incomplete_entities(self, project_id: str) -> List[KnowledgeGap]:
def _check_incomplete_entities(self, project_id: str) -> list[KnowledgeGap]:
"""检查不完整实体(缺少名称、类型或定义)"""
conn = self._get_conn()
gaps = []
@@ -1788,7 +1787,7 @@ class KnowledgeGapDetection:
conn.close()
return gaps
def _check_missing_key_entities(self, project_id: str) -> List[KnowledgeGap]:
def _check_missing_key_entities(self, project_id: str) -> list[KnowledgeGap]:
"""检查可能缺失的关键实体"""
conn = self._get_conn()
gaps = []
@@ -1841,7 +1840,7 @@ class KnowledgeGapDetection:
conn.close()
return gaps[:10] # 限制数量
def generate_completeness_report(self, project_id: str) -> Dict:
def generate_completeness_report(self, project_id: str) -> dict:
"""
生成知识完整性报告
@@ -1898,7 +1897,7 @@ class KnowledgeGapDetection:
"recommendations": self._generate_recommendations(gaps)
}
def _generate_recommendations(self, gaps: List[KnowledgeGap]) -> List[str]:
def _generate_recommendations(self, gaps: list[KnowledgeGap]) -> list[str]:
"""生成改进建议"""
recommendations = []
@@ -1941,8 +1940,8 @@ class SearchManager:
self.path_discovery = EntityPathDiscovery(db_path)
self.gap_detection = KnowledgeGapDetection(db_path)
def hybrid_search(self, query: str, project_id: Optional[str] = None,
limit: int = 20) -> Dict:
def hybrid_search(self, query: str, project_id: str | None = None,
limit: int = 20) -> dict:
"""
混合搜索(全文 + 语义)
@@ -2014,7 +2013,7 @@ class SearchManager:
"results": results[:limit]
}
def index_project(self, project_id: str) -> Dict:
def index_project(self, project_id: str) -> dict:
"""
为项目建立所有索引
@@ -2071,7 +2070,7 @@ class SearchManager:
"semantic": semantic_stats
}
def get_search_stats(self, project_id: Optional[str] = None) -> Dict:
def get_search_stats(self, project_id: str | None = None) -> dict:
"""获取搜索统计信息"""
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
@@ -2126,28 +2125,28 @@ def get_search_manager(db_path: str = "insightflow.db") -> SearchManager:
# 便捷函数
def fulltext_search(query: str, project_id: Optional[str] = None,
limit: int = 20) -> List[SearchResult]:
def fulltext_search(query: str, project_id: str | None = None,
limit: int = 20) -> list[SearchResult]:
"""全文搜索便捷函数"""
manager = get_search_manager()
return manager.fulltext_search.search(query, project_id, limit=limit)
def semantic_search(query: str, project_id: Optional[str] = None,
top_k: int = 10) -> List[SemanticSearchResult]:
def semantic_search(query: str, project_id: str | None = None,
top_k: int = 10) -> list[SemanticSearchResult]:
"""语义搜索便捷函数"""
manager = get_search_manager()
return manager.semantic_search.search(query, project_id, top_k=top_k)
def find_entity_path(source_id: str, target_id: str,
max_depth: int = 5) -> Optional[EntityPath]:
max_depth: int = 5) -> EntityPath | None:
"""查找实体路径便捷函数"""
manager = get_search_manager()
return manager.path_discovery.find_shortest_path(source_id, target_id, max_depth)
def detect_knowledge_gaps(project_id: str) -> List[KnowledgeGap]:
def detect_knowledge_gaps(project_id: str) -> list[KnowledgeGap]:
"""知识缺口检测便捷函数"""
manager = get_search_manager()
return manager.gap_detection.analyze_project(project_id)