fix: auto-fix code issues (cron)

- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
This commit is contained in:
AutoFix Bot
2026-03-02 12:14:39 +08:00
parent e23f1fec08
commit 98527c4de4
39 changed files with 8109 additions and 8147 deletions

View File

@@ -12,8 +12,8 @@ import httpx
import numpy as np
# API Keys
KIMI_API_KEY = os.getenv("KIMI_API_KEY", "")
KIMI_BASE_URL = os.getenv("KIMI_BASE_URL", "https://api.kimi.com/coding")
KIMI_API_KEY = os.getenv("KIMI_API_KEY", "")
KIMI_BASE_URL = os.getenv("KIMI_BASE_URL", "https://api.kimi.com/coding")
@dataclass
@@ -27,9 +27,9 @@ class EntityEmbedding:
class EntityAligner:
"""实体对齐器 - 使用 embedding 进行相似度匹配"""
def __init__(self, similarity_threshold: float = 0.85) -> None:
self.similarity_threshold = similarity_threshold
self.embedding_cache: dict[str, list[float]] = {}
def __init__(self, similarity_threshold: float = 0.85) -> None:
self.similarity_threshold = similarity_threshold
self.embedding_cache: dict[str, list[float]] = {}
def get_embedding(self, text: str) -> list[float] | None:
"""
@@ -45,25 +45,25 @@ class EntityAligner:
return None
# 检查缓存
cache_key = hash(text)
cache_key = hash(text)
if cache_key in self.embedding_cache:
return self.embedding_cache[cache_key]
try:
response = httpx.post(
response = httpx.post(
f"{KIMI_BASE_URL}/v1/embeddings",
headers = {
headers={
"Authorization": f"Bearer {KIMI_API_KEY}",
"Content-Type": "application/json",
},
json = {"model": "k2p5", "input": text[:500]}, # 限制长度
timeout = 30.0,
json={"model": "k2p5", "input": text[:500]}, # 限制长度
timeout=30.0,
)
response.raise_for_status()
result = response.json()
result = response.json()
embedding = result["data"][0]["embedding"]
self.embedding_cache[cache_key] = embedding
embedding = result["data"][0]["embedding"]
self.embedding_cache[cache_key] = embedding
return embedding
except (httpx.HTTPError, json.JSONDecodeError, KeyError) as e:
@@ -81,20 +81,20 @@ class EntityAligner:
Returns:
相似度分数 (0-1)
"""
vec1 = np.array(embedding1)
vec2 = np.array(embedding2)
vec1 = np.array(embedding1)
vec2 = np.array(embedding2)
# 余弦相似度
dot_product = np.dot(vec1, vec2)
norm1 = np.linalg.norm(vec1)
norm2 = np.linalg.norm(vec2)
dot_product = np.dot(vec1, vec2)
norm1 = np.linalg.norm(vec1)
norm2 = np.linalg.norm(vec2)
if norm1 == 0 or norm2 == 0:
return 0.0
return float(dot_product / (norm1 * norm2))
def get_entity_text(self, name: str, definition: str = "") -> str:
def get_entity_text(self, name: str, definition: str = "") -> str:
"""
构建用于 embedding 的实体文本
@@ -113,9 +113,9 @@ class EntityAligner:
self,
project_id: str,
name: str,
definition: str = "",
exclude_id: str | None = None,
threshold: float | None = None,
definition: str = "",
exclude_id: str | None = None,
threshold: float | None = None,
) -> object | None:
"""
查找相似的实体
@@ -131,54 +131,54 @@ class EntityAligner:
相似的实体或 None
"""
if threshold is None:
threshold = self.similarity_threshold
threshold = self.similarity_threshold
try:
from db_manager import get_db_manager
db = get_db_manager()
db = get_db_manager()
except ImportError:
return None
# 获取项目的所有实体
entities = db.get_all_entities_for_embedding(project_id)
entities = db.get_all_entities_for_embedding(project_id)
if not entities:
return None
# 获取查询实体的 embedding
query_text = self.get_entity_text(name, definition)
query_embedding = self.get_embedding(query_text)
query_text = self.get_entity_text(name, definition)
query_embedding = self.get_embedding(query_text)
if query_embedding is None:
# 如果 embedding API 失败,回退到简单匹配
return self._fallback_similarity_match(entities, name, exclude_id)
best_match = None
best_score = threshold
best_match = None
best_score = threshold
for entity in entities:
if exclude_id and entity.id == exclude_id:
continue
# 获取实体的 embedding
entity_text = self.get_entity_text(entity.name, entity.definition)
entity_embedding = self.get_embedding(entity_text)
entity_text = self.get_entity_text(entity.name, entity.definition)
entity_embedding = self.get_embedding(entity_text)
if entity_embedding is None:
continue
# 计算相似度
similarity = self.compute_similarity(query_embedding, entity_embedding)
similarity = self.compute_similarity(query_embedding, entity_embedding)
if similarity > best_score:
best_score = similarity
best_match = entity
best_score = similarity
best_match = entity
return best_match
def _fallback_similarity_match(
self, entities: list[object], name: str, exclude_id: str | None = None
self, entities: list[object], name: str, exclude_id: str | None = None
) -> object | None:
"""
回退到简单的相似度匹配(不使用 embedding
@@ -191,7 +191,7 @@ class EntityAligner:
Returns:
最相似的实体或 None
"""
name_lower = name.lower()
name_lower = name.lower()
# 1. 精确匹配
for entity in entities:
@@ -212,7 +212,7 @@ class EntityAligner:
return None
def batch_align_entities(
self, project_id: str, new_entities: list[dict], threshold: float | None = None
self, project_id: str, new_entities: list[dict], threshold: float | None = None
) -> list[dict]:
"""
批量对齐实体
@@ -226,16 +226,16 @@ class EntityAligner:
对齐结果列表 [{"new_entity": {...}, "matched_entity": {...}, "similarity": 0.9}]
"""
if threshold is None:
threshold = self.similarity_threshold
threshold = self.similarity_threshold
results = []
results = []
for new_ent in new_entities:
matched = self.find_similar_entity(
project_id, new_ent["name"], new_ent.get("definition", ""), threshold = threshold
matched = self.find_similar_entity(
project_id, new_ent["name"], new_ent.get("definition", ""), threshold=threshold
)
result = {
result = {
"new_entity": new_ent,
"matched_entity": None,
"similarity": 0.0,
@@ -244,28 +244,28 @@ class EntityAligner:
if matched:
# 计算相似度
query_text = self.get_entity_text(new_ent["name"], new_ent.get("definition", ""))
matched_text = self.get_entity_text(matched.name, matched.definition)
query_text = self.get_entity_text(new_ent["name"], new_ent.get("definition", ""))
matched_text = self.get_entity_text(matched.name, matched.definition)
query_emb = self.get_embedding(query_text)
matched_emb = self.get_embedding(matched_text)
query_emb = self.get_embedding(query_text)
matched_emb = self.get_embedding(matched_text)
if query_emb and matched_emb:
similarity = self.compute_similarity(query_emb, matched_emb)
result["matched_entity"] = {
similarity = self.compute_similarity(query_emb, matched_emb)
result["matched_entity"] = {
"id": matched.id,
"name": matched.name,
"type": matched.type,
"definition": matched.definition,
}
result["similarity"] = similarity
result["should_merge"] = similarity >= threshold
result["similarity"] = similarity
result["should_merge"] = similarity >= threshold
results.append(result)
return results
def suggest_entity_aliases(self, entity_name: str, entity_definition: str = "") -> list[str]:
def suggest_entity_aliases(self, entity_name: str, entity_definition: str = "") -> list[str]:
"""
使用 LLM 建议实体的别名
@@ -279,7 +279,7 @@ class EntityAligner:
if not KIMI_API_KEY:
return []
prompt = f"""为以下实体生成可能的别名或简称:
prompt = f"""为以下实体生成可能的别名或简称:
实体名称:{entity_name}
定义:{entity_definition}
@@ -290,28 +290,28 @@ class EntityAligner:
只返回 JSON不要其他内容。"""
try:
response = httpx.post(
response = httpx.post(
f"{KIMI_BASE_URL}/v1/chat/completions",
headers = {
headers={
"Authorization": f"Bearer {KIMI_API_KEY}",
"Content-Type": "application/json",
},
json = {
json={
"model": "k2p5",
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.3,
},
timeout = 30.0,
timeout=30.0,
)
response.raise_for_status()
result = response.json()
content = result["choices"][0]["message"]["content"]
result = response.json()
content = result["choices"][0]["message"]["content"]
import re
json_match = re.search(r"\{{.*?\}}", content, re.DOTALL)
json_match = re.search(r"\{{.*?\}}", content, re.DOTALL)
if json_match:
data = json.loads(json_match.group())
data = json.loads(json_match.group())
return data.get("aliases", [])
except (httpx.HTTPError, json.JSONDecodeError, KeyError) as e:
print(f"Alias suggestion failed: {e}")
@@ -340,8 +340,8 @@ def simple_similarity(str1: str, str2: str) -> float:
return 0.0
# 转换为小写
s1 = str1.lower()
s2 = str2.lower()
s1 = str1.lower()
s2 = str2.lower()
# 包含关系
if s1 in s2 or s2 in s1:
@@ -355,11 +355,11 @@ def simple_similarity(str1: str, str2: str) -> float:
if __name__ == "__main__":
# 测试
aligner = EntityAligner()
aligner = EntityAligner()
# 测试 embedding
test_text = "Kubernetes 容器编排平台"
embedding = aligner.get_embedding(test_text)
test_text = "Kubernetes 容器编排平台"
embedding = aligner.get_embedding(test_text)
if embedding:
print(f"Embedding dimension: {len(embedding)}")
print(f"First 5 values: {embedding[:5]}")
@@ -367,7 +367,7 @@ if __name__ == "__main__":
print("Embedding API not available")
# 测试相似度计算
emb1 = [1.0, 0.0, 0.0]
emb2 = [0.9, 0.1, 0.0]
sim = aligner.compute_similarity(emb1, emb2)
emb1 = [1.0, 0.0, 0.0]
emb2 = [0.9, 0.1, 0.0]
sim = aligner.compute_similarity(emb1, emb2)
print(f"Similarity: {sim:.4f}")