fix: auto-fix code issues (cron)
- 修复重复导入/字段
- 修复异常处理
- 修复 PEP8 格式问题
- 添加类型注解
- 修复缺失的 urllib.parse 导入
This commit is contained in:
@@ -14,6 +14,7 @@ try:
|
||||
except ImportError:
|
||||
NUMPY_AVAILABLE = False
|
||||
|
||||
|
||||
@dataclass
|
||||
class MultimodalEntity:
|
||||
"""多模态实体"""
|
||||
@@ -32,6 +33,7 @@ class MultimodalEntity:
|
||||
if self.modality_features is None:
|
||||
self.modality_features = {}
|
||||
|
||||
|
||||
@dataclass
|
||||
class EntityLink:
|
||||
"""实体关联"""
|
||||
@@ -46,6 +48,7 @@ class EntityLink:
|
||||
confidence: float
|
||||
evidence: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class AlignmentResult:
|
||||
"""对齐结果"""
|
||||
@@ -56,6 +59,7 @@ class AlignmentResult:
|
||||
match_type: str # exact, fuzzy, embedding
|
||||
confidence: float
|
||||
|
||||
|
||||
@dataclass
|
||||
class FusionResult:
|
||||
"""知识融合结果"""
|
||||
@@ -66,11 +70,17 @@ class FusionResult:
|
||||
source_modalities: list[str]
|
||||
confidence: float
|
||||
|
||||
|
||||
class MultimodalEntityLinker:
|
||||
"""多模态实体关联器 - 跨模态实体对齐和知识融合"""
|
||||
|
||||
# 关联类型
|
||||
LINK_TYPES = {"same_as": "同一实体", "related_to": "相关实体", "part_of": "组成部分", "mentions": "提及关系"}
|
||||
LINK_TYPES = {
|
||||
"same_as": "同一实体",
|
||||
"related_to": "相关实体",
|
||||
"part_of": "组成部分",
|
||||
"mentions": "提及关系",
|
||||
}
|
||||
|
||||
# 模态类型
|
||||
MODALITIES = ["audio", "video", "image", "document"]
|
||||
@@ -123,7 +133,9 @@ class MultimodalEntityLinker:
|
||||
(相似度, 匹配类型)
|
||||
"""
|
||||
# 名称相似度
|
||||
name_sim = self.calculate_string_similarity(entity1.get("name", ""), entity2.get("name", ""))
|
||||
name_sim = self.calculate_string_similarity(
|
||||
entity1.get("name", ""), entity2.get("name", "")
|
||||
)
|
||||
|
||||
# 如果名称完全匹配
|
||||
if name_sim == 1.0:
|
||||
@@ -142,7 +154,9 @@ class MultimodalEntityLinker:
|
||||
return 0.95, "alias_match"
|
||||
|
||||
# 定义相似度
|
||||
def_sim = self.calculate_string_similarity(entity1.get("definition", ""), entity2.get("definition", ""))
|
||||
def_sim = self.calculate_string_similarity(
|
||||
entity1.get("definition", ""), entity2.get("definition", "")
|
||||
)
|
||||
|
||||
# 综合相似度
|
||||
combined_sim = name_sim * 0.7 + def_sim * 0.3
|
||||
@@ -301,7 +315,9 @@ class MultimodalEntityLinker:
|
||||
fused_properties["contexts"].append(mention.get("mention_context"))
|
||||
|
||||
# 选择最佳定义(最长的那个)
|
||||
best_definition = max(fused_properties["definitions"], key=len) if fused_properties["definitions"] else ""
|
||||
best_definition = (
|
||||
max(fused_properties["definitions"], key=len) if fused_properties["definitions"] else ""
|
||||
)
|
||||
|
||||
# 选择最佳名称(最常见的那个)
|
||||
from collections import Counter
|
||||
@@ -374,7 +390,9 @@ class MultimodalEntityLinker:
|
||||
|
||||
return conflicts
|
||||
|
||||
def suggest_entity_merges(self, entities: list[dict], existing_links: list[EntityLink] = None) -> list[dict]:
|
||||
def suggest_entity_merges(
|
||||
self, entities: list[dict], existing_links: list[EntityLink] = None
|
||||
) -> list[dict]:
|
||||
"""
|
||||
建议实体合并
|
||||
|
||||
@@ -489,12 +507,16 @@ class MultimodalEntityLinker:
|
||||
"total_multimodal_records": len(multimodal_entities),
|
||||
"unique_entities": len(entity_modalities),
|
||||
"cross_modal_entities": cross_modal_count,
|
||||
"cross_modal_ratio": cross_modal_count / len(entity_modalities) if entity_modalities else 0,
|
||||
"cross_modal_ratio": cross_modal_count / len(entity_modalities)
|
||||
if entity_modalities
|
||||
else 0,
|
||||
}
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_multimodal_entity_linker = None
|
||||
|
||||
|
||||
def get_multimodal_entity_linker(similarity_threshold: float = 0.85) -> MultimodalEntityLinker:
|
||||
"""获取多模态实体关联器单例"""
|
||||
global _multimodal_entity_linker
|
||||
|
||||
Reference in New Issue
Block a user