fix: auto-fix code issues (cron)

- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 修复语法错误（运算符空格问题）
- 修复类型注解格式
2026-03-02 06:09:49 +08:00
parent b83265e5fd
commit e23f1fec08
84 changed files with 9492 additions and 9491 deletions
--- a/backend/multimodal_processor.py
+++ b/backend/multimodal_processor.py
@@ -13,30 +13,30 @@ from dataclasses import dataclass
 from pathlib import Path

 # Constants
-UUID_LENGTH = 8  # UUID 截断长度
+UUID_LENGTH  = 8  # UUID 截断长度

 # 尝试导入OCR库
 try:
    import pytesseract
    from PIL import Image

-    PYTESSERACT_AVAILABLE = True
+    PYTESSERACT_AVAILABLE  = True
 except ImportError:
-    PYTESSERACT_AVAILABLE = False
+    PYTESSERACT_AVAILABLE  = False

 try:
    import cv2

-    CV2_AVAILABLE = True
+    CV2_AVAILABLE  = True
 except ImportError:
-    CV2_AVAILABLE = False
+    CV2_AVAILABLE  = False

 try:
    import ffmpeg

-    FFMPEG_AVAILABLE = True
+    FFMPEG_AVAILABLE  = True
 except ImportError:
-    FFMPEG_AVAILABLE = False
+    FFMPEG_AVAILABLE  = False


@dataclass
@@ -48,13 +48,13 @@ class VideoFrame:
    frame_number: int
    timestamp: float
    frame_path: str
-    ocr_text: str = ""
-    ocr_confidence: float = 0.0
-    entities_detected: list[dict] = None
+    ocr_text: str  = ""
+    ocr_confidence: float  = 0.0
+    entities_detected: list[dict]  = None

-    def __post_init__(self):
+    def __post_init__(self) -> None:
        if self.entities_detected is None:
-            self.entities_detected = []
+            self.entities_detected  = []


@dataclass
@@ -65,20 +65,20 @@ class VideoInfo:
    project_id: str
    filename: str
    file_path: str
-    duration: float = 0.0
-    width: int = 0
-    height: int = 0
-    fps: float = 0.0
-    audio_extracted: bool = False
-    audio_path: str = ""
-    transcript_id: str = ""
-    status: str = "pending"
-    error_message: str = ""
-    metadata: dict = None
+    duration: float  = 0.0
+    width: int  = 0
+    height: int  = 0
+    fps: float  = 0.0
+    audio_extracted: bool  = False
+    audio_path: str  = ""
+    transcript_id: str  = ""
+    status: str  = "pending"
+    error_message: str  = ""
+    metadata: dict  = None

-    def __post_init__(self):
+    def __post_init__(self) -> None:
        if self.metadata is None:
-            self.metadata = {}
+            self.metadata  = {}


@dataclass
@@ -91,13 +91,13 @@ class VideoProcessingResult:
    ocr_results: list[dict]
    full_text: str  # 整合的文本（音频转录 + OCR文本）
    success: bool
-    error_message: str = ""
+    error_message: str  = ""


 class MultimodalProcessor:
    """多模态处理器 - 处理视频文件"""

-    def __init__(self, temp_dir: str = None, frame_interval: int = 5) -> None:
+    def __init__(self, temp_dir: str  = None, frame_interval: int  = 5) -> None:
        """
        初始化多模态处理器

@@ -105,16 +105,16 @@ class MultimodalProcessor:
            temp_dir: 临时文件目录
            frame_interval: 关键帧提取间隔（秒）
        """
-        self.temp_dir = temp_dir or tempfile.gettempdir()
-        self.frame_interval = frame_interval
-        self.video_dir = os.path.join(self.temp_dir, "videos")
-        self.frames_dir = os.path.join(self.temp_dir, "frames")
-        self.audio_dir = os.path.join(self.temp_dir, "audio")
+        self.temp_dir  = temp_dir or tempfile.gettempdir()
+        self.frame_interval  = frame_interval
+        self.video_dir  = os.path.join(self.temp_dir, "videos")
+        self.frames_dir  = os.path.join(self.temp_dir, "frames")
+        self.audio_dir  = os.path.join(self.temp_dir, "audio")

        # 创建目录
-        os.makedirs(self.video_dir, exist_ok=True)
-        os.makedirs(self.frames_dir, exist_ok=True)
-        os.makedirs(self.audio_dir, exist_ok=True)
+        os.makedirs(self.video_dir, exist_ok = True)
+        os.makedirs(self.frames_dir, exist_ok = True)
+        os.makedirs(self.audio_dir, exist_ok = True)

    def extract_video_info(self, video_path: str) -> dict:
        """
@@ -128,11 +128,11 @@ class MultimodalProcessor:
        """
        try:
            if FFMPEG_AVAILABLE:
-                probe = ffmpeg.probe(video_path)
-                video_stream = next(
+                probe  = ffmpeg.probe(video_path)
+                video_stream  = next(
                    (s for s in probe["streams"] if s["codec_type"] == "video"), None
                )
-                audio_stream = next(
+                audio_stream  = next(
                    (s for s in probe["streams"] if s["codec_type"] == "audio"), None
                )

@@ -147,21 +147,21 @@ class MultimodalProcessor:
                    }
            else:
                # 使用 ffprobe 命令行
-                cmd = [
+                cmd  = [
                    "ffprobe",
                    "-v",
                    "error",
                    "-show_entries",
-                    "format=duration,bit_rate",
+                    "format = duration, bit_rate",
                    "-show_entries",
-                    "stream=width,height,r_frame_rate",
+                    "stream = width, height, r_frame_rate",
                    "-of",
                    "json",
                    video_path,
                ]
-                result = subprocess.run(cmd, capture_output=True, text=True)
+                result  = subprocess.run(cmd, capture_output = True, text = True)
                if result.returncode == 0:
-                    data = json.loads(result.stdout)
+                    data  = json.loads(result.stdout)
                    return {
                        "duration": float(data["format"].get("duration", 0)),
                        "width": int(data["streams"][0].get("width", 0)) if data["streams"] else 0,
@@ -177,7 +177,7 @@ class MultimodalProcessor:

        return {"duration": 0, "width": 0, "height": 0, "fps": 0, "has_audio": False, "bitrate": 0}

-    def extract_audio(self, video_path: str, output_path: str = None) -> str:
+    def extract_audio(self, video_path: str, output_path: str  = None) -> str:
        """
        从视频中提取音频

@@ -189,20 +189,20 @@ class MultimodalProcessor:
            提取的音频文件路径
        """
        if output_path is None:
-            video_name = Path(video_path).stem
-            output_path = os.path.join(self.audio_dir, f"{video_name}.wav")
+            video_name  = Path(video_path).stem
+            output_path  = os.path.join(self.audio_dir, f"{video_name}.wav")

        try:
            if FFMPEG_AVAILABLE:
                (
                    ffmpeg.input(video_path)
-                    .output(output_path, ac=1, ar=16000, vn=None)
+                    .output(output_path, ac = 1, ar = 16000, vn = None)
                    .overwrite_output()
-                    .run(quiet=True)
+                    .run(quiet = True)
                )
            else:
                # 使用命令行 ffmpeg
-                cmd = [
+                cmd  = [
                    "ffmpeg",
                    "-i",
                    video_path,
@@ -216,14 +216,14 @@ class MultimodalProcessor:
                    "-y",
                    output_path,
                ]
-                subprocess.run(cmd, check=True, capture_output=True)
+                subprocess.run(cmd, check = True, capture_output = True)

            return output_path
        except Exception as e:
            print(f"Error extracting audio: {e}")
            raise

-    def extract_keyframes(self, video_path: str, video_id: str, interval: int = None) -> list[str]:
+    def extract_keyframes(self, video_path: str, video_id: str, interval: int  = None) -> list[str]:
        """
        从视频中提取关键帧

@@ -235,31 +235,31 @@ class MultimodalProcessor:
        Returns:
            提取的帧文件路径列表
        """
-        interval = interval or self.frame_interval
-        frame_paths = []
+        interval  = interval or self.frame_interval
+        frame_paths  = []

        # 创建帧存储目录
-        video_frames_dir = os.path.join(self.frames_dir, video_id)
-        os.makedirs(video_frames_dir, exist_ok=True)
+        video_frames_dir  = os.path.join(self.frames_dir, video_id)
+        os.makedirs(video_frames_dir, exist_ok = True)

        try:
            if CV2_AVAILABLE:
                # 使用 OpenCV 提取帧
-                cap = cv2.VideoCapture(video_path)
-                fps = cap.get(cv2.CAP_PROP_FPS)
+                cap  = cv2.VideoCapture(video_path)
+                fps  = cap.get(cv2.CAP_PROP_FPS)
                int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

-                frame_interval_frames = int(fps * interval)
-                frame_number = 0
+                frame_interval_frames  = int(fps * interval)
+                frame_number  = 0

                while True:
-                    ret, frame = cap.read()
+                    ret, frame  = cap.read()
                    if not ret:
                        break

                    if frame_number % frame_interval_frames == 0:
-                        timestamp = frame_number / fps
-                        frame_path = os.path.join(
+                        timestamp  = frame_number / fps
+                        frame_path  = os.path.join(
                            video_frames_dir, f"frame_{frame_number:06d}_{timestamp:.2f}.jpg"
                        )
                        cv2.imwrite(frame_path, frame)
@@ -271,23 +271,23 @@ class MultimodalProcessor:
            else:
                # 使用 ffmpeg 命令行提取帧
                Path(video_path).stem
-                output_pattern = os.path.join(video_frames_dir, "frame_%06d_%t.jpg")
+                output_pattern  = os.path.join(video_frames_dir, "frame_%06d_%t.jpg")

-                cmd = [
+                cmd  = [
                    "ffmpeg",
                    "-i",
                    video_path,
                    "-vf",
-                    f"fps=1/{interval}",
+                    f"fps = 1/{interval}",
                    "-frame_pts",
                    "1",
                    "-y",
                    output_pattern,
                ]
-                subprocess.run(cmd, check=True, capture_output=True)
+                subprocess.run(cmd, check = True, capture_output = True)

                # 获取生成的帧文件列表
-                frame_paths = sorted(
+                frame_paths  = sorted(
                    [
                        os.path.join(video_frames_dir, f)
                        for f in os.listdir(video_frames_dir)
@@ -313,19 +313,19 @@ class MultimodalProcessor:
            return "", 0.0

        try:
-            image = Image.open(image_path)
+            image  = Image.open(image_path)

            # 预处理：转换为灰度图
            if image.mode != "L":
-                image = image.convert("L")
+                image  = image.convert("L")

            # 使用 pytesseract 进行 OCR
-            text = pytesseract.image_to_string(image, lang="chi_sim+eng")
+            text  = pytesseract.image_to_string(image, lang = "chi_sim+eng")

            # 获取置信度数据
-            data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
-            confidences = [int(c) for c in data["conf"] if int(c) > 0]
-            avg_confidence = sum(confidences) / len(confidences) if confidences else 0
+            data  = pytesseract.image_to_data(image, output_type = pytesseract.Output.DICT)
+            confidences  = [int(c) for c in data["conf"] if int(c) > 0]
+            avg_confidence  = sum(confidences) / len(confidences) if confidences else 0

            return text.strip(), avg_confidence / 100.0
        except Exception as e:
@@ -333,7 +333,7 @@ class MultimodalProcessor:
            return "", 0.0

    def process_video(
-        self, video_data: bytes, filename: str, project_id: str, video_id: str = None
+        self, video_data: bytes, filename: str, project_id: str, video_id: str  = None
    ) -> VideoProcessingResult:
        """
        处理视频文件：提取音频、关键帧、OCR
@@ -347,48 +347,48 @@ class MultimodalProcessor:
        Returns:
            视频处理结果
        """
-        video_id = video_id or str(uuid.uuid4())[:UUID_LENGTH]
+        video_id  = video_id or str(uuid.uuid4())[:UUID_LENGTH]

        try:
            # 保存视频文件
-            video_path = os.path.join(self.video_dir, f"{video_id}_{filename}")
+            video_path  = os.path.join(self.video_dir, f"{video_id}_{filename}")
            with open(video_path, "wb") as f:
                f.write(video_data)

            # 提取视频信息
-            video_info = self.extract_video_info(video_path)
+            video_info  = self.extract_video_info(video_path)

            # 提取音频
-            audio_path = ""
+            audio_path  = ""
            if video_info["has_audio"]:
-                audio_path = self.extract_audio(video_path)
+                audio_path  = self.extract_audio(video_path)

            # 提取关键帧
-            frame_paths = self.extract_keyframes(video_path, video_id)
+            frame_paths  = self.extract_keyframes(video_path, video_id)

            # 对关键帧进行 OCR
-            frames = []
-            ocr_results = []
-            all_ocr_text = []
+            frames  = []
+            ocr_results  = []
+            all_ocr_text  = []

            for i, frame_path in enumerate(frame_paths):
                # 解析帧信息
-                frame_name = os.path.basename(frame_path)
-                parts = frame_name.replace(".jpg", "").split("_")
-                frame_number = int(parts[1]) if len(parts) > 1 else i
-                timestamp = float(parts[2]) if len(parts) > 2 else i * self.frame_interval
+                frame_name  = os.path.basename(frame_path)
+                parts  = frame_name.replace(".jpg", "").split("_")
+                frame_number  = int(parts[1]) if len(parts) > 1 else i
+                timestamp  = float(parts[2]) if len(parts) > 2 else i * self.frame_interval

                # OCR 识别
-                ocr_text, confidence = self.perform_ocr(frame_path)
+                ocr_text, confidence  = self.perform_ocr(frame_path)

-                frame = VideoFrame(
-                    id=str(uuid.uuid4())[:UUID_LENGTH],
-                    video_id=video_id,
-                    frame_number=frame_number,
-                    timestamp=timestamp,
-                    frame_path=frame_path,
-                    ocr_text=ocr_text,
-                    ocr_confidence=confidence,
+                frame  = VideoFrame(
+                    id = str(uuid.uuid4())[:UUID_LENGTH],
+                    video_id = video_id,
+                    frame_number = frame_number,
+                    timestamp = timestamp,
+                    frame_path = frame_path,
+                    ocr_text = ocr_text,
+                    ocr_confidence = confidence,
                )
                frames.append(frame)

@@ -404,29 +404,29 @@ class MultimodalProcessor:
                    all_ocr_text.append(ocr_text)

            # 整合所有 OCR 文本
-            full_ocr_text = "\n\n".join(all_ocr_text)
+            full_ocr_text  = "\n\n".join(all_ocr_text)

            return VideoProcessingResult(
-                video_id=video_id,
-                audio_path=audio_path,
-                frames=frames,
-                ocr_results=ocr_results,
-                full_text=full_ocr_text,
-                success=True,
+                video_id = video_id,
+                audio_path = audio_path,
+                frames = frames,
+                ocr_results = ocr_results,
+                full_text = full_ocr_text,
+                success = True,
            )

        except Exception as e:
            return VideoProcessingResult(
-                video_id=video_id,
-                audio_path="",
-                frames=[],
-                ocr_results=[],
-                full_text="",
-                success=False,
-                error_message=str(e),
+                video_id = video_id,
+                audio_path = "",
+                frames = [],
+                ocr_results = [],
+                full_text = "",
+                success = False,
+                error_message = str(e),
            )

-    def cleanup(self, video_id: str = None) -> None:
+    def cleanup(self, video_id: str  = None) -> None:
        """
        清理临时文件

@@ -438,7 +438,7 @@ class MultimodalProcessor:
        if video_id:
            # 清理特定视频的文件
            for dir_path in [self.video_dir, self.frames_dir, self.audio_dir]:
-                target_dir = (
+                target_dir  = (
                    os.path.join(dir_path, video_id) if dir_path == self.frames_dir else dir_path
                )
                if os.path.exists(target_dir):
@@ -450,16 +450,16 @@ class MultimodalProcessor:
            for dir_path in [self.video_dir, self.frames_dir, self.audio_dir]:
                if os.path.exists(dir_path):
                    shutil.rmtree(dir_path)
-                    os.makedirs(dir_path, exist_ok=True)
+                    os.makedirs(dir_path, exist_ok = True)


 # Singleton instance
-_multimodal_processor = None
+_multimodal_processor  = None


-def get_multimodal_processor(temp_dir: str = None, frame_interval: int = 5) -> MultimodalProcessor:
+def get_multimodal_processor(temp_dir: str  = None, frame_interval: int  = 5) -> MultimodalProcessor:
    """获取多模态处理器单例"""
    global _multimodal_processor
    if _multimodal_processor is None:
-        _multimodal_processor = MultimodalProcessor(temp_dir, frame_interval)
+        _multimodal_processor  = MultimodalProcessor(temp_dir, frame_interval)
    return _multimodal_processor