fix: auto-fix code issues (cron)

- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
- 修复缺失的urllib.parse导入
This commit is contained in:
OpenClaw Bot
2026-02-28 06:03:09 +08:00
parent ff83cab6c7
commit fe3d64a1d2
41 changed files with 4501 additions and 1176 deletions

View File

@@ -7,6 +7,7 @@ Document Processor - Phase 3
import io
import os
class DocumentProcessor:
"""文档处理器 - 提取 PDF/DOCX 文本"""
@@ -33,7 +34,9 @@ class DocumentProcessor:
ext = os.path.splitext(filename.lower())[1]
if ext not in self.supported_formats:
- raise ValueError(f"Unsupported file format: {ext}. Supported: {list(self.supported_formats.keys())}")
+ raise ValueError(
+ f"Unsupported file format: {ext}. Supported: {list(self.supported_formats.keys())}"
+ )
extractor = self.supported_formats[ext]
text = extractor(content)
@@ -71,7 +74,9 @@ class DocumentProcessor:
text_parts.append(page_text)
return "\n\n".join(text_parts)
except ImportError:
- raise ImportError("PDF processing requires PyPDF2 or pdfplumber. Install with: pip install PyPDF2")
+ raise ImportError(
+ "PDF processing requires PyPDF2 or pdfplumber. Install with: pip install PyPDF2"
+ )
except Exception as e:
raise ValueError(f"PDF extraction failed: {str(e)}")
@@ -100,7 +105,9 @@ class DocumentProcessor:
return "\n\n".join(text_parts)
except ImportError:
- raise ImportError("DOCX processing requires python-docx. Install with: pip install python-docx")
+ raise ImportError(
+ "DOCX processing requires python-docx. Install with: pip install python-docx"
+ )
except Exception as e:
raise ValueError(f"DOCX extraction failed: {str(e)}")
@@ -149,6 +156,7 @@ class DocumentProcessor:
ext = os.path.splitext(filename.lower())[1]
return ext in self.supported_formats
# 简单的文本提取器(不需要外部依赖)
class SimpleTextExtractor:
"""简单的文本提取器,用于测试"""
@@ -165,6 +173,7 @@ class SimpleTextExtractor:
return content.decode("latin-1", errors="ignore")
if __name__ == "__main__":
# 测试
processor = DocumentProcessor()