Files
insightflow/auto_code_fixer.py
OpenClaw Bot 33555642db fix: auto-fix code issues (cron)
- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
2026-02-28 03:03:50 +08:00

513 lines
19 KiB
Python

#!/usr/bin/env python3
"""
InsightFlow 代码审查和自动修复工具
"""
import ast
import os
import re
import subprocess
from pathlib import Path
from typing import Any
class CodeIssue:
"""代码问题记录"""
def __init__(self, file_path: str, line_no: int, issue_type: str, message: str, severity: str = "warning"):
self.file_path = file_path
self.line_no = line_no
self.issue_type = issue_type
self.message = message
self.severity = severity
def __repr__(self):
return f"{self.file_path}:{self.line_no} [{self.severity}] {self.issue_type}: {self.message}"
class CodeFixer:
"""代码自动修复器"""
def __init__(self, project_path: str):
self.project_path = Path(project_path)
self.issues: list[CodeIssue] = []
self.fixed_issues: list[CodeIssue] = []
self.manual_issues: list[CodeIssue] = []
self.scanned_files: list[str] = []
def scan_all_files(self) -> None:
"""扫描所有 Python 文件"""
for py_file in self.project_path.rglob("*.py"):
if "__pycache__" in str(py_file) or ".venv" in str(py_file):
continue
self.scanned_files.append(str(py_file))
self._scan_file(py_file)
def _scan_file(self, file_path: Path) -> None:
"""扫描单个文件"""
try:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
lines = content.split("\n")
except Exception as e:
print(f"Error reading {file_path}: {e}")
return
# 检查重复导入
self._check_duplicate_imports(file_path, content, lines)
# 检查裸异常
self._check_bare_exceptions(file_path, content, lines)
# 检查 PEP8 问题
self._check_pep8_issues(file_path, content, lines)
# 检查未使用的导入
self._check_unused_imports(file_path, content)
# 检查类型注解
self._check_type_annotations(file_path, content, lines)
# 检查字符串格式化
self._check_string_formatting(file_path, content, lines)
# 检查魔法数字
self._check_magic_numbers(file_path, content, lines)
# 检查 SQL 注入风险
self._check_sql_injection(file_path, content, lines)
# 检查 CORS 配置
self._check_cors_config(file_path, content, lines)
# 检查敏感信息
self._check_sensitive_info(file_path, content, lines)
def _check_duplicate_imports(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查重复导入"""
imports = {}
for i, line in enumerate(lines, 1):
match = re.match(r"^(?:from\s+(\S+)\s+)?import\s+(.+)$", line.strip())
if match:
module = match.group(1) or ""
names = match.group(2)
key = f"{module}:{names}"
if key in imports:
self.issues.append(
CodeIssue(str(file_path), i, "duplicate_import", f"重复导入: {line.strip()}", "warning")
)
imports[key] = i
def _check_bare_exceptions(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查裸异常捕获"""
for i, line in enumerate(lines, 1):
if re.search(r"except\s*:\s*$", line) or re.search(r"except\s*:\s*#", line):
self.issues.append(
CodeIssue(str(file_path), i, "bare_exception", "裸异常捕获,应指定具体异常类型", "error")
)
def _check_pep8_issues(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查 PEP8 格式问题"""
for i, line in enumerate(lines, 1):
# 行长度超过 120
if len(line) > 120:
self.issues.append(
CodeIssue(str(file_path), i, "line_too_long", f"行长度 {len(line)} 超过 120 字符", "warning")
)
# 行尾空格
if line.rstrip() != line:
self.issues.append(
CodeIssue(str(file_path), i, "trailing_whitespace", "行尾有空格", "info")
)
# 多余的空行
if i > 1 and line.strip() == "" and lines[i - 2].strip() == "":
if i < len(lines) and lines[i].strip() != "":
self.issues.append(
CodeIssue(str(file_path), i, "extra_blank_line", "多余的空行", "info")
)
def _check_unused_imports(self, file_path: Path, content: str) -> None:
"""检查未使用的导入"""
try:
tree = ast.parse(content)
except SyntaxError:
return
imports = {}
for node in ast.walk(tree):
if isinstance(node, ast.Import):
for alias in node.names:
name = alias.asname if alias.asname else alias.name
imports[name] = node.lineno
elif isinstance(node, ast.ImportFrom):
for alias in node.names:
name = alias.asname if alias.asname else alias.name
if alias.name == "*":
continue
imports[name] = node.lineno
# 检查使用
used_names = set()
for node in ast.walk(tree):
if isinstance(node, ast.Name):
used_names.add(node.id)
for name, line in imports.items():
if name not in used_names and not name.startswith("_"):
self.issues.append(
CodeIssue(str(file_path), line, "unused_import", f"未使用的导入: {name}", "warning")
)
def _check_type_annotations(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查类型注解"""
try:
tree = ast.parse(content)
except SyntaxError:
return
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
# 检查函数参数类型注解
for arg in node.args.args:
if arg.annotation is None and arg.arg != "self" and arg.arg != "cls":
self.issues.append(
CodeIssue(
str(file_path),
node.lineno,
"missing_type_annotation",
f"函数 '{node.name}' 的参数 '{arg.arg}' 缺少类型注解",
"info",
)
)
def _check_string_formatting(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查字符串格式化"""
for i, line in enumerate(lines, 1):
# 检查 % 格式化
if re.search(r"['\"].*%[sdif].*['\"]\s*%", line) or re.search(r"['\"].*%\(.*\).*['\"]\s*%", line):
self.issues.append(
CodeIssue(str(file_path), i, "old_string_format", "使用 % 格式化,建议改为 f-string", "info")
)
# 检查 .format()
if re.search(r"['\"].*\{.*\}.*['\"]\.format\(", line):
self.issues.append(
CodeIssue(str(file_path), i, "format_method", "使用 .format(),建议改为 f-string", "info")
)
def _check_magic_numbers(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查魔法数字"""
# 排除的魔法数字
excluded = {"0", "1", "-1", "0.0", "1.0", "100", "0.5", "3600", "86400", "1024"}
for i, line in enumerate(lines, 1):
# 跳过注释行
if line.strip().startswith("#"):
continue
# 查找数字字面量
matches = re.findall(r"(?<![\w.])\d+(?:\.\d+)?(?![\w.])", line)
for num in matches:
if num not in excluded:
# 检查是否在赋值语句中(可能是常量定义)
if not re.search(r"^[A-Z_]+\s*=\s*" + num, line.strip()):
self.issues.append(
CodeIssue(
str(file_path),
i,
"magic_number",
f"魔法数字 {num},建议提取为常量",
"info",
)
)
def _check_sql_injection(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查 SQL 注入风险"""
for i, line in enumerate(lines, 1):
# 检查字符串拼接 SQL
if re.search(r"execute\s*\(\s*['\"].*%", line) or re.search(r"execute\s*\(\s*f['\"]", line):
self.issues.append(
CodeIssue(
str(file_path),
i,
"sql_injection_risk",
"潜在的 SQL 注入风险,使用参数化查询",
"critical",
)
)
def _check_cors_config(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查 CORS 配置"""
for i, line in enumerate(lines, 1):
if "allow_origins" in line and "*" in line:
self.issues.append(
CodeIssue(
str(file_path),
i,
"cors_wildcard",
"CORS 配置允许所有来源 (*),生产环境应限制具体域名",
"warning",
)
)
def _check_sensitive_info(self, file_path: Path, content: str, lines: list[str]) -> None:
"""检查敏感信息泄露"""
patterns = [
(r"password\s*=\s*['\"][^'\"]+['\"]", "硬编码密码"),
(r"secret\s*=\s*['\"][^'\"]+['\"]", "硬编码密钥"),
(r"api_key\s*=\s*['\"][^'\"]+['\"]", "硬编码 API Key"),
(r"token\s*=\s*['\"][^'\"]+['\"]", "硬编码 Token"),
]
for i, line in enumerate(lines, 1):
for pattern, desc in patterns:
if re.search(pattern, line, re.IGNORECASE):
# 排除环境变量获取
if "os.getenv" not in line and "os.environ" not in line:
self.issues.append(
CodeIssue(
str(file_path),
i,
"hardcoded_secret",
f"{desc},应使用环境变量",
"critical",
)
)
def fix_auto_fixable(self) -> None:
"""自动修复可修复的问题"""
auto_fix_types = {
"trailing_whitespace",
"extra_blank_line",
"old_string_format",
"format_method",
"unused_import",
}
# 按文件分组
files_to_fix = {}
for issue in self.issues:
if issue.issue_type in auto_fix_types:
if issue.file_path not in files_to_fix:
files_to_fix[issue.file_path] = []
files_to_fix[issue.file_path].append(issue)
for file_path, file_issues in files_to_fix.items():
# 跳过自动生成的文件
if "auto_code_fixer.py" in file_path or "code_reviewer.py" in file_path:
continue
try:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
lines = content.split("\n")
except Exception:
continue
fixed_lines = set()
# 修复行尾空格
for issue in file_issues:
if issue.issue_type == "trailing_whitespace":
line_idx = issue.line_no - 1
if 0 <= line_idx < len(lines) and line_idx not in fixed_lines:
lines[line_idx] = lines[line_idx].rstrip()
fixed_lines.add(line_idx)
self.fixed_issues.append(issue)
# 修复多余的空行
for issue in file_issues:
if issue.issue_type == "extra_blank_line":
line_idx = issue.line_no - 1
if 0 <= line_idx < len(lines) and line_idx not in fixed_lines:
# 检查是否是多余的空行
if line_idx > 0 and lines[line_idx].strip() == "" and lines[line_idx - 1].strip() == "":
lines.pop(line_idx)
fixed_lines.add(line_idx)
self.fixed_issues.append(issue)
# 调整后续行号
for other_issue in file_issues:
if other_issue.line_no > issue.line_no:
other_issue.line_no -= 1
# 写回文件
try:
with open(file_path, "w", encoding="utf-8") as f:
f.write("\n".join(lines))
except Exception as e:
print(f"Error writing {file_path}: {e}")
def categorize_issues(self) -> dict[str, list[CodeIssue]]:
"""分类问题"""
categories = {
"critical": [],
"error": [],
"warning": [],
"info": [],
}
for issue in self.issues:
if issue.severity in categories:
categories[issue.severity].append(issue)
return categories
def generate_report(self) -> str:
"""生成修复报告"""
report = []
report.append("# InsightFlow 代码审查报告")
report.append("")
report.append(f"扫描时间: {os.popen('date').read().strip()}")
report.append(f"扫描文件数: {len(self.scanned_files)}")
report.append("")
# 文件列表
report.append("## 扫描的文件列表")
report.append("")
for f in sorted(self.scanned_files):
report.append(f"- `{f}`")
report.append("")
# 问题统计
categories = self.categorize_issues()
report.append("## 问题分类统计")
report.append("")
report.append(f"- 🔴 Critical: {len(categories['critical'])}")
report.append(f"- 🟠 Error: {len(categories['error'])}")
report.append(f"- 🟡 Warning: {len(categories['warning'])}")
report.append(f"- 🔵 Info: {len(categories['info'])}")
report.append(f"- **总计: {len(self.issues)}**")
report.append("")
# 已自动修复的问题
report.append("## ✅ 已自动修复的问题")
report.append("")
if self.fixed_issues:
for issue in self.fixed_issues:
report.append(f"- `{issue.file_path}:{issue.line_no}` - {issue.message}")
else:
report.append("")
report.append("")
# 需要人工确认的问题
manual_types = {"sql_injection_risk", "cors_wildcard", "hardcoded_secret"}
manual_issues = [i for i in self.issues if i.issue_type in manual_types]
report.append("## ⚠️ 需要人工确认的问题")
report.append("")
if manual_issues:
for issue in manual_issues:
report.append(f"- `{issue.file_path}:{issue.line_no}` [{issue.severity}] {issue.message}")
else:
report.append("")
report.append("")
# 其他问题
report.append("## 📋 其他发现的问题")
report.append("")
other_issues = [i for i in self.issues if i.issue_type not in manual_types and i not in self.fixed_issues]
# 按类型分组
by_type = {}
for issue in other_issues:
if issue.issue_type not in by_type:
by_type[issue.issue_type] = []
by_type[issue.issue_type].append(issue)
for issue_type, issues in sorted(by_type.items()):
report.append(f"### {issue_type}")
report.append("")
for issue in issues[:10]: # 每种类型最多显示10个
report.append(f"- `{issue.file_path}:{issue.line_no}` - {issue.message}")
if len(issues) > 10:
report.append(f"- ... 还有 {len(issues) - 10} 个类似问题")
report.append("")
return "\n".join(report)
def git_commit_and_push(project_path: str) -> tuple[bool, str]:
"""Git 提交和推送"""
try:
# 检查是否有变更
result = subprocess.run(
["git", "status", "--porcelain"],
cwd=project_path,
capture_output=True,
text=True,
)
if not result.stdout.strip():
return True, "没有需要提交的变更"
# 添加所有变更
subprocess.run(["git", "add", "-A"], cwd=project_path, check=True)
# 提交
commit_msg = """fix: auto-fix code issues (cron)
- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解"""
subprocess.run(["git", "commit", "-m", commit_msg], cwd=project_path, check=True)
# 推送
subprocess.run(["git", "push"], cwd=project_path, check=True)
return True, "提交并推送成功"
except subprocess.CalledProcessError as e:
return False, f"Git 操作失败: {e}"
except Exception as e:
return False, f"Git 操作异常: {e}"
def main():
project_path = "/root/.openclaw/workspace/projects/insightflow"
print("🔍 开始扫描代码...")
fixer = CodeFixer(project_path)
fixer.scan_all_files()
print(f"📊 发现 {len(fixer.issues)} 个问题")
print("🔧 自动修复可修复的问题...")
fixer.fix_auto_fixable()
print(f"✅ 已修复 {len(fixer.fixed_issues)} 个问题")
# 生成报告
report = fixer.generate_report()
# 保存报告
report_path = Path(project_path) / "AUTO_CODE_REVIEW_REPORT.md"
with open(report_path, "w", encoding="utf-8") as f:
f.write(report)
print(f"📝 报告已保存到: {report_path}")
# Git 提交
print("📤 提交变更到 Git...")
success, msg = git_commit_and_push(project_path)
print(f"{'' if success else ''} {msg}")
# 添加 Git 结果到报告
report += f"\n\n## Git 提交结果\n\n{'' if success else ''} {msg}\n"
# 重新保存完整报告
with open(report_path, "w", encoding="utf-8") as f:
f.write(report)
print("\n" + "=" * 60)
print(report)
print("=" * 60)
return report
if __name__ == "__main__":
main()