insightflow/auto_code_fixer.py

#!/usr/bin/env python3
"""
InsightFlow 代码审查和自动修复工具
"""

import ast
import os
import re
import subprocess
from pathlib import Path
from typing import Any


class CodeIssue:
    """代码问题记录"""

    def __init__(self, file_path: str, line_no: int, issue_type: str, message: str, severity: str = "warning"):
        self.file_path = file_path
        self.line_no = line_no
        self.issue_type = issue_type
        self.message = message
        self.severity = severity

    def __repr__(self):
        return f"{self.file_path}:{self.line_no} [{self.severity}] {self.issue_type}: {self.message}"


class CodeFixer:
    """代码自动修复器"""

    def __init__(self, project_path: str):
        self.project_path = Path(project_path)
        self.issues: list[CodeIssue] = []
        self.fixed_issues: list[CodeIssue] = []
        self.manual_issues: list[CodeIssue] = []
        self.scanned_files: list[str] = []

    def scan_all_files(self) -> None:
        """扫描所有 Python 文件"""
        for py_file in self.project_path.rglob("*.py"):
            if "__pycache__" in str(py_file) or ".venv" in str(py_file):
                continue
            self.scanned_files.append(str(py_file))
            self._scan_file(py_file)

    def _scan_file(self, file_path: Path) -> None:
        """扫描单个文件"""
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
                lines = content.split("\n")
        except Exception as e:
            print(f"Error reading {file_path}: {e}")
            return

        # 检查重复导入
        self._check_duplicate_imports(file_path, content, lines)

        # 检查裸异常
        self._check_bare_exceptions(file_path, content, lines)

        # 检查 PEP8 问题
        self._check_pep8_issues(file_path, content, lines)

        # 检查未使用的导入
        self._check_unused_imports(file_path, content)

        # 检查类型注解
        self._check_type_annotations(file_path, content, lines)

        # 检查字符串格式化
        self._check_string_formatting(file_path, content, lines)

        # 检查魔法数字
        self._check_magic_numbers(file_path, content, lines)

        # 检查 SQL 注入风险
        self._check_sql_injection(file_path, content, lines)

        # 检查 CORS 配置
        self._check_cors_config(file_path, content, lines)

        # 检查敏感信息
        self._check_sensitive_info(file_path, content, lines)

    def _check_duplicate_imports(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查重复导入"""
        imports = {}
        for i, line in enumerate(lines, 1):
            match = re.match(r"^(?:from\s+(\S+)\s+)?import\s+(.+)$", line.strip())
            if match:
                module = match.group(1) or ""
                names = match.group(2)
                key = f"{module}:{names}"
                if key in imports:
                    self.issues.append(
                        CodeIssue(str(file_path), i, "duplicate_import", f"重复导入: {line.strip()}", "warning")
                    )
                imports[key] = i

    def _check_bare_exceptions(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查裸异常捕获"""
        for i, line in enumerate(lines, 1):
            if re.search(r"except\s*:\s*$", line) or re.search(r"except\s*:\s*#", line):
                self.issues.append(
                    CodeIssue(str(file_path), i, "bare_exception", "裸异常捕获，应指定具体异常类型", "error")
                )

    def _check_pep8_issues(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查 PEP8 格式问题"""
        for i, line in enumerate(lines, 1):
            # 行长度超过 120
            if len(line) > 120:
                self.issues.append(
                    CodeIssue(str(file_path), i, "line_too_long", f"行长度 {len(line)} 超过 120 字符", "warning")
                )

            # 行尾空格
            if line.rstrip() != line:
                self.issues.append(
                    CodeIssue(str(file_path), i, "trailing_whitespace", "行尾有空格", "info")
                )

            # 多余的空行
            if i > 1 and line.strip() == "" and lines[i - 2].strip() == "":
                if i < len(lines) and lines[i].strip() != "":
                    self.issues.append(
                        CodeIssue(str(file_path), i, "extra_blank_line", "多余的空行", "info")
                    )

    def _check_unused_imports(self, file_path: Path, content: str) -> None:
        """检查未使用的导入"""
        try:
            tree = ast.parse(content)
        except SyntaxError:
            return

        imports = {}
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    name = alias.asname if alias.asname else alias.name
                    imports[name] = node.lineno
            elif isinstance(node, ast.ImportFrom):
                for alias in node.names:
                    name = alias.asname if alias.asname else alias.name
                    if alias.name == "*":
                        continue
                    imports[name] = node.lineno

        # 检查使用
        used_names = set()
        for node in ast.walk(tree):
            if isinstance(node, ast.Name):
                used_names.add(node.id)

        for name, line in imports.items():
            if name not in used_names and not name.startswith("_"):
                self.issues.append(
                    CodeIssue(str(file_path), line, "unused_import", f"未使用的导入: {name}", "warning")
                )

    def _check_type_annotations(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查类型注解"""
        try:
            tree = ast.parse(content)
        except SyntaxError:
            return

        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                # 检查函数参数类型注解
                for arg in node.args.args:
                    if arg.annotation is None and arg.arg != "self" and arg.arg != "cls":
                        self.issues.append(
                            CodeIssue(
                                str(file_path),
                                node.lineno,
                                "missing_type_annotation",
                                f"函数 '{node.name}' 的参数 '{arg.arg}' 缺少类型注解",
                                "info",
                            )
                        )

    def _check_string_formatting(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查字符串格式化"""
        for i, line in enumerate(lines, 1):
            # 检查 % 格式化
            if re.search(r"['\"].*%[sdif].*['\"]\s*%", line) or re.search(r"['\"].*%\(.*\).*['\"]\s*%", line):
                self.issues.append(
                    CodeIssue(str(file_path), i, "old_string_format", "使用 % 格式化，建议改为 f-string", "info")
                )

            # 检查 .format()
            if re.search(r"['\"].*\{.*\}.*['\"]\.format\(", line):
                self.issues.append(
                    CodeIssue(str(file_path), i, "format_method", "使用 .format()，建议改为 f-string", "info")
                )

    def _check_magic_numbers(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查魔法数字"""
        # 排除的魔法数字
        excluded = {"0", "1", "-1", "0.0", "1.0", "100", "0.5", "3600", "86400", "1024"}

        for i, line in enumerate(lines, 1):
            # 跳过注释行
            if line.strip().startswith("#"):
                continue

            # 查找数字字面量
            matches = re.findall(r"(?<![\w.])\d+(?:\.\d+)?(?![\w.])", line)
            for num in matches:
                if num not in excluded:
                    # 检查是否在赋值语句中（可能是常量定义）
                    if not re.search(r"^[A-Z_]+\s*=\s*" + num, line.strip()):
                        self.issues.append(
                            CodeIssue(
                                str(file_path),
                                i,
                                "magic_number",
                                f"魔法数字 {num}，建议提取为常量",
                                "info",
                            )
                        )

    def _check_sql_injection(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查 SQL 注入风险"""
        for i, line in enumerate(lines, 1):
            # 检查字符串拼接 SQL
            if re.search(r"execute\s*\(\s*['\"].*%", line) or re.search(r"execute\s*\(\s*f['\"]", line):
                self.issues.append(
                    CodeIssue(
                        str(file_path),
                        i,
                        "sql_injection_risk",
                        "潜在的 SQL 注入风险，使用参数化查询",
                        "critical",
                    )
                )

    def _check_cors_config(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查 CORS 配置"""
        for i, line in enumerate(lines, 1):
            if "allow_origins" in line and "*" in line:
                self.issues.append(
                    CodeIssue(
                        str(file_path),
                        i,
                        "cors_wildcard",
                        "CORS 配置允许所有来源 (*)，生产环境应限制具体域名",
                        "warning",
                    )
                )

    def _check_sensitive_info(self, file_path: Path, content: str, lines: list[str]) -> None:
        """检查敏感信息泄露"""
        patterns = [
            (r"password\s*=\s*['\"][^'\"]+['\"]", "硬编码密码"),
            (r"secret\s*=\s*['\"][^'\"]+['\"]", "硬编码密钥"),
            (r"api_key\s*=\s*['\"][^'\"]+['\"]", "硬编码 API Key"),
            (r"token\s*=\s*['\"][^'\"]+['\"]", "硬编码 Token"),
        ]

        for i, line in enumerate(lines, 1):
            for pattern, desc in patterns:
                if re.search(pattern, line, re.IGNORECASE):
                    # 排除环境变量获取
                    if "os.getenv" not in line and "os.environ" not in line:
                        self.issues.append(
                            CodeIssue(
                                str(file_path),
                                i,
                                "hardcoded_secret",
                                f"{desc}，应使用环境变量",
                                "critical",
                            )
                        )

    def fix_auto_fixable(self) -> None:
        """自动修复可修复的问题"""
        auto_fix_types = {
            "trailing_whitespace",
            "extra_blank_line",
            "old_string_format",
            "format_method",
            "unused_import",
        }

        # 按文件分组
        files_to_fix = {}
        for issue in self.issues:
            if issue.issue_type in auto_fix_types:
                if issue.file_path not in files_to_fix:
                    files_to_fix[issue.file_path] = []
                files_to_fix[issue.file_path].append(issue)

        for file_path, file_issues in files_to_fix.items():
            # 跳过自动生成的文件
            if "auto_code_fixer.py" in file_path or "code_reviewer.py" in file_path:
                continue

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                    lines = content.split("\n")
            except Exception:
                continue

            fixed_lines = set()

            # 修复行尾空格
            for issue in file_issues:
                if issue.issue_type == "trailing_whitespace":
                    line_idx = issue.line_no - 1
                    if 0 <= line_idx < len(lines) and line_idx not in fixed_lines:
                        lines[line_idx] = lines[line_idx].rstrip()
                        fixed_lines.add(line_idx)
                        self.fixed_issues.append(issue)

            # 修复多余的空行
            for issue in file_issues:
                if issue.issue_type == "extra_blank_line":
                    line_idx = issue.line_no - 1
                    if 0 <= line_idx < len(lines) and line_idx not in fixed_lines:
                        # 检查是否是多余的空行
                        if line_idx > 0 and lines[line_idx].strip() == "" and lines[line_idx - 1].strip() == "":
                            lines.pop(line_idx)
                            fixed_lines.add(line_idx)
                            self.fixed_issues.append(issue)
                            # 调整后续行号
                            for other_issue in file_issues:
                                if other_issue.line_no > issue.line_no:
                                    other_issue.line_no -= 1

            # 写回文件
            try:
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(lines))
            except Exception as e:
                print(f"Error writing {file_path}: {e}")

    def categorize_issues(self) -> dict[str, list[CodeIssue]]:
        """分类问题"""
        categories = {
            "critical": [],
            "error": [],
            "warning": [],
            "info": [],
        }

        for issue in self.issues:
            if issue.severity in categories:
                categories[issue.severity].append(issue)

        return categories

    def generate_report(self) -> str:
        """生成修复报告"""
        report = []
        report.append("# InsightFlow 代码审查报告")
        report.append("")
        report.append(f"扫描时间: {os.popen('date').read().strip()}")
        report.append(f"扫描文件数: {len(self.scanned_files)}")
        report.append("")

        # 文件列表
        report.append("## 扫描的文件列表")
        report.append("")
        for f in sorted(self.scanned_files):
            report.append(f"- `{f}`")
        report.append("")

        # 问题统计
        categories = self.categorize_issues()
        report.append("## 问题分类统计")
        report.append("")
        report.append(f"- 🔴 Critical: {len(categories['critical'])}")
        report.append(f"- 🟠 Error: {len(categories['error'])}")
        report.append(f"- 🟡 Warning: {len(categories['warning'])}")
        report.append(f"- 🔵 Info: {len(categories['info'])}")
        report.append(f"- **总计: {len(self.issues)}**")
        report.append("")

        # 已自动修复的问题
        report.append("## ✅ 已自动修复的问题")
        report.append("")
        if self.fixed_issues:
            for issue in self.fixed_issues:
                report.append(f"- `{issue.file_path}:{issue.line_no}` - {issue.message}")
        else:
            report.append("无")
        report.append("")

        # 需要人工确认的问题
        manual_types = {"sql_injection_risk", "cors_wildcard", "hardcoded_secret"}
        manual_issues = [i for i in self.issues if i.issue_type in manual_types]

        report.append("## ⚠️ 需要人工确认的问题")
        report.append("")
        if manual_issues:
            for issue in manual_issues:
                report.append(f"- `{issue.file_path}:{issue.line_no}` [{issue.severity}] {issue.message}")
        else:
            report.append("无")
        report.append("")

        # 其他问题
        report.append("## 📋 其他发现的问题")
        report.append("")
        other_issues = [i for i in self.issues if i.issue_type not in manual_types and i not in self.fixed_issues]

        # 按类型分组
        by_type = {}
        for issue in other_issues:
            if issue.issue_type not in by_type:
                by_type[issue.issue_type] = []
            by_type[issue.issue_type].append(issue)

        for issue_type, issues in sorted(by_type.items()):
            report.append(f"### {issue_type}")
            report.append("")
            for issue in issues[:10]:  # 每种类型最多显示10个
                report.append(f"- `{issue.file_path}:{issue.line_no}` - {issue.message}")
            if len(issues) > 10:
                report.append(f"- ... 还有 {len(issues) - 10} 个类似问题")
            report.append("")

        return "\n".join(report)


def git_commit_and_push(project_path: str) -> tuple[bool, str]:
    """Git 提交和推送"""
    try:
        # 检查是否有变更
        result = subprocess.run(
            ["git", "status", "--porcelain"],
            cwd=project_path,
            capture_output=True,
            text=True,
        )

        if not result.stdout.strip():
            return True, "没有需要提交的变更"

        # 添加所有变更
        subprocess.run(["git", "add", "-A"], cwd=project_path, check=True)

        # 提交
        commit_msg = """fix: auto-fix code issues (cron)

- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解"""

        subprocess.run(["git", "commit", "-m", commit_msg], cwd=project_path, check=True)

        # 推送
        subprocess.run(["git", "push"], cwd=project_path, check=True)

        return True, "提交并推送成功"
    except subprocess.CalledProcessError as e:
        return False, f"Git 操作失败: {e}"
    except Exception as e:
        return False, f"Git 操作异常: {e}"


def main():
    project_path = "/root/.openclaw/workspace/projects/insightflow"

    print("🔍 开始扫描代码...")
    fixer = CodeFixer(project_path)
    fixer.scan_all_files()

    print(f"📊 发现 {len(fixer.issues)} 个问题")

    print("🔧 自动修复可修复的问题...")
    fixer.fix_auto_fixable()

    print(f"✅ 已修复 {len(fixer.fixed_issues)} 个问题")

    # 生成报告
    report = fixer.generate_report()

    # 保存报告
    report_path = Path(project_path) / "AUTO_CODE_REVIEW_REPORT.md"
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)

    print(f"📝 报告已保存到: {report_path}")

    # Git 提交
    print("📤 提交变更到 Git...")
    success, msg = git_commit_and_push(project_path)
    print(f"{'✅' if success else '❌'} {msg}")

    # 添加 Git 结果到报告
    report += f"\n\n## Git 提交结果\n\n{'✅' if success else '❌'} {msg}\n"

    # 重新保存完整报告
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)

    print("\n" + "=" * 60)
    print(report)
    print("=" * 60)

    return report


if __name__ == "__main__":
    main()