fix: auto-fix code issues (cron)
- 修复重复导入/字段
- 修复异常处理
- 修复 PEP 8 格式问题
- 添加类型注解
This commit is contained in:
@@ -26,7 +26,7 @@ from ops_manager import (
|
||||
)
|
||||
|
||||
# Add backend directory to path
|
||||
backend_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
backend_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
if backend_dir not in sys.path:
|
||||
sys.path.insert(0, backend_dir)
|
||||
|
||||
@@ -35,13 +35,13 @@ class TestOpsManager:
|
||||
"""测试运维与监控管理器"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.manager = get_ops_manager()
|
||||
self.tenant_id = "test_tenant_001"
|
||||
self.test_results = []
|
||||
self.manager = get_ops_manager()
|
||||
self.tenant_id = "test_tenant_001"
|
||||
self.test_results = []
|
||||
|
||||
def log(self, message: str, success: bool = True) -> None:
|
||||
def log(self, message: str, success: bool = True) -> None:
|
||||
"""记录测试结果"""
|
||||
status = "✅" if success else "❌"
|
||||
status = "✅" if success else "❌"
|
||||
print(f"{status} {message}")
|
||||
self.test_results.append((message, success))
|
||||
|
||||
@@ -79,57 +79,57 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 创建阈值告警规则
|
||||
rule1 = self.manager.create_alert_rule(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "CPU 使用率告警",
|
||||
description = "当 CPU 使用率超过 80% 时触发告警",
|
||||
rule_type = AlertRuleType.THRESHOLD,
|
||||
severity = AlertSeverity.P1,
|
||||
metric = "cpu_usage_percent",
|
||||
condition = ">",
|
||||
threshold = 80.0,
|
||||
duration = 300,
|
||||
evaluation_interval = 60,
|
||||
channels = [],
|
||||
labels = {"service": "api", "team": "platform"},
|
||||
annotations = {"summary": "CPU 使用率过高", "runbook": "https://wiki/runbooks/cpu"},
|
||||
created_by = "test_user",
|
||||
rule1 = self.manager.create_alert_rule(
|
||||
tenant_id=self.tenant_id,
|
||||
name="CPU 使用率告警",
|
||||
description="当 CPU 使用率超过 80% 时触发告警",
|
||||
rule_type=AlertRuleType.THRESHOLD,
|
||||
severity=AlertSeverity.P1,
|
||||
metric="cpu_usage_percent",
|
||||
condition=">",
|
||||
threshold=80.0,
|
||||
duration=300,
|
||||
evaluation_interval=60,
|
||||
channels=[],
|
||||
labels={"service": "api", "team": "platform"},
|
||||
annotations={"summary": "CPU 使用率过高", "runbook": "https://wiki/runbooks/cpu"},
|
||||
created_by="test_user",
|
||||
)
|
||||
self.log(f"Created alert rule: {rule1.name} (ID: {rule1.id})")
|
||||
|
||||
# 创建异常检测告警规则
|
||||
rule2 = self.manager.create_alert_rule(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "内存异常检测",
|
||||
description = "检测内存使用异常",
|
||||
rule_type = AlertRuleType.ANOMALY,
|
||||
severity = AlertSeverity.P2,
|
||||
metric = "memory_usage_percent",
|
||||
condition = ">",
|
||||
threshold = 0.0,
|
||||
duration = 600,
|
||||
evaluation_interval = 300,
|
||||
channels = [],
|
||||
labels = {"service": "database"},
|
||||
annotations = {},
|
||||
created_by = "test_user",
|
||||
rule2 = self.manager.create_alert_rule(
|
||||
tenant_id=self.tenant_id,
|
||||
name="内存异常检测",
|
||||
description="检测内存使用异常",
|
||||
rule_type=AlertRuleType.ANOMALY,
|
||||
severity=AlertSeverity.P2,
|
||||
metric="memory_usage_percent",
|
||||
condition=">",
|
||||
threshold=0.0,
|
||||
duration=600,
|
||||
evaluation_interval=300,
|
||||
channels=[],
|
||||
labels={"service": "database"},
|
||||
annotations={},
|
||||
created_by="test_user",
|
||||
)
|
||||
self.log(f"Created anomaly alert rule: {rule2.name} (ID: {rule2.id})")
|
||||
|
||||
# 获取告警规则
|
||||
fetched_rule = self.manager.get_alert_rule(rule1.id)
|
||||
fetched_rule = self.manager.get_alert_rule(rule1.id)
|
||||
assert fetched_rule is not None
|
||||
assert fetched_rule.name == rule1.name
|
||||
self.log(f"Fetched alert rule: {fetched_rule.name}")
|
||||
|
||||
# 列出租户的所有告警规则
|
||||
rules = self.manager.list_alert_rules(self.tenant_id)
|
||||
rules = self.manager.list_alert_rules(self.tenant_id)
|
||||
assert len(rules) >= 2
|
||||
self.log(f"Listed {len(rules)} alert rules for tenant")
|
||||
|
||||
# 更新告警规则
|
||||
updated_rule = self.manager.update_alert_rule(
|
||||
rule1.id, threshold = 85.0, description = "更新后的描述"
|
||||
updated_rule = self.manager.update_alert_rule(
|
||||
rule1.id, threshold=85.0, description="更新后的描述"
|
||||
)
|
||||
assert updated_rule.threshold == 85.0
|
||||
self.log(f"Updated alert rule threshold to {updated_rule.threshold}")
|
||||
@@ -140,7 +140,7 @@ class TestOpsManager:
|
||||
self.log("Deleted test alert rules")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Alert rules test failed: {e}", success = False)
|
||||
self.log(f"Alert rules test failed: {e}", success=False)
|
||||
|
||||
def test_alert_channels(self) -> None:
|
||||
"""测试告警渠道管理"""
|
||||
@@ -148,49 +148,49 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 创建飞书告警渠道
|
||||
channel1 = self.manager.create_alert_channel(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "飞书告警",
|
||||
channel_type = AlertChannelType.FEISHU,
|
||||
config = {
|
||||
channel1 = self.manager.create_alert_channel(
|
||||
tenant_id=self.tenant_id,
|
||||
name="飞书告警",
|
||||
channel_type=AlertChannelType.FEISHU,
|
||||
config={
|
||||
"webhook_url": "https://open.feishu.cn/open-apis/bot/v2/hook/test",
|
||||
"secret": "test_secret",
|
||||
},
|
||||
severity_filter = ["p0", "p1"],
|
||||
severity_filter=["p0", "p1"],
|
||||
)
|
||||
self.log(f"Created Feishu channel: {channel1.name} (ID: {channel1.id})")
|
||||
|
||||
# 创建钉钉告警渠道
|
||||
channel2 = self.manager.create_alert_channel(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "钉钉告警",
|
||||
channel_type = AlertChannelType.DINGTALK,
|
||||
config = {
|
||||
channel2 = self.manager.create_alert_channel(
|
||||
tenant_id=self.tenant_id,
|
||||
name="钉钉告警",
|
||||
channel_type=AlertChannelType.DINGTALK,
|
||||
config={
|
||||
"webhook_url": "https://oapi.dingtalk.com/robot/send?access_token = test",
|
||||
"secret": "test_secret",
|
||||
},
|
||||
severity_filter = ["p0", "p1", "p2"],
|
||||
severity_filter=["p0", "p1", "p2"],
|
||||
)
|
||||
self.log(f"Created DingTalk channel: {channel2.name} (ID: {channel2.id})")
|
||||
|
||||
# 创建 Slack 告警渠道
|
||||
channel3 = self.manager.create_alert_channel(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "Slack 告警",
|
||||
channel_type = AlertChannelType.SLACK,
|
||||
config = {"webhook_url": "https://hooks.slack.com/services/test"},
|
||||
severity_filter = ["p0", "p1", "p2", "p3"],
|
||||
channel3 = self.manager.create_alert_channel(
|
||||
tenant_id=self.tenant_id,
|
||||
name="Slack 告警",
|
||||
channel_type=AlertChannelType.SLACK,
|
||||
config={"webhook_url": "https://hooks.slack.com/services/test"},
|
||||
severity_filter=["p0", "p1", "p2", "p3"],
|
||||
)
|
||||
self.log(f"Created Slack channel: {channel3.name} (ID: {channel3.id})")
|
||||
|
||||
# 获取告警渠道
|
||||
fetched_channel = self.manager.get_alert_channel(channel1.id)
|
||||
fetched_channel = self.manager.get_alert_channel(channel1.id)
|
||||
assert fetched_channel is not None
|
||||
assert fetched_channel.name == channel1.name
|
||||
self.log(f"Fetched alert channel: {fetched_channel.name}")
|
||||
|
||||
# 列出租户的所有告警渠道
|
||||
channels = self.manager.list_alert_channels(self.tenant_id)
|
||||
channels = self.manager.list_alert_channels(self.tenant_id)
|
||||
assert len(channels) >= 3
|
||||
self.log(f"Listed {len(channels)} alert channels for tenant")
|
||||
|
||||
@@ -203,7 +203,7 @@ class TestOpsManager:
|
||||
self.log("Deleted test alert channels")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Alert channels test failed: {e}", success = False)
|
||||
self.log(f"Alert channels test failed: {e}", success=False)
|
||||
|
||||
def test_alerts(self) -> None:
|
||||
"""测试告警管理"""
|
||||
@@ -211,61 +211,61 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 创建告警规则
|
||||
rule = self.manager.create_alert_rule(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "测试告警规则",
|
||||
description = "用于测试的告警规则",
|
||||
rule_type = AlertRuleType.THRESHOLD,
|
||||
severity = AlertSeverity.P1,
|
||||
metric = "test_metric",
|
||||
condition = ">",
|
||||
threshold = 100.0,
|
||||
duration = 60,
|
||||
evaluation_interval = 60,
|
||||
channels = [],
|
||||
labels = {},
|
||||
annotations = {},
|
||||
created_by = "test_user",
|
||||
rule = self.manager.create_alert_rule(
|
||||
tenant_id=self.tenant_id,
|
||||
name="测试告警规则",
|
||||
description="用于测试的告警规则",
|
||||
rule_type=AlertRuleType.THRESHOLD,
|
||||
severity=AlertSeverity.P1,
|
||||
metric="test_metric",
|
||||
condition=">",
|
||||
threshold=100.0,
|
||||
duration=60,
|
||||
evaluation_interval=60,
|
||||
channels=[],
|
||||
labels={},
|
||||
annotations={},
|
||||
created_by="test_user",
|
||||
)
|
||||
|
||||
# 记录资源指标
|
||||
for i in range(10):
|
||||
self.manager.record_resource_metric(
|
||||
tenant_id = self.tenant_id,
|
||||
resource_type = ResourceType.CPU,
|
||||
resource_id = "server-001",
|
||||
metric_name = "test_metric",
|
||||
metric_value = 110.0 + i,
|
||||
unit = "percent",
|
||||
metadata = {"region": "cn-north-1"},
|
||||
tenant_id=self.tenant_id,
|
||||
resource_type=ResourceType.CPU,
|
||||
resource_id="server-001",
|
||||
metric_name="test_metric",
|
||||
metric_value=110.0 + i,
|
||||
unit="percent",
|
||||
metadata={"region": "cn-north-1"},
|
||||
)
|
||||
self.log("Recorded 10 resource metrics")
|
||||
|
||||
# 手动创建告警
|
||||
from ops_manager import Alert
|
||||
|
||||
alert_id = f"test_alert_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
||||
now = datetime.now().isoformat()
|
||||
alert_id = f"test_alert_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
||||
now = datetime.now().isoformat()
|
||||
|
||||
alert = Alert(
|
||||
id = alert_id,
|
||||
rule_id = rule.id,
|
||||
tenant_id = self.tenant_id,
|
||||
severity = AlertSeverity.P1,
|
||||
status = AlertStatus.FIRING,
|
||||
title = "测试告警",
|
||||
description = "这是一条测试告警",
|
||||
metric = "test_metric",
|
||||
value = 120.0,
|
||||
threshold = 100.0,
|
||||
labels = {"test": "true"},
|
||||
annotations = {},
|
||||
started_at = now,
|
||||
resolved_at = None,
|
||||
acknowledged_by = None,
|
||||
acknowledged_at = None,
|
||||
notification_sent = {},
|
||||
suppression_count = 0,
|
||||
alert = Alert(
|
||||
id=alert_id,
|
||||
rule_id=rule.id,
|
||||
tenant_id=self.tenant_id,
|
||||
severity=AlertSeverity.P1,
|
||||
status=AlertStatus.FIRING,
|
||||
title="测试告警",
|
||||
description="这是一条测试告警",
|
||||
metric="test_metric",
|
||||
value=120.0,
|
||||
threshold=100.0,
|
||||
labels={"test": "true"},
|
||||
annotations={},
|
||||
started_at=now,
|
||||
resolved_at=None,
|
||||
acknowledged_by=None,
|
||||
acknowledged_at=None,
|
||||
notification_sent={},
|
||||
suppression_count=0,
|
||||
)
|
||||
|
||||
with self.manager._get_db() as conn:
|
||||
@@ -299,20 +299,20 @@ class TestOpsManager:
|
||||
self.log(f"Created test alert: {alert.id}")
|
||||
|
||||
# 列出租户的告警
|
||||
alerts = self.manager.list_alerts(self.tenant_id)
|
||||
alerts = self.manager.list_alerts(self.tenant_id)
|
||||
assert len(alerts) >= 1
|
||||
self.log(f"Listed {len(alerts)} alerts for tenant")
|
||||
|
||||
# 确认告警
|
||||
self.manager.acknowledge_alert(alert_id, "test_user")
|
||||
fetched_alert = self.manager.get_alert(alert_id)
|
||||
fetched_alert = self.manager.get_alert(alert_id)
|
||||
assert fetched_alert.status == AlertStatus.ACKNOWLEDGED
|
||||
assert fetched_alert.acknowledged_by == "test_user"
|
||||
self.log(f"Acknowledged alert: {alert_id}")
|
||||
|
||||
# 解决告警
|
||||
self.manager.resolve_alert(alert_id)
|
||||
fetched_alert = self.manager.get_alert(alert_id)
|
||||
fetched_alert = self.manager.get_alert(alert_id)
|
||||
assert fetched_alert.status == AlertStatus.RESOLVED
|
||||
assert fetched_alert.resolved_at is not None
|
||||
self.log(f"Resolved alert: {alert_id}")
|
||||
@@ -326,7 +326,7 @@ class TestOpsManager:
|
||||
self.log("Cleaned up test data")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Alerts test failed: {e}", success = False)
|
||||
self.log(f"Alerts test failed: {e}", success=False)
|
||||
|
||||
def test_capacity_planning(self) -> None:
|
||||
"""测试容量规划"""
|
||||
@@ -334,9 +334,9 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 记录历史指标数据
|
||||
base_time = datetime.now() - timedelta(days = 30)
|
||||
base_time = datetime.now() - timedelta(days=30)
|
||||
for i in range(30):
|
||||
timestamp = (base_time + timedelta(days = i)).isoformat()
|
||||
timestamp = (base_time + timedelta(days=i)).isoformat()
|
||||
with self.manager._get_db() as conn:
|
||||
conn.execute(
|
||||
"""
|
||||
@@ -360,13 +360,13 @@ class TestOpsManager:
|
||||
self.log("Recorded 30 days of historical metrics")
|
||||
|
||||
# 创建容量规划
|
||||
prediction_date = (datetime.now() + timedelta(days = 30)).strftime("%Y-%m-%d")
|
||||
plan = self.manager.create_capacity_plan(
|
||||
tenant_id = self.tenant_id,
|
||||
resource_type = ResourceType.CPU,
|
||||
current_capacity = 100.0,
|
||||
prediction_date = prediction_date,
|
||||
confidence = 0.85,
|
||||
prediction_date = (datetime.now() + timedelta(days=30)).strftime("%Y-%m-%d")
|
||||
plan = self.manager.create_capacity_plan(
|
||||
tenant_id=self.tenant_id,
|
||||
resource_type=ResourceType.CPU,
|
||||
current_capacity=100.0,
|
||||
prediction_date=prediction_date,
|
||||
confidence=0.85,
|
||||
)
|
||||
|
||||
self.log(f"Created capacity plan: {plan.id}")
|
||||
@@ -375,7 +375,7 @@ class TestOpsManager:
|
||||
self.log(f" Recommended action: {plan.recommended_action}")
|
||||
|
||||
# 获取容量规划列表
|
||||
plans = self.manager.get_capacity_plans(self.tenant_id)
|
||||
plans = self.manager.get_capacity_plans(self.tenant_id)
|
||||
assert len(plans) >= 1
|
||||
self.log(f"Listed {len(plans)} capacity plans")
|
||||
|
||||
@@ -387,7 +387,7 @@ class TestOpsManager:
|
||||
self.log("Cleaned up capacity planning test data")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Capacity planning test failed: {e}", success = False)
|
||||
self.log(f"Capacity planning test failed: {e}", success=False)
|
||||
|
||||
def test_auto_scaling(self) -> None:
|
||||
"""测试自动扩缩容"""
|
||||
@@ -395,18 +395,18 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 创建自动扩缩容策略
|
||||
policy = self.manager.create_auto_scaling_policy(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "API 服务自动扩缩容",
|
||||
resource_type = ResourceType.CPU,
|
||||
min_instances = 2,
|
||||
max_instances = 10,
|
||||
target_utilization = 0.7,
|
||||
scale_up_threshold = 0.8,
|
||||
scale_down_threshold = 0.3,
|
||||
scale_up_step = 2,
|
||||
scale_down_step = 1,
|
||||
cooldown_period = 300,
|
||||
policy = self.manager.create_auto_scaling_policy(
|
||||
tenant_id=self.tenant_id,
|
||||
name="API 服务自动扩缩容",
|
||||
resource_type=ResourceType.CPU,
|
||||
min_instances=2,
|
||||
max_instances=10,
|
||||
target_utilization=0.7,
|
||||
scale_up_threshold=0.8,
|
||||
scale_down_threshold=0.3,
|
||||
scale_up_step=2,
|
||||
scale_down_step=1,
|
||||
cooldown_period=300,
|
||||
)
|
||||
|
||||
self.log(f"Created auto scaling policy: {policy.name} (ID: {policy.id})")
|
||||
@@ -415,13 +415,13 @@ class TestOpsManager:
|
||||
self.log(f" Target utilization: {policy.target_utilization}")
|
||||
|
||||
# 获取策略列表
|
||||
policies = self.manager.list_auto_scaling_policies(self.tenant_id)
|
||||
policies = self.manager.list_auto_scaling_policies(self.tenant_id)
|
||||
assert len(policies) >= 1
|
||||
self.log(f"Listed {len(policies)} auto scaling policies")
|
||||
|
||||
# 模拟扩缩容评估
|
||||
event = self.manager.evaluate_scaling_policy(
|
||||
policy_id = policy.id, current_instances = 3, current_utilization = 0.85
|
||||
event = self.manager.evaluate_scaling_policy(
|
||||
policy_id=policy.id, current_instances=3, current_utilization=0.85
|
||||
)
|
||||
|
||||
if event:
|
||||
@@ -432,7 +432,7 @@ class TestOpsManager:
|
||||
self.log("No scaling action needed")
|
||||
|
||||
# 获取扩缩容事件列表
|
||||
events = self.manager.list_scaling_events(self.tenant_id)
|
||||
events = self.manager.list_scaling_events(self.tenant_id)
|
||||
self.log(f"Listed {len(events)} scaling events")
|
||||
|
||||
# 清理
|
||||
@@ -445,7 +445,7 @@ class TestOpsManager:
|
||||
self.log("Cleaned up auto scaling test data")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Auto scaling test failed: {e}", success = False)
|
||||
self.log(f"Auto scaling test failed: {e}", success=False)
|
||||
|
||||
def test_health_checks(self) -> None:
|
||||
"""测试健康检查"""
|
||||
@@ -453,41 +453,41 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 创建 HTTP 健康检查
|
||||
check1 = self.manager.create_health_check(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "API 服务健康检查",
|
||||
target_type = "service",
|
||||
target_id = "api-service",
|
||||
check_type = "http",
|
||||
check_config = {"url": "https://api.insightflow.io/health", "expected_status": 200},
|
||||
interval = 60,
|
||||
timeout = 10,
|
||||
retry_count = 3,
|
||||
check1 = self.manager.create_health_check(
|
||||
tenant_id=self.tenant_id,
|
||||
name="API 服务健康检查",
|
||||
target_type="service",
|
||||
target_id="api-service",
|
||||
check_type="http",
|
||||
check_config={"url": "https://api.insightflow.io/health", "expected_status": 200},
|
||||
interval=60,
|
||||
timeout=10,
|
||||
retry_count=3,
|
||||
)
|
||||
self.log(f"Created HTTP health check: {check1.name} (ID: {check1.id})")
|
||||
|
||||
# 创建 TCP 健康检查
|
||||
check2 = self.manager.create_health_check(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "数据库健康检查",
|
||||
target_type = "database",
|
||||
target_id = "postgres-001",
|
||||
check_type = "tcp",
|
||||
check_config = {"host": "db.insightflow.io", "port": 5432},
|
||||
interval = 30,
|
||||
timeout = 5,
|
||||
retry_count = 2,
|
||||
check2 = self.manager.create_health_check(
|
||||
tenant_id=self.tenant_id,
|
||||
name="数据库健康检查",
|
||||
target_type="database",
|
||||
target_id="postgres-001",
|
||||
check_type="tcp",
|
||||
check_config={"host": "db.insightflow.io", "port": 5432},
|
||||
interval=30,
|
||||
timeout=5,
|
||||
retry_count=2,
|
||||
)
|
||||
self.log(f"Created TCP health check: {check2.name} (ID: {check2.id})")
|
||||
|
||||
# 获取健康检查列表
|
||||
checks = self.manager.list_health_checks(self.tenant_id)
|
||||
checks = self.manager.list_health_checks(self.tenant_id)
|
||||
assert len(checks) >= 2
|
||||
self.log(f"Listed {len(checks)} health checks")
|
||||
|
||||
# 执行健康检查(异步)
|
||||
async def run_health_check() -> None:
|
||||
result = await self.manager.execute_health_check(check1.id)
|
||||
result = await self.manager.execute_health_check(check1.id)
|
||||
return result
|
||||
|
||||
# 由于健康检查需要网络,这里只验证方法存在
|
||||
@@ -500,7 +500,7 @@ class TestOpsManager:
|
||||
self.log("Cleaned up health check test data")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Health checks test failed: {e}", success = False)
|
||||
self.log(f"Health checks test failed: {e}", success=False)
|
||||
|
||||
def test_failover(self) -> None:
|
||||
"""测试故障转移"""
|
||||
@@ -508,15 +508,15 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 创建故障转移配置
|
||||
config = self.manager.create_failover_config(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "主备数据中心故障转移",
|
||||
primary_region = "cn-north-1",
|
||||
secondary_regions = ["cn-south-1", "cn-east-1"],
|
||||
failover_trigger = "health_check_failed",
|
||||
auto_failover = False,
|
||||
failover_timeout = 300,
|
||||
health_check_id = None,
|
||||
config = self.manager.create_failover_config(
|
||||
tenant_id=self.tenant_id,
|
||||
name="主备数据中心故障转移",
|
||||
primary_region="cn-north-1",
|
||||
secondary_regions=["cn-south-1", "cn-east-1"],
|
||||
failover_trigger="health_check_failed",
|
||||
auto_failover=False,
|
||||
failover_timeout=300,
|
||||
health_check_id=None,
|
||||
)
|
||||
|
||||
self.log(f"Created failover config: {config.name} (ID: {config.id})")
|
||||
@@ -524,13 +524,13 @@ class TestOpsManager:
|
||||
self.log(f" Secondary regions: {config.secondary_regions}")
|
||||
|
||||
# 获取故障转移配置列表
|
||||
configs = self.manager.list_failover_configs(self.tenant_id)
|
||||
configs = self.manager.list_failover_configs(self.tenant_id)
|
||||
assert len(configs) >= 1
|
||||
self.log(f"Listed {len(configs)} failover configs")
|
||||
|
||||
# 发起故障转移
|
||||
event = self.manager.initiate_failover(
|
||||
config_id = config.id, reason = "Primary region health check failed"
|
||||
event = self.manager.initiate_failover(
|
||||
config_id=config.id, reason="Primary region health check failed"
|
||||
)
|
||||
|
||||
if event:
|
||||
@@ -540,12 +540,12 @@ class TestOpsManager:
|
||||
|
||||
# 更新故障转移状态
|
||||
self.manager.update_failover_status(event.id, "completed")
|
||||
updated_event = self.manager.get_failover_event(event.id)
|
||||
updated_event = self.manager.get_failover_event(event.id)
|
||||
assert updated_event.status == "completed"
|
||||
self.log("Failover completed")
|
||||
|
||||
# 获取故障转移事件列表
|
||||
events = self.manager.list_failover_events(self.tenant_id)
|
||||
events = self.manager.list_failover_events(self.tenant_id)
|
||||
self.log(f"Listed {len(events)} failover events")
|
||||
|
||||
# 清理
|
||||
@@ -556,7 +556,7 @@ class TestOpsManager:
|
||||
self.log("Cleaned up failover test data")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Failover test failed: {e}", success = False)
|
||||
self.log(f"Failover test failed: {e}", success=False)
|
||||
|
||||
def test_backup(self) -> None:
|
||||
"""测试备份与恢复"""
|
||||
@@ -564,17 +564,17 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 创建备份任务
|
||||
job = self.manager.create_backup_job(
|
||||
tenant_id = self.tenant_id,
|
||||
name = "每日数据库备份",
|
||||
backup_type = "full",
|
||||
target_type = "database",
|
||||
target_id = "postgres-main",
|
||||
schedule = "0 2 * * *", # 每天凌晨2点
|
||||
retention_days = 30,
|
||||
encryption_enabled = True,
|
||||
compression_enabled = True,
|
||||
storage_location = "s3://insightflow-backups/",
|
||||
job = self.manager.create_backup_job(
|
||||
tenant_id=self.tenant_id,
|
||||
name="每日数据库备份",
|
||||
backup_type="full",
|
||||
target_type="database",
|
||||
target_id="postgres-main",
|
||||
schedule="0 2 * * *", # 每天凌晨2点
|
||||
retention_days=30,
|
||||
encryption_enabled=True,
|
||||
compression_enabled=True,
|
||||
storage_location="s3://insightflow-backups/",
|
||||
)
|
||||
|
||||
self.log(f"Created backup job: {job.name} (ID: {job.id})")
|
||||
@@ -582,12 +582,12 @@ class TestOpsManager:
|
||||
self.log(f" Retention: {job.retention_days} days")
|
||||
|
||||
# 获取备份任务列表
|
||||
jobs = self.manager.list_backup_jobs(self.tenant_id)
|
||||
jobs = self.manager.list_backup_jobs(self.tenant_id)
|
||||
assert len(jobs) >= 1
|
||||
self.log(f"Listed {len(jobs)} backup jobs")
|
||||
|
||||
# 执行备份
|
||||
record = self.manager.execute_backup(job.id)
|
||||
record = self.manager.execute_backup(job.id)
|
||||
|
||||
if record:
|
||||
self.log(f"Executed backup: {record.id}")
|
||||
@@ -595,11 +595,11 @@ class TestOpsManager:
|
||||
self.log(f" Storage: {record.storage_path}")
|
||||
|
||||
# 获取备份记录列表
|
||||
records = self.manager.list_backup_records(self.tenant_id)
|
||||
records = self.manager.list_backup_records(self.tenant_id)
|
||||
self.log(f"Listed {len(records)} backup records")
|
||||
|
||||
# 测试恢复(模拟)
|
||||
restore_result = self.manager.restore_from_backup(record.id)
|
||||
restore_result = self.manager.restore_from_backup(record.id)
|
||||
self.log(f"Restore test result: {restore_result}")
|
||||
|
||||
# 清理
|
||||
@@ -610,7 +610,7 @@ class TestOpsManager:
|
||||
self.log("Cleaned up backup test data")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Backup test failed: {e}", success = False)
|
||||
self.log(f"Backup test failed: {e}", success=False)
|
||||
|
||||
def test_cost_optimization(self) -> None:
|
||||
"""测试成本优化"""
|
||||
@@ -618,27 +618,27 @@ class TestOpsManager:
|
||||
|
||||
try:
|
||||
# 记录资源利用率数据
|
||||
report_date = datetime.now().strftime("%Y-%m-%d")
|
||||
report_date = datetime.now().strftime("%Y-%m-%d")
|
||||
|
||||
for i in range(5):
|
||||
self.manager.record_resource_utilization(
|
||||
tenant_id = self.tenant_id,
|
||||
resource_type = ResourceType.CPU,
|
||||
resource_id = f"server-{i:03d}",
|
||||
utilization_rate = 0.05 + random.random() * 0.1, # 低利用率
|
||||
peak_utilization = 0.15,
|
||||
avg_utilization = 0.08,
|
||||
idle_time_percent = 0.85,
|
||||
report_date = report_date,
|
||||
recommendations = ["Consider downsizing this resource"],
|
||||
tenant_id=self.tenant_id,
|
||||
resource_type=ResourceType.CPU,
|
||||
resource_id=f"server-{i:03d}",
|
||||
utilization_rate=0.05 + random.random() * 0.1, # 低利用率
|
||||
peak_utilization=0.15,
|
||||
avg_utilization=0.08,
|
||||
idle_time_percent=0.85,
|
||||
report_date=report_date,
|
||||
recommendations=["Consider downsizing this resource"],
|
||||
)
|
||||
|
||||
self.log("Recorded 5 resource utilization records")
|
||||
|
||||
# 生成成本报告
|
||||
now = datetime.now()
|
||||
report = self.manager.generate_cost_report(
|
||||
tenant_id = self.tenant_id, year = now.year, month = now.month
|
||||
now = datetime.now()
|
||||
report = self.manager.generate_cost_report(
|
||||
tenant_id=self.tenant_id, year=now.year, month=now.month
|
||||
)
|
||||
|
||||
self.log(f"Generated cost report: {report.id}")
|
||||
@@ -647,11 +647,11 @@ class TestOpsManager:
|
||||
self.log(f" Anomalies detected: {len(report.anomalies)}")
|
||||
|
||||
# 检测闲置资源
|
||||
idle_resources = self.manager.detect_idle_resources(self.tenant_id)
|
||||
idle_resources = self.manager.detect_idle_resources(self.tenant_id)
|
||||
self.log(f"Detected {len(idle_resources)} idle resources")
|
||||
|
||||
# 获取闲置资源列表
|
||||
idle_list = self.manager.get_idle_resources(self.tenant_id)
|
||||
idle_list = self.manager.get_idle_resources(self.tenant_id)
|
||||
for resource in idle_list:
|
||||
self.log(
|
||||
f" Idle resource: {resource.resource_name} (est. cost: {
|
||||
@@ -660,7 +660,7 @@ class TestOpsManager:
|
||||
)
|
||||
|
||||
# 生成成本优化建议
|
||||
suggestions = self.manager.generate_cost_optimization_suggestions(self.tenant_id)
|
||||
suggestions = self.manager.generate_cost_optimization_suggestions(self.tenant_id)
|
||||
self.log(f"Generated {len(suggestions)} cost optimization suggestions")
|
||||
|
||||
for suggestion in suggestions:
|
||||
@@ -672,12 +672,12 @@ class TestOpsManager:
|
||||
self.log(f" Difficulty: {suggestion.difficulty}")
|
||||
|
||||
# 获取优化建议列表
|
||||
all_suggestions = self.manager.get_cost_optimization_suggestions(self.tenant_id)
|
||||
all_suggestions = self.manager.get_cost_optimization_suggestions(self.tenant_id)
|
||||
self.log(f"Listed {len(all_suggestions)} optimization suggestions")
|
||||
|
||||
# 应用优化建议
|
||||
if all_suggestions:
|
||||
applied = self.manager.apply_cost_optimization_suggestion(all_suggestions[0].id)
|
||||
applied = self.manager.apply_cost_optimization_suggestion(all_suggestions[0].id)
|
||||
if applied:
|
||||
self.log(f"Applied optimization suggestion: {applied.title}")
|
||||
assert applied.is_applied
|
||||
@@ -698,7 +698,7 @@ class TestOpsManager:
|
||||
self.log("Cleaned up cost optimization test data")
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"Cost optimization test failed: {e}", success = False)
|
||||
self.log(f"Cost optimization test failed: {e}", success=False)
|
||||
|
||||
def print_summary(self) -> None:
|
||||
"""打印测试总结"""
|
||||
@@ -706,9 +706,9 @@ class TestOpsManager:
|
||||
print("Test Summary")
|
||||
print(" = " * 60)
|
||||
|
||||
total = len(self.test_results)
|
||||
passed = sum(1 for _, success in self.test_results if success)
|
||||
failed = total - passed
|
||||
total = len(self.test_results)
|
||||
passed = sum(1 for _, success in self.test_results if success)
|
||||
failed = total - passed
|
||||
|
||||
print(f"Total tests: {total}")
|
||||
print(f"Passed: {passed} ✅")
|
||||
@@ -725,7 +725,7 @@ class TestOpsManager:
|
||||
|
||||
def main() -> None:
|
||||
"""主函数"""
|
||||
test = TestOpsManager()
|
||||
test = TestOpsManager()
|
||||
test.run_all_tests()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user