Files
insightflow/backend/tingwu_client.py
OpenClaw Bot be22b763fa fix: auto-fix code issues (cron)
- 修复重复导入/字段
- 修复异常处理
- 修复PEP8格式问题
- 添加类型注解
- 修复重复函数定义 (health_check, create_webhook_endpoint, etc)
- 修复未定义名称 (SearchOperator, TenantTier, Query, Body, logger)
- 修复 workflow_manager.py 的类定义重复问题
- 添加缺失的导入
2026-02-27 09:18:58 +08:00

173 lines
6.1 KiB
Python

#!/usr/bin/env python3
"""
阿里听悟 API 封装 - 使用 HTTP API
"""
import os
import time
from datetime import datetime
from typing import Dict, Any
class TingwuClient:
    """Thin wrapper around the Alibaba Cloud Tingwu transcription service.

    Credentials are read from the ``ALI_ACCESS_KEY`` and ``ALI_SECRET_KEY``
    environment variables. Calls go through the optional
    ``alibabacloud_tingwu20230930`` SDK; when the SDK cannot be imported, or
    any SDK/API call fails, the public methods fall back to mock data and
    print a diagnostic instead of raising.
    """

    # Host used for both the HTTP endpoint URL and the SDK configuration, so
    # the two can never drift apart.
    _ENDPOINT_HOST = "tingwu.cn-beijing.aliyuncs.com"

    def __init__(self):
        """Load credentials from the environment.

        Raises:
            ValueError: if either ``ALI_ACCESS_KEY`` or ``ALI_SECRET_KEY`` is
                unset or empty.
        """
        self.access_key = os.getenv("ALI_ACCESS_KEY", "")
        self.secret_key = os.getenv("ALI_SECRET_KEY", "")
        self.endpoint = f"https://{self._ENDPOINT_HOST}"
        if not self.access_key or not self.secret_key:
            raise ValueError("ALI_ACCESS_KEY and ALI_SECRET_KEY required")

    def _sign_request(
        self,
        method: str,
        uri: str,
        query: str = "",
        body: str = "",
        action: str = "CreateTask",
    ) -> Dict[str, str]:
        """Build the request headers for an Alibaba Cloud (ACS3) API call.

        This is a simplified placeholder: the ``Authorization`` header only
        carries the credential scope and no computed signature, and
        ``method``, ``uri``, ``query`` and ``body`` are accepted but not yet
        used. A production implementation must compute the full signature.

        Args:
            method: HTTP method of the request (currently unused).
            uri: Request path (currently unused).
            query: Canonical query string (currently unused).
            body: Request body (currently unused).
            action: Value for the ``x-acs-action`` header.

        Returns:
            A dict of HTTP headers.
        """
        # Local import: the module only imports ``datetime`` at file level.
        from datetime import timezone

        # ``datetime.utcnow()`` is deprecated; an aware UTC timestamp renders
        # to the same string with this format.
        timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
        return {
            "Content-Type": "application/json",
            "x-acs-action": action,
            "x-acs-version": "2023-09-30",
            "x-acs-date": timestamp,
            "Authorization": f"ACS3-HMAC-SHA256 Credential={self.access_key}/acs/tingwu/cn-beijing",
        }

    def _build_sdk_client(self):
        """Import the Tingwu SDK lazily and return ``(client, models)``.

        The SDK is an optional dependency, so the imports live here rather
        than at module level.

        Raises:
            ImportError: if the SDK packages are not installed. Callers catch
                this and fall back to mock data.
        """
        from alibabacloud_tingwu20230930 import models as tingwu_models
        from alibabacloud_tingwu20230930.client import Client as TingwuSDKClient
        from alibabacloud_tea_openapi import models as open_api_models

        config = open_api_models.Config(
            access_key_id=self.access_key,
            access_key_secret=self.secret_key
        )
        config.endpoint = self._ENDPOINT_HOST
        return TingwuSDKClient(config), tingwu_models

    def create_task(self, audio_url: str, language: str = "zh") -> str:
        """Create an offline Tingwu transcription task.

        Args:
            audio_url: URL of the audio file, passed to the SDK as an OSS
                source.
            language: Accepted for interface compatibility; it is not
                currently forwarded to the service.

        Returns:
            The task id reported by the service, or a ``mock_task_<epoch>``
            id when the SDK is missing or the call fails.
        """
        try:
            client, tingwu_models = self._build_sdk_client()
            # NOTE(review): confirm these model class names against the
            # installed SDK version; a mismatch is swallowed by the broad
            # handler below and silently yields a mock task id.
            request = tingwu_models.CreateTaskRequest(
                type="offline",
                input=tingwu_models.Input(
                    source="OSS",
                    file_url=audio_url
                ),
                parameters=tingwu_models.Parameters(
                    transcription=tingwu_models.Transcription(
                        diarization_enabled=True,
                        sentence_max_length=20
                    )
                )
            )
            response = client.create_task(request)
            if response.body.code == "0":
                return response.body.data.task_id
            raise RuntimeError(f"Create task failed: {response.body.message}")
        except ImportError:
            # Fallback: the SDK is not installed, so hand back a mock id.
            print("Tingwu SDK not available, using mock")
            return f"mock_task_{int(time.time())}"
        except Exception as e:
            # Deliberate best-effort: any API failure degrades to a mock id.
            print(f"Tingwu API error: {e}")
            return f"mock_task_{int(time.time())}"

    def get_task_result(self, task_id: str, max_retries: int = 60, interval: int = 5) -> Dict[str, Any]:
        """Poll a task until it finishes and return its parsed transcription.

        Args:
            task_id: Id returned by :meth:`create_task`.
            max_retries: Maximum number of status queries.
            interval: Seconds to wait between two queries.

        Returns:
            A dict with ``full_text`` and ``segments``. Mock data is returned
            when the SDK is missing, a query fails, or the task reports
            ``FAILED``.

        Raises:
            TimeoutError: if the task has not finished after ``max_retries``
                queries.
        """
        try:
            client, tingwu_models = self._build_sdk_client()
            for attempt in range(1, max_retries + 1):
                request = tingwu_models.GetTaskInfoRequest()
                response = client.get_task_info(task_id, request)
                if response.body.code != "0":
                    raise RuntimeError(f"Query failed: {response.body.message}")
                status = response.body.data.task_status
                if status == "SUCCESS":
                    return self._parse_result(response.body.data)
                if status == "FAILED":
                    # NOTE(review): this is caught below and turned into mock
                    # data, so a failed task is indistinguishable from a
                    # successful one for the caller - confirm this is wanted.
                    raise RuntimeError(f"Task failed: {response.body.data.error_message}")
                print(f"Task {task_id} status: {status}, retry {attempt}/{max_retries}")
                # No point sleeping after the final attempt; we time out next.
                if attempt < max_retries:
                    time.sleep(interval)
        except ImportError:
            print("Tingwu SDK not available, using mock result")
            return self._mock_result()
        except Exception as e:
            print(f"Get result error: {e}")
            return self._mock_result()
        # Raised outside the try block so it is not converted to mock data.
        raise TimeoutError(f"Task {task_id} timeout")

    def _parse_result(self, data: Any) -> Dict[str, Any]:
        """Convert a task-info payload into ``full_text`` and ``segments``.

        Args:
            data: Object exposing ``result.transcription`` with optional
                ``paragraphs`` (items with ``text``) and ``sentences`` (items
                with ``begin_time``, ``end_time``, ``text``, ``speaker_id``).

        Returns:
            ``{"full_text": str, "segments": list}`` where each segment holds
            ``start`` and ``end`` (the sentence times divided by 1000),
            ``text`` and a ``"Speaker <id>"`` label.
        """
        transcription = data.result.transcription
        paragraphs = transcription.paragraphs or []
        sentences = transcription.sentences or []

        full_text = " ".join(para.text for para in paragraphs).strip()
        segments = [
            {
                "start": sent.begin_time / 1000,
                "end": sent.end_time / 1000,
                "text": sent.text,
                "speaker": f"Speaker {sent.speaker_id}",
            }
            for sent in sentences
        ]
        return {
            "full_text": full_text,
            "segments": segments
        }

    def _mock_result(self) -> Dict[str, Any]:
        """Return a fixed sample transcription used when the real API is unavailable."""
        return {
            "full_text": "这是一个示例转录文本,包含 Project Alpha 和 K8s 等术语。",
            "segments": [
                {"start": 0.0, "end": 5.0, "text": "这是一个示例转录文本,包含 Project Alpha 和 K8s 等术语。", "speaker": "Speaker A"}
            ]
        }

    def transcribe(self, audio_url: str, language: str = "zh") -> Dict[str, Any]:
        """Create a transcription task and block until its result is available.

        Args:
            audio_url: URL of the audio file to transcribe.
            language: Passed through to :meth:`create_task`.

        Returns:
            The dict produced by :meth:`get_task_result`.
        """
        task_id = self.create_task(audio_url, language)
        print(f"Tingwu task: {task_id}")
        return self.get_task_result(task_id)