#!/usr/bin/env python3
"""InsightFlow LLM client (Phase 4).

Talks to the Kimi chat-completions API and builds RAG question answering,
entity/relation extraction and agent command parsing on top of it.
"""

import json
import logging
import os
import re
from collections.abc import AsyncGenerator
from dataclasses import dataclass
from typing import Optional

import httpx

logger = logging.getLogger(__name__)

KIMI_API_KEY = os.getenv("KIMI_API_KEY", "")
KIMI_BASE_URL = os.getenv("KIMI_BASE_URL", "https://api.kimi.com/coding")

# NOTE(review): the Chinese prompt templates in this module are runtime strings
# and are reproduced exactly as found (segments separated by single spaces).
# If they were meant to contain line breaks, confirm against the intended
# templates before changing them.


@dataclass
class ChatMessage:
    """One chat turn sent to the chat-completions endpoint."""

    role: str  # sent as the message's "role" field
    content: str  # sent as the message's "content" field


@dataclass
class EntityExtractionResult:
    """An entity parsed from the model's extraction answer."""

    name: str
    type: str  # defaults to "OTHER" when the model omits it
    definition: str  # defaults to "" when the model omits it
    confidence: float  # defaults to 0.8 when the model omits it


@dataclass
class RelationExtractionResult:
    """A relation between two entities parsed from the model's extraction answer."""

    source: str
    target: str
    type: str  # defaults to "related" when the model omits it
    confidence: float  # defaults to 0.8 when the model omits it


class LLMClient:
    """Client for the Kimi chat-completions API."""

    _MODEL = "k2p5"
    _TIMEOUT_SECONDS = 120.0

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Create a client.

        Args:
            api_key: API key; falls back to the ``KIMI_API_KEY`` module constant.
            base_url: API base URL; falls back to ``KIMI_BASE_URL``.
        """
        self.api_key = api_key or KIMI_API_KEY
        self.base_url = base_url or KIMI_BASE_URL
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _require_api_key(self) -> None:
        """Raise ``ValueError`` when no API key is configured."""
        if not self.api_key:
            raise ValueError("KIMI_API_KEY not set")

    def _build_payload(
        self, messages: list[ChatMessage], temperature: float, stream: bool
    ) -> dict:
        """Build the JSON request body shared by ``chat`` and ``chat_stream``."""
        return {
            "model": self._MODEL,
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            "temperature": temperature,
            "stream": stream,
        }

    @staticmethod
    def _extract_json_object(content: str) -> Optional[dict]:
        """Return the first JSON object embedded in *content*, or ``None``.

        Every ``{`` is tried as a possible start of a JSON document, so
        surrounding prose or markdown fences around the object are tolerated.
        """
        if not isinstance(content, str):
            return None
        decoder = json.JSONDecoder()
        for brace in re.finditer(r"\{", content):
            try:
                parsed, _ = decoder.raw_decode(content, brace.start())
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    async def chat(
        self, messages: list[ChatMessage], temperature: float = 0.3, stream: bool = False
    ) -> str:
        """Send a chat request and return the assistant's reply text.

        Args:
            messages: Conversation to send.
            temperature: Sampling temperature forwarded to the API.
            stream: When true, the reply is fetched through ``chat_stream`` and
                the pieces are joined, so the return value is still the full text.

        Raises:
            ValueError: If no API key is configured.
            httpx.HTTPStatusError: If the API answers with an error status.
        """
        self._require_api_key()

        if stream:
            # A streamed response is server-sent events, not one JSON document,
            # so it cannot go through response.json() below.
            pieces = [piece async for piece in self.chat_stream(messages, temperature)]
            return "".join(pieces)

        payload = self._build_payload(messages, temperature, stream=False)
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.base_url}/v1/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=self._TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            result = response.json()
        return result["choices"][0]["message"]["content"]

    async def chat_stream(
        self, messages: list[ChatMessage], temperature: float = 0.3
    ) -> AsyncGenerator[str, None]:
        """Send a streaming chat request and yield reply text pieces as they arrive.

        Raises:
            ValueError: If no API key is configured (raised on first iteration).
            httpx.HTTPStatusError: If the API answers with an error status.
        """
        self._require_api_key()

        payload = self._build_payload(messages, temperature, stream=True)
        async with httpx.AsyncClient() as client:
            async with client.stream(
                "POST",
                f"{self.base_url}/v1/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=self._TIMEOUT_SECONDS,
            ) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    if not line.startswith("data:"):
                        continue
                    data = line[len("data:"):].strip()
                    if data == "[DONE]":
                        break
                    try:
                        delta = json.loads(data)["choices"][0]["delta"]
                        piece = delta.get("content")
                    except (
                        json.JSONDecodeError,
                        KeyError,
                        IndexError,
                        TypeError,
                        AttributeError,
                    ):
                        # Best effort: skip chunks that do not have the expected shape.
                        continue
                    # The yield stays outside the try so errors raised by the
                    # consumer are not swallowed; non-text content is skipped.
                    if isinstance(piece, str):
                        yield piece

    async def extract_entities_with_confidence(
        self, text: str
    ) -> tuple[list[EntityExtractionResult], list[RelationExtractionResult]]:
        """Extract entities and relations, each with a confidence score.

        Only the first 3000 characters of *text* are sent to the model.

        Returns:
            ``(entities, relations)``; both lists are empty when the reply
            contains no usable JSON object or an item is malformed.
        """
        prompt = (
            "从以下会议文本中提取关键实体和它们之间的关系,以 JSON 格式返回: "
            f"文本:{text[:3000]} "
            "要求: "
            "1. entities: 每个实体包含 name(名称), type(类型: PROJECT/TECH/PERSON/ORG/OTHER), "
            "definition(一句话定义), confidence(置信度0-1) "
            "2. relations: 每个关系包含 source(源实体名), target(目标实体名), "
            "type(关系类型: belongs_to/works_with/depends_on/mentions/related), confidence(置信度0-1) "
            '3. 只返回 JSON 对象,格式: {"entities": [...], "relations": [...]} '
            "示例: "
            '{ "entities": [ '
            '{"name": "Project Alpha", "type": "PROJECT", "definition": "核心项目", "confidence": 0.95}, '
            '{"name": "K8s", "type": "TECH", "definition": "Kubernetes容器编排平台", "confidence": 0.88} '
            '], "relations": [ '
            '{"source": "Project Alpha", "target": "K8s", "type": "depends_on", "confidence": 0.82} '
            "] }"
        )

        messages = [ChatMessage(role="user", content=prompt)]
        content = await self.chat(messages, temperature=0.1)

        data = self._extract_json_object(content)
        if data is None:
            return [], []

        try:
            entities = [
                EntityExtractionResult(
                    name=e["name"],
                    type=e.get("type", "OTHER"),
                    definition=e.get("definition", ""),
                    confidence=e.get("confidence", 0.8),
                )
                for e in data.get("entities") or []
            ]
            relations = [
                RelationExtractionResult(
                    source=r["source"],
                    target=r["target"],
                    type=r.get("type", "related"),
                    confidence=r.get("confidence", 0.8),
                )
                for r in data.get("relations") or []
            ]
        except (KeyError, TypeError, AttributeError) as err:
            # A missing required key or a non-object item invalidates the result.
            logger.warning("Parse extraction result failed: %r", err)
            return [], []
        return entities, relations

    async def rag_query(self, query: str, context: str, project_context: dict) -> str:
        """Answer *query* from the project information and retrieved context.

        Only the first 4000 characters of *context* are sent to the model.
        """
        project_json = json.dumps(project_context, ensure_ascii=False, indent=2)
        prompt = (
            "你是一个专业的项目分析助手。基于以下项目信息回答问题: "
            f"## 项目信息 {project_json} "
            f"## 相关上下文 {context[:4000]} "
            f"## 用户问题 {query} "
            "请用中文回答,保持简洁专业。如果信息不足,请明确说明。"
        )

        messages = [
            ChatMessage(
                role="system", content="你是一个专业的项目分析助手,擅长从会议记录中提取洞察。"
            ),
            ChatMessage(role="user", content=prompt),
        ]
        return await self.chat(messages, temperature=0.3)

    async def agent_command(self, command: str, project_context: dict) -> dict:
        """Turn a natural-language instruction into a structured operation.

        Returns:
            The JSON object produced by the model, or a dict with
            ``"intent": "unknown"`` when no JSON object can be parsed.
        """
        project_json = json.dumps(project_context, ensure_ascii=False, indent=2)
        prompt = (
            "解析以下用户指令,转换为结构化操作: "
            f"## 项目信息 {project_json} "
            f"## 用户指令 {command} "
            "请分析指令意图,返回 JSON 格式: "
            '{ "intent": "merge_entities|answer_question|edit_entity|create_relation|unknown", '
            '"params": { // 根据 intent 不同,参数不同 }, '
            '"explanation": "对用户指令的解释" } '
            "意图说明: "
            "- merge_entities: 合并实体,params 包含 source_names(源实体名列表), target_name(目标实体名) "
            "- answer_question: 回答问题,params 包含 question(问题内容) "
            "- edit_entity: 编辑实体,params 包含 entity_name(实体名), field(字段), value(新值) "
            "- create_relation: 创建关系,params 包含 source(源实体), target(目标实体), "
            "relation_type(关系类型) "
        )

        messages = [ChatMessage(role="user", content=prompt)]
        content = await self.chat(messages, temperature=0.1)

        # No brace at all: the reply contains nothing that looks like JSON.
        if not isinstance(content, str) or "{" not in content:
            return {"intent": "unknown", "explanation": "无法解析指令"}

        parsed = self._extract_json_object(content)
        if parsed is None:
            return {"intent": "unknown", "explanation": "解析失败"}
        return parsed

    async def analyze_entity_evolution(self, entity_name: str, mentions: list[dict]) -> str:
        """Analyze how an entity's role and the attitude towards it change over time.

        Only the first 20 entries of *mentions* are included in the prompt.
        """
        mentions_text = "\n".join(
            f"[{m.get('created_at', '未知时间')}] {m.get('text_snippet', '')}"
            for m in mentions[:20]  # cap the number of mentions sent
        )

        prompt = (
            f'分析实体 "{entity_name}" 在项目中的演变和态度变化: '
            f"## 提及记录 {mentions_text} "
            "请分析: "
            "1. 该实体的角色/重要性变化 "
            "2. 相关方对它的态度变化 "
            "3. 关键时间节点 "
            "4. 总结性洞察 "
            "用中文回答,结构清晰。"
        )

        messages = [ChatMessage(role="user", content=prompt)]
        return await self.chat(messages, temperature=0.3)


# Singleton instance
_llm_client: Optional[LLMClient] = None


def get_llm_client() -> LLMClient:
    """Return the shared ``LLMClient``, creating it on first use."""
    global _llm_client
    if _llm_client is None:
        _llm_client = LLMClient()
    return _llm_client