Files
wechat-agent/src/wechat/controller.py
jesxion eb19d8d05f 初始化 WeChat Agent 项目
MVP Phase 1 核心模块:
- src/config/settings.py     - 配置管理
- src/vlm/qwen_vl.py        - Qwen-VL2 视觉模型接口
- src/wechat/controller.py  - 微信客户端控制器(UIAutomation)
- src/core/engine.py        - 核心引擎(轮询、消息处理、回复)
- src/main.py               - 主入口

文档:
- README.md
- config.example.yaml
- requirements.txt

技术方案:纯视觉AI + UIAutomation
- 截图 → Qwen-VL2 识别 → AI 判断 → UIAutomation 操作
2026-04-13 11:12:49 +08:00

288 lines
8.8 KiB
Python

"""
微信客户端控制器
WeChat Windows client controller (UI automation via pywinauto, win32 backend)
"""
import time
import logging
from dataclasses import dataclass
from typing import List, Optional, Tuple
from pathlib import Path
logger = logging.getLogger(__name__)

# pywinauto is an optional, Windows-only dependency. Import it defensively so
# that this module can still be imported when it is unavailable; callers can
# check HAS_PYWINAUTO before trying to construct a controller.
try:
    from pywinauto import Application, WindowSpecification
    from pywinauto.win32structures import RECT
    from pywinauto.controls.hwndwrapper import HwndWrapper
    HAS_PYWINAUTO = True
except ImportError:
    HAS_PYWINAUTO = False
    # Bind placeholders for the names that failed to import. Class-level and
    # signature annotations in this module are evaluated when the definitions
    # execute; without these bindings they would raise NameError and make the
    # whole module unimportable, defeating the HAS_PYWINAUTO flag.
    Application = WindowSpecification = RECT = HwndWrapper = None
    logger.warning("pywinauto 未安装,请运行: pip install pywinauto")
@dataclass
class ChatMessage:
    """One chat message read from the conversation view.

    All fields are plain values; no validation is performed on construction.
    """

    # Display name of the author as it appeared in the message text.
    sender: str
    # Message body.
    content: str
    # Timestamp text exactly as displayed (not parsed into a datetime).
    time: str
    # True when the message is considered to be sent by the logged-in user.
    is_self: bool
@dataclass
class WeChatWindow:
    """Snapshot of a top-level WeChat window.

    Attributes are captured at enumeration time and are not refreshed.
    """

    # Native window handle.
    hwnd: int
    # Window caption text.
    title: str
    # Window rectangle in screen coordinates. Written as a forward reference
    # so that defining this class does not require pywinauto's RECT to be
    # importable (a bare ``RECT`` annotation is evaluated at class creation
    # and raises NameError when the optional import failed).
    rect: "RECT"
    # True when the window was minimized (iconic) at enumeration time.
    # The camelCase name is kept for backward compatibility with callers.
    isMinimized: bool
class WeChatController:
    """Drive the WeChat Windows desktop client through pywinauto.

    The controller attaches to an already running WeChat window (win32
    backend) and offers helpers to take screenshots, read the chat and
    message lists, and type/send text.

    The control class names used for the chat list and the message list
    ("ChatListBox", "ChatMessageList") are placeholders taken from the
    original implementation and may need adjusting to the real UI tree.

    Can be used as a context manager: entering calls ``connect()`` and
    leaving calls ``disconnect()``. Note that ``__enter__`` does not check
    the result of ``connect()``; use ``is_connected()`` if that matters.
    """

    # Window class names of the WeChat client.
    WEIXIN_WINDOW_CLASS = "WeChatLoginWnd"  # login window
    WEIXIN_MAIN_WINDOW_CLASS = "WeChatMainWnd"  # main window

    def __init__(self, window_title: str = "微信"):
        """Create a controller for the window with the given title.

        Args:
            window_title: Title of the WeChat window to attach to.

        Raises:
            RuntimeError: If pywinauto could not be imported.
        """
        if not HAS_PYWINAUTO:
            raise RuntimeError("pywinauto 未安装,无法控制微信客户端")
        self.window_title = window_title
        self.app: Optional["Application"] = None
        self.main_window: Optional["WindowSpecification"] = None
        self._connected = False

    def connect(self, timeout: float = 10) -> bool:
        """Attach to the running WeChat window.

        Args:
            timeout: Maximum time in seconds to wait for the window.

        Returns:
            True when the connection succeeded, False otherwise (the error
            is logged, never raised).
        """
        try:
            # Attach to an already running WeChat process.
            self.app = Application(backend="win32").connect(
                title=self.window_title,
                timeout=timeout,
            )
            self.main_window = self.app.window(title=self.window_title)
            self._connected = True
            logger.info("成功连接到微信窗口")
            return True
        except Exception as e:
            logger.error(f"连接微信窗口失败: {e}")
            return False

    def find_wechat_window(self) -> Optional["WeChatWindow"]:
        """Enumerate top-level windows and return the first WeChat match.

        A window matches when ``self.window_title`` is a substring of its
        caption. Uses the Win32 ``EnumWindows`` API directly via ctypes.

        Returns:
            A ``WeChatWindow`` describing the first match, or None when no
            window matched.
        """
        import ctypes
        from ctypes import wintypes

        user32 = ctypes.windll.user32
        # Collected matches; filled by the enumeration callback below.
        windows: List["WeChatWindow"] = []

        @ctypes.WINFUNCTYPE(wintypes.BOOL, wintypes.HWND, wintypes.LPARAM)
        def enum_callback(hwnd, lparam):
            length = user32.GetWindowTextLengthW(hwnd)
            if length > 0:
                # +1 leaves room for the terminating NUL character.
                buff = ctypes.create_unicode_buffer(length + 1)
                user32.GetWindowTextW(hwnd, buff, length + 1)
                title = buff.value
                if self.window_title in title:
                    rect = RECT()
                    user32.GetWindowRect(hwnd, ctypes.byref(rect))
                    is_min = user32.IsIconic(hwnd)
                    windows.append(
                        WeChatWindow(
                            hwnd=hwnd,
                            title=title,
                            rect=rect,
                            isMinimized=bool(is_min),
                        )
                    )
            # Returning True tells EnumWindows to keep enumerating.
            return True

        user32.EnumWindows(enum_callback, 0)
        if windows:
            logger.info(f"找到 {len(windows)} 个微信窗口")
            return windows[0]
        return None

    def screenshot(self, output_path: Optional[str] = None) -> str:
        """Capture the WeChat window to a PNG file.

        Args:
            output_path: Destination path. When None, a file named
                ``wechat_screenshot_<unix seconds>.png`` in the system
                temporary directory is used.

        Returns:
            The path of the saved screenshot as a string.

        Raises:
            RuntimeError: If not connected, or if no image could be
                captured.
            Exception: Any error raised while restoring, capturing or
                saving is logged and re-raised.
        """
        if self.main_window is None:
            raise RuntimeError("未连接微信窗口")

        if output_path is None:
            import tempfile
            target = Path(tempfile.gettempdir()) / f"wechat_screenshot_{int(time.time())}.png"
        else:
            target = Path(output_path)

        try:
            # A minimized window cannot be captured; restore it first.
            if self.main_window.is_minimized():
                self.main_window.restore()
            image = self.main_window.capture_as_image()
            # pywinauto returns None instead of raising when Pillow is not
            # available; fail with an explicit message rather than an
            # AttributeError on ``None.save``.
            if image is None:
                raise RuntimeError("capture_as_image() 返回 None,请确认已安装 Pillow")
            image.save(str(target))
            logger.debug(f"截图已保存: {target}")
            return str(target)
        except Exception as e:
            logger.error(f"截图失败: {e}")
            raise

    def get_chat_list(self) -> List[str]:
        """Return the texts of the entries in the chat list.

        The chat list control is looked up by the placeholder class name
        "ChatListBox"; adjust it to the actual UI structure.

        Returns:
            The item texts, or an empty list on any failure (logged).
        """
        try:
            chat_list = self.main_window.window(
                class_name="ChatListBox"  # assumed class name
            )
            return [item.text() for item in chat_list.items()]
        except Exception as e:
            logger.warning(f"获取聊天列表失败: {e}")
            return []

    def click_on_chat(self, chat_name: str) -> bool:
        """Click the chat list entry titled ``chat_name``.

        Returns:
            True when the click was issued, False on any failure (logged).
        """
        try:
            chat_list = self.main_window.window(class_name="ChatListBox")
            chat_item = chat_list.window(title=chat_name)
            chat_item.click()
            logger.info(f"点击聊天: {chat_name}")
            return True
        except Exception as e:
            logger.error(f"点击聊天失败: {e}")
            return False

    def get_message_list(self, count: int = 10) -> List["ChatMessage"]:
        """Return up to ``count`` of the most recent parsed messages.

        Args:
            count: Maximum number of trailing list items to inspect. Values
                less than or equal to zero yield an empty list.

        Returns:
            Messages that could be parsed, oldest first. Items whose text
            does not match the expected format are skipped. On any failure
            the messages collected so far are returned (the error is
            logged).
        """
        # ``items[-0:]`` is the whole list, not an empty one, so a
        # non-positive count must be handled before slicing.
        if count <= 0:
            return []

        messages: List["ChatMessage"] = []
        try:
            msg_list = self.main_window.window(class_name="ChatMessageList")
            # Keep only the last ``count`` items.
            for item in msg_list.items()[-count:]:
                msg = self._parse_message(item.text())
                if msg:
                    messages.append(msg)
        except Exception as e:
            logger.warning(f"获取消息列表失败: {e}")
        return messages

    def _parse_message(self, text: str) -> Optional["ChatMessage"]:
        """Parse one message item's text into a ``ChatMessage``.

        Expected format: ``<sender> <HH:MM>\\n<content>``. The sender part
        is optional; a message without one is treated as sent by the
        logged-in user (``is_self=True``).

        Returns:
            The parsed message, or None when ``text`` does not match.
        """
        import re

        # The sender group is optional so that sender-less messages match
        # and the ``is_self`` branch is reachable. Any input that matched
        # the previous mandatory-sender pattern still matches identically,
        # because the sender-present alternative is tried first.
        pattern = r"(?:(.+?)\s+)?(\d{2}:\d{2})\n([\s\S]+)"
        match = re.match(pattern, text)
        if not match:
            return None
        sender = (match.group(1) or "").strip()
        time_str = match.group(2).strip()
        content = match.group(3).strip()
        return ChatMessage(
            sender=sender,
            content=content,
            time=time_str,
            is_self=sender == "",
        )

    def send_text(self, text: str) -> bool:
        """Type ``text`` into the input box and press ENTER to send it.

        Args:
            text: The text to send. Empty text is rejected.

        Returns:
            True when the keystrokes were issued, False for empty text or
            on any failure (logged).
        """
        if not text:
            # Pressing ENTER on an empty input box sends nothing; do not
            # report success for it.
            logger.warning("发送消息失败: 消息内容为空")
            return False
        try:
            input_box = self.main_window.window(class_name="Edit")
            # Clear any existing draft before entering the new text.
            input_box.set_edit_text("")
            input_box.type_keys("^a")  # select all
            input_box.type_keys("{DELETE}")
            input_box.set_edit_text(text)
            # ENTER sends the message.
            input_box.type_keys("{ENTER}")
            logger.info(f"发送消息: {text[:20]}...")
            return True
        except Exception as e:
            logger.error(f"发送消息失败: {e}")
            return False

    def find_button(self, name: str) -> Optional["WindowSpecification"]:
        """Look up a button by its title.

        Args:
            name: Title of the button.

        Returns:
            The window specification of the button when it currently
            exists, otherwise None (also on any lookup error).
        """
        try:
            btn = self.main_window.window(title=name, class_name="Button")
            # ``window()`` only builds a lazy specification and never fails
            # by itself, so existence has to be checked explicitly for the
            # Optional return contract to mean anything.
            if not btn.exists():
                return None
            return btn
        except Exception:
            return None

    def click_button(self, name: str) -> bool:
        """Click the button titled ``name``.

        Returns:
            True when the button was found and clicked, False when it was
            not found or the click failed (failures are logged).
        """
        try:
            btn = self.find_button(name)
            if btn is None:
                return False
            btn.click()
            return True
        except Exception as e:
            logger.error(f"点击按钮失败: {e}")
            return False

    def is_connected(self) -> bool:
        """Return True when ``connect()`` succeeded and was not undone."""
        return self._connected and self.app is not None

    def disconnect(self):
        """Drop the references to the application and its main window.

        Does nothing when there is no application attached.
        """
        if not self.app:
            return
        self.app = None
        self.main_window = None
        self._connected = False
        logger.info("已断开微信连接")

    def __enter__(self):
        """Connect and return the controller (connect result is ignored)."""
        self.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Disconnect on leaving the ``with`` block; exceptions propagate."""
        self.disconnect()