初始化 WeChat Agent 项目

MVP Phase 1 核心模块:
- src/config/settings.py     - 配置管理
- src/vlm/qwen_vl.py        - Qwen-VL2 视觉模型接口
- src/wechat/controller.py  - 微信客户端控制器(UIAutomation)
- src/core/engine.py        - 核心引擎(轮询、消息处理、回复)
- src/main.py               - 主入口

文档:
- README.md
- config.example.yaml
- requirements.txt

技术方案:纯视觉AI + UIAutomation
- 截图 → Qwen-VL2 识别 → AI 判断 → UIAutomation 操作
This commit is contained in:
2026-04-13 11:12:49 +08:00
commit eb19d8d05f
8 changed files with 1408 additions and 0 deletions

287
src/wechat/controller.py Normal file
View File

@@ -0,0 +1,287 @@
"""
微信客户端控制器
WeChat Windows Client Controller using UIAutomation
"""
import time
import logging
from dataclasses import dataclass
from typing import List, Optional, Tuple
from pathlib import Path
logger = logging.getLogger(__name__)

# pywinauto is an optional, Windows-only dependency. Import it defensively so
# that this module can still be imported when the package is unavailable;
# HAS_PYWINAUTO records whether the import succeeded.
try:
    from pywinauto import Application, WindowSpecification
    from pywinauto.win32structures import RECT
    from pywinauto.controls.hwndwrapper import HwndWrapper
    HAS_PYWINAUTO = True
except ImportError:
    HAS_PYWINAUTO = False
    # Bind placeholders for the names that could not be imported. They are
    # referenced by annotations further down in this module; leaving them
    # unbound would turn a missing optional dependency into a NameError at
    # import time instead of the soft failure signalled by HAS_PYWINAUTO.
    Application = WindowSpecification = RECT = HwndWrapper = None
    logger.warning("pywinauto 未安装,请运行: pip install pywinauto")
@dataclass
class ChatMessage:
    """One chat message extracted from the WeChat message list."""

    # Display name of whoever sent the message.
    sender: str
    # Message body text.
    content: str
    # Timestamp text exactly as it appeared next to the message.
    time: str
    # True when the message was sent by the logged-in user.
    is_self: bool
@dataclass
class WeChatWindow:
    """Snapshot of a top-level WeChat window found by enumeration."""

    # Native window handle.
    hwnd: int
    # Window caption text.
    title: str
    # Window rectangle. The annotation is a forward-reference string so that
    # creating this class does not require pywinauto's RECT to be importable.
    rect: "RECT"
    # True when the window was minimized (iconic) at enumeration time.
    isMinimized: bool
class WeChatController:
    """Controller for the WeChat Windows desktop client, built on pywinauto.

    The controller attaches to an already running WeChat window and offers
    helpers for taking screenshots, reading the chat/message lists and
    sending text. It can be used as a context manager::

        with WeChatController() as wechat:
            if wechat.is_connected():
                wechat.send_text("hello")

    NOTE(review): the control class names used to locate UI elements
    ("ChatListBox", "ChatMessageList", "Edit", "Button") are assumptions and
    must be verified against the real WeChat window hierarchy.
    """

    # Window class names of the WeChat client.
    WEIXIN_WINDOW_CLASS = "WeChatLoginWnd"  # login window
    WEIXIN_MAIN_WINDOW_CLASS = "WeChatMainWnd"  # main window

    def __init__(self, window_title: str = "微信"):
        """Create an unconnected controller.

        Args:
            window_title: Title of the WeChat window to attach to.

        Raises:
            RuntimeError: If pywinauto could not be imported.
        """
        if not HAS_PYWINAUTO:
            raise RuntimeError("pywinauto 未安装,无法控制微信客户端")
        self.window_title = window_title
        self.app: Optional["Application"] = None
        self.main_window: Optional["WindowSpecification"] = None
        self._connected = False

    def connect(self, timeout: float = 10) -> bool:
        """Attach to a running WeChat window.

        Args:
            timeout: Maximum time in seconds to wait for the window.

        Returns:
            True on success, False otherwise. On failure the controller is
            left in the disconnected state.
        """
        try:
            # Attach to an already running WeChat process via the win32 backend.
            app = Application(backend="win32").connect(
                title=self.window_title,
                timeout=timeout,
            )
            self.app = app
            self.main_window = app.window(title=self.window_title)
            self._connected = True
            logger.info("成功连接到微信窗口")
            return True
        except Exception as e:
            # Drop any state from an earlier successful connect so that
            # is_connected() agrees with the False returned here.
            self.app = None
            self.main_window = None
            self._connected = False
            logger.error(f"连接微信窗口失败: {e}")
            return False

    def find_wechat_window(self) -> Optional["WeChatWindow"]:
        """Enumerate top-level windows and return the first WeChat window.

        A window matches when ``self.window_title`` is a substring of its
        caption. Uses ``ctypes.windll`` and is therefore Windows-only.

        Returns:
            Information about the first matching window, or None if no
            window matched.
        """
        import ctypes
        from ctypes import wintypes

        user32 = ctypes.windll.user32
        # Filled in by the callback below while EnumWindows runs.
        windows: List["WeChatWindow"] = []

        @ctypes.WINFUNCTYPE(wintypes.BOOL, wintypes.HWND, wintypes.LPARAM)
        def enum_callback(hwnd, lparam):
            length = user32.GetWindowTextLengthW(hwnd)
            if length > 0:
                # +1 leaves room for the terminating NUL character.
                buff = ctypes.create_unicode_buffer(length + 1)
                user32.GetWindowTextW(hwnd, buff, length + 1)
                title = buff.value
                if self.window_title in title:
                    rect = RECT()
                    user32.GetWindowRect(hwnd, ctypes.byref(rect))
                    is_min = user32.IsIconic(hwnd)
                    windows.append(
                        WeChatWindow(
                            hwnd=hwnd,
                            title=title,
                            rect=rect,
                            isMinimized=bool(is_min),
                        )
                    )
            # A true return value tells EnumWindows to keep enumerating.
            return True

        user32.EnumWindows(enum_callback, 0)
        if windows:
            logger.info(f"找到 {len(windows)} 个微信窗口")
            return windows[0]
        return None

    def screenshot(self, output_path: Optional[str] = None) -> str:
        """Capture the WeChat window to a PNG file.

        Args:
            output_path: Destination path. When None, a timestamped file in
                the system temporary directory is used.

        Returns:
            The path the screenshot was written to.

        Raises:
            RuntimeError: If no window is connected, or if the capture
                produced no image.
            Exception: Any error raised while capturing or saving is logged
                and re-raised.
        """
        if self.main_window is None:
            raise RuntimeError("未连接微信窗口")
        if output_path is None:
            import tempfile
            target = Path(tempfile.gettempdir()) / f"wechat_screenshot_{int(time.time())}.png"
        else:
            target = Path(output_path)
        try:
            # A minimized window cannot be captured; restore it first.
            if self.main_window.is_minimized():
                self.main_window.restore()
            image = self.main_window.capture_as_image()
            # pywinauto returns None here when Pillow is not installed; fail
            # with a clear message instead of an AttributeError on .save().
            if image is None:
                raise RuntimeError("capture_as_image 未返回图像(请确认已安装 Pillow)")
            image.save(str(target))
            logger.debug(f"截图已保存: {target}")
            return str(target)
        except Exception as e:
            logger.error(f"截图失败: {e}")
            raise

    def get_chat_list(self) -> List[str]:
        """Return the texts of the entries in the chat list.

        Returns:
            The item texts, or an empty list if the list could not be read
            (the failure is logged as a warning).
        """
        try:
            # "ChatListBox" is an assumed class name; adjust to the real UI.
            chat_list = self.main_window.window(class_name="ChatListBox")
            return [item.text() for item in chat_list.items()]
        except Exception as e:
            logger.warning(f"获取聊天列表失败: {e}")
            return []

    def click_on_chat(self, chat_name: str) -> bool:
        """Click the chat list entry titled ``chat_name``.

        Args:
            chat_name: Title of the chat entry to click.

        Returns:
            True if the click was issued, False on any error (logged).
        """
        try:
            chat_list = self.main_window.window(class_name="ChatListBox")
            chat_list.window(title=chat_name).click()
            logger.info(f"点击聊天: {chat_name}")
            return True
        except Exception as e:
            logger.error(f"点击聊天失败: {e}")
            return False

    def get_message_list(self, count: int = 10) -> List["ChatMessage"]:
        """Return up to ``count`` of the most recent parsed messages.

        Items whose text cannot be parsed are skipped. If reading the list
        fails part-way, the messages parsed so far are returned and the
        failure is logged as a warning.

        Args:
            count: Maximum number of trailing list items to inspect.

        Returns:
            The parsed messages in list order; empty when ``count`` is not
            positive or nothing could be read.
        """
        # Guard explicitly: items[-0:] would be the WHOLE list, not an empty
        # one, and a negative count would slice from the wrong end.
        if count <= 0:
            return []
        messages: List["ChatMessage"] = []
        try:
            msg_list = self.main_window.window(class_name="ChatMessageList")
            for item in msg_list.items()[-count:]:
                msg = self._parse_message(item.text())
                if msg is not None:
                    messages.append(msg)
        except Exception as e:
            logger.warning(f"获取消息列表失败: {e}")
        return messages

    def _parse_message(self, text: str) -> Optional["ChatMessage"]:
        """Parse one message item's text into a ChatMessage.

        Expected layout: ``<sender> <H:MM or HH:MM>`` on the first line,
        followed by a newline and the content. The sender part is optional;
        a message without a sender is treated as sent by the user.

        Args:
            text: Raw text of a message list item.

        Returns:
            The parsed message, or None when ``text`` is empty or does not
            match the expected layout.
        """
        import re

        if not text:
            return None
        # The sender group is optional so that items starting directly with
        # the timestamp still match and can be flagged as is_self.
        pattern = r"(?:(.+?)\s+)?(\d{1,2}:\d{2})\n([\s\S]+)"
        match = re.match(pattern, text)
        if match is None:
            return None
        sender = (match.group(1) or "").strip()
        return ChatMessage(
            sender=sender,
            content=match.group(3).strip(),
            time=match.group(2).strip(),
            is_self=(sender == ""),
        )

    def send_text(self, text: str) -> bool:
        """Type ``text`` into the input box and press Enter to send it.

        Args:
            text: Text to send.

        Returns:
            True if the keystrokes were issued, False if ``text`` is empty
            or any error occurred (logged).
        """
        # Pressing Enter on an empty input box would not send anything useful.
        if not text:
            logger.warning("发送消息失败: 文本为空")
            return False
        try:
            input_box = self.main_window.window(class_name="Edit")
            # Clear any existing draft, then enter the new text.
            input_box.set_edit_text("")
            input_box.type_keys("^a")  # select all
            input_box.type_keys("{DELETE}")
            input_box.set_edit_text(text)
            # Enter submits the message.
            input_box.type_keys("{ENTER}")
            logger.info(f"发送消息: {text[:20]}...")
            return True
        except Exception as e:
            logger.error(f"发送消息失败: {e}")
            return False

    def find_button(self, name: str) -> Optional["HwndWrapper"]:
        """Look up a button of the main window by its title.

        Args:
            name: Title of the button.

        Returns:
            The window specification for the button if it currently exists,
            otherwise None (also when not connected or on any lookup error).
        """
        if self.main_window is None:
            return None
        try:
            btn = self.main_window.window(title=name, class_name="Button")
            # window() only builds a lazy specification and never fails for a
            # missing control, so check for existence explicitly.
            return btn if btn.exists() else None
        except Exception:
            return None

    def click_button(self, name: str) -> bool:
        """Click the button titled ``name``.

        Args:
            name: Title of the button.

        Returns:
            True if the button was found and clicked, False otherwise.
        """
        try:
            btn = self.find_button(name)
            if btn is None:
                return False
            btn.click()
            return True
        except Exception as e:
            logger.error(f"点击按钮失败: {e}")
            return False

    def is_connected(self) -> bool:
        """Return True when connect() succeeded and has not been undone."""
        return self._connected and self.app is not None

    def disconnect(self):
        """Drop all references to the WeChat application and window.

        Safe to call repeatedly; the log line is only emitted when there was
        an application reference to drop.
        """
        had_app = self.app is not None
        self.app = None
        self.main_window = None
        self._connected = False
        if had_app:
            logger.info("已断开微信连接")

    def __enter__(self):
        """Connect and return the controller.

        A failed connect is not raised here; check is_connected() inside the
        ``with`` block.
        """
        self.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Disconnect on leaving the ``with`` block; exceptions propagate."""
        self.disconnect()