diff --git a/README.md b/README.md
index 9f5849b..2ef1af5 100644
--- a/README.md
+++ b/README.md
@@ -16,9 +16,9 @@
          ↓
 UIAutomation + 屏幕截图
          ↓
-  Qwen-VL2 视觉理解
+  阿里云百炼 Qwen-VL 视觉理解
          ↓
-   LLM 推理判断
+   LLM 推理判断（Qwen Plus）
          ↓
 UIAutomation 执行操作
          ↓
@@ -29,52 +29,86 @@ UIAutomation 执行操作
 
 | 模块 | 说明 |
 |-----|------|
-| `vlm` | 视觉模型接口（Qwen-VL2） |
+| `vlm` | 视觉模型接口（阿里云百炼 Bailian） |
 | `wechat` | 微信客户端控制（UIAutomation） |
 | `core` | 核心引擎（消息捕获、回复判断） |
 | `agent` | AI Agent 逻辑 |
-| `ui` | 桌面 UI 界面 |
+| `ui` | 桌面 UI 界面（待实现） |
 | `config` | 配置管理 |
 
 ## 技术栈
 
 - **语言**: Python 3.10+
-- **视觉模型**: Qwen-VL2（本地部署）
+- **视觉模型**: 阿里云百炼 Qwen-VL 系列
+- **LLM**: 阿里云百炼 Qwen Plus 系列
 - **Windows 控制**: UIAutomation (PyWinAuto)
-- **LLM**: OpenAI 兼容 API
-- **桌面 UI**: PyQt6 / Tkinter
+- **API**: OpenAI 兼容格式（阿里云百炼）
 
 ## 快速开始
 
-### 环境要求
+### 1. 获取阿里云百炼 API Key
 
-- Windows 10/11
-- Python 3.10+
-- 微信 Windows 客户端 3.8.x（推荐）
-- Qwen-VL2 模型（本地部署）
+1. 访问 [阿里云百炼控制台](https://bailian.console.aliyun.com/)
+2. 开通大模型服务
+3. 在 [API Key 管理页面](https://bailian.console.aliyun.com/cn-beijing#/APIKey) 创建 API Key
 
-### 安装
+### 2. 安装依赖
 
 ```bash
 pip install -r requirements.txt
 ```
 
-### 配置
+### 3. 配置
 
 ```bash
 cp config.example.yaml config.yaml
 # 编辑 config.yaml 填入 API 配置
 ```
 
-### 运行
+也可以将 config.yaml 中的 api_key 留空，改为通过环境变量提供 API Key：
+```bash
+export ALIBABA_CLOUD_API_KEY=your-api-key
+# 或
+export DASHSCOPE_API_KEY=your-api-key
+```
+
+### 4. 运行
 
 ```bash
+# 模拟模式（不连接微信，用于测试）
+python src/main.py --mock --demo
+
+# 实际运行（需要微信客户端运行）
 python src/main.py
 ```
 
+## 阿里云百炼模型
+
+### VLM 视觉模型（用于截图识别）
+
+| 模型 | 说明 | 推荐场景 |
+|-----|------|---------|
+| `qwen-vl-latest` | 最新 VL 模型 | **推荐**，微信截图识别 |
+| `qwen-vl2-7b` | Qwen-VL2 7B | 轻量级场景 |
+| `qwen-vl2-72b` | Qwen-VL2 72B | 高精度场景 |
+| `qwen2-vl-72b-instruct` | Qwen2-VL 72B | 最新一代 |
+
+### LLM 模型（用于生成回复）
+
+| 模型 | 说明 | 推荐场景 |
+|-----|------|---------|
+| `qwen-plus` | Qwen Plus | **推荐**，日常对话 |
+| `qwen-max` | Qwen Max | 高质量回复 |
+| `qwen-turbo` | Qwen Turbo | 快速响应 |
+
+更多模型请参见[百炼模型广场](https://bailian.console.aliyun.com/cn-beijing#/model-market)。
+
 ## MVP 功能
 
 ### Phase 1（本期）
+- [x] 项目初始化
+- [x] 阿里云百炼 VLM 集成
+- [x] 阿里云百炼 LLM 集成
 - [ ] 微信窗口识别
 - [ ] 聊天记录截图识别
 - [ ] 用户信息识别
@@ -82,14 +116,66 @@ python src/main.py
 - [ ] 定时轮询机制
 
 ### Phase 2（后续）
-- [ ] 知识库集成
+- [ ] 知识库集成（OpenViking）
 - [ ] 多账号管理
 - [ ] 复杂对话上下文
 
+## 配置示例
+
+```yaml
+vlm:
+  model_type: bailian
+  api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
+  api_key: ""  # 留空时从环境变量 ALIBABA_CLOUD_API_KEY 或 DASHSCOPE_API_KEY 读取
+  model_name: qwen-vl-latest
+  max_tokens: 2048
+  temperature: 0.7
+
+llm:
+  api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
+  api_key: ""
+  model_name: qwen-plus
+  max_tokens: 2048
+  temperature: 0.7
+
+wechat:
+  client_version: "3.8.x"
+  poll_interval: 2.0
+  window_title: "微信"
+
+rules:
+  - keywords: ["你好", "hi"]
+    reply_type: keyword
+    reply_content: "您好，有什么可以帮您的？"
+    enabled: true
+```
+
+## API 参考
+
+### 阿里云百炼 API
+
+- **基础 URL**: `https://dashscope.aliyuncs.com/compatible-mode/v1`
+- **认证**: `Authorization: Bearer {API_KEY}`
+- **格式**: OpenAI Chat Completions 兼容
+
+### VLM 核心方法
+
+```python
+from src.vlm.qwen_vl import BailianVLMClient, analyze_wechat_screenshot
+
+# 方式1: 直接分析微信截图
+result = analyze_wechat_screenshot("wechat.png")
+
+# 方式2: 使用客户端
+client = BailianVLMClient(api_key="your-key")
+result = client.analyze_chat_screenshot("wechat.png")
+```
+
 ## 参考项目
 
 - [thiflow-research](http://192.168.5.5:3000/jesxion/thiflow-research) - Thiflow 产品研究
 - [thiflow.com](https://thiflow.com/) - 参考产品
+- [阿里云百炼](https://bailian.console.aliyun.com/) - VLM & LLM 提供商
 
 ## License
 
diff --git a/config.example.yaml b/config.example.yaml
index 142338d..585269d 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -1,17 +1,36 @@
 # WeChat Agent 配置文件示例
 
+# ============================================
+# VLM 视觉模型配置（阿里云百炼）
+# ============================================
+# 阿里云百炼平台: https://bailian.console.aliyun.com/
+# API Key 获取: https://bailian.console.aliyun.com/cn-beijing#/APIKey
+# 
+# 支持的模型:
+#   - qwen-vl-latest (推荐，VL 理解)
+#   - qwen-vl2-7b
+#   - qwen-vl2-72b
+#   - qwen2-vl-72b-instruct
+#   - qwen2.5-vl-72b-instruct
+#   - qwen-omni-series (全模态)
+# ============================================
 vlm:
-  model_type: qwen-vl2  # qwen-vl2 / gpt-4v
-  api_base: http://localhost:8000/v1  # VLM API 地址
-  api_key: ""  # VLM API Key（如果需要）
-  model_name: Qwen-VL2  # 模型名称
+  model_type: bailian  # bailian / qwen-vl（gpt-4v 尚未实现，选择后会报错）
+  # 阿里云百炼 API (OpenAI 兼容格式)
+  api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
+  # API Key: 设置环境变量 ALIBABA_CLOUD_API_KEY 或 DASHSCOPE_API_KEY
+  # 或直接在这里填写:
+  # api_key: your-api-key-here
+  api_key: ""
+  model_name: qwen-vl-latest  # 模型名称
   max_tokens: 2048
   temperature: 0.7
 
 llm:
-  api_base: https://api.openai.com/v1  # LLM API 地址
-  api_key: your-api-key-here  # OpenAI API Key
-  model_name: gpt-4o  # 模型名称
+  api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
+  # LLM API Key (同上，可以使用相同的 API Key)
+  api_key: ""
+  model_name: qwen-plus  # 或 qwen-max, qwen-turbo 等
   max_tokens: 2048
   temperature: 0.7
 
@@ -32,15 +51,30 @@ rules:
     reply_content: "您好，有什么可以帮您的？"
     enabled: true
   
-  # AI 回复示例（无匹配关键词时）
+  # AI 回复示例（无匹配关键词时，使用 LLM 生成回复）
   - keywords: []
     reply_type: AI
     reply_content: ""
     enabled: true
 
-# 知识库（可选，后续接入）
+# 知识库（可选，后续接入 OpenViking）
 knowledge_base:
   url: http://192.168.5.5:1933
 
 # 日志级别
 log_level: INFO
+
+# ============================================
+# 环境变量
+# ============================================
+# 推荐将敏感信息放在环境变量中:
+#
+# Linux/macOS:
+#   export ALIBABA_CLOUD_API_KEY=your-api-key
+#   export DASHSCOPE_API_KEY=your-api-key   # 或使用此变量，二者任选其一
+#
+# Windows (cmd；PowerShell 请使用 $env:ALIBABA_CLOUD_API_KEY="your-api-key"):
+#   set ALIBABA_CLOUD_API_KEY=your-api-key
+#
+# 或使用 .env 文件 (需要 python-dotenv):
+#   ALIBABA_CLOUD_API_KEY=your-api-key
diff --git a/requirements.txt b/requirements.txt
index 47269ff..ffc3ec6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,14 +8,13 @@ pywin32>=305
 requests>=2.28.0
 urllib3>=1.26.0
 
+# 阿里云百炼 / OpenAI 兼容 API
+openai>=1.0.0
+
 # 数据处理
 pyyaml>=6.0
 pillow>=9.0.0
 
-# 异步（可选）
-# asyncio
-# aiohttp>=3.8.0
-
 # 日志
 # coloredlogs>=15.0  # 可选
 
diff --git a/src/__pycache__/main.cpython-311.pyc b/src/__pycache__/main.cpython-311.pyc
new file mode 100644
index 0000000..ea78181
Binary files /dev/null and b/src/__pycache__/main.cpython-311.pyc differ
diff --git a/src/config/__pycache__/settings.cpython-311.pyc b/src/config/__pycache__/settings.cpython-311.pyc
new file mode 100644
index 0000000..d07a065
Binary files /dev/null and b/src/config/__pycache__/settings.cpython-311.pyc differ
diff --git a/src/config/settings.py b/src/config/settings.py
index 7e11dd6..65cb4bf 100644
--- a/src/config/settings.py
+++ b/src/config/settings.py
@@ -12,21 +12,21 @@ from pathlib import Path
 
 @dataclass
 class VLMSettings:
-    """视觉模型配置"""
-    model_type: str = "qwen-vl2"  # qwen-vl2 / GPT-4V
-    api_base: str = "http://localhost:8000/v1"
+    """视觉模型配置（阿里云百炼）"""
+    model_type: str = "bailian"  # bailian / qwen-vl (gpt-4v is not implemented yet)
+    api_base: str = "https://dashscope.aliyuncs.com/compatible-mode/v1"  # 阿里云百炼
     api_key: str = ""
-    model_name: str = "Qwen-VL2"
+    model_name: str = "qwen-vl-latest"  # 推荐 qwen-vl-latest
     max_tokens: int = 2048
     temperature: float = 0.7
 
 
 @dataclass
 class LLMSettings:
-    """LLM 配置"""
-    api_base: str = "https://api.openai.com/v1"
+    """LLM 配置（阿里云百炼）"""
+    api_base: str = "https://dashscope.aliyuncs.com/compatible-mode/v1"  # 阿里云百炼
     api_key: str = ""
-    model_name: str = "gpt-4o"
+    model_name: str = "qwen-plus"  # 或 qwen-max, qwen-turbo
     max_tokens: int = 2048
     temperature: float = 0.7
 
diff --git a/src/main.py b/src/main.py
index 8b31bf5..915092d 100644
--- a/src/main.py
+++ b/src/main.py
@@ -2,6 +2,7 @@
 WeChat Agent 主入口
 """
 
+import os
 import sys
 import logging
 import argparse
@@ -67,16 +68,34 @@ def main():
         logger.error(f"VLM 客户端创建失败: {e}")
         return
     
-    # 创建 LLM 客户端（简化版，后续实现）
-    class SimpleLLMClient:
+    # 创建 LLM 客户端（阿里云百炼兼容）
+    import openai
+    
+    class BailianLLMClient:
+        """阿里云百炼 LLM 客户端"""
+        
         def __init__(self, config):
             self.config = config
+            self.client = openai.OpenAI(
+                api_key=config.llm.api_key or os.environ.get("ALIBABA_CLOUD_API_KEY") or os.environ.get("DASHSCOPE_API_KEY", ""),
+                base_url=config.llm.api_base
+            )
         
         def chat(self, messages):
-            # 实际调用需要对接 OpenAI 兼容 API
-            return {"text": "测试回复"}
+            """发送对话请求"""
+            try:
+                response = self.client.chat.completions.create(
+                    model=self.config.llm.model_name,
+                    messages=messages,
+                    max_tokens=self.config.llm.max_tokens,
+                    temperature=self.config.llm.temperature
+                )
+                return {"text": response.choices[0].message.content}
+            except Exception as e:
+                logger.error(f"LLM 请求失败: {e}")
+                return {"text": ""}
     
-    llm_client = SimpleLLMClient(config.llm)
+    llm_client = BailianLLMClient(config)
     
     # 创建微信控制器
     if args.mock:
diff --git a/src/vlm/qwen_vl.py b/src/vlm/qwen_vl.py
index e0c0c9f..3d82436 100644
--- a/src/vlm/qwen_vl.py
+++ b/src/vlm/qwen_vl.py
@@ -1,14 +1,21 @@
 """
-视觉模型接口
-Vision Language Model interface for Qwen-VL2
+阿里云百炼视觉模型接口
+Vision Language Model interface for Alibaba Cloud Bailian (百炼) Platform
+
+支持模型:
+- qwen-vl-latest (推荐，用于微信截图识别)
+- qwen-omni-series (全模态，支持文本/图像/视频/音频)
+- qwen2-vl-series (Qwen2-VL 系列)
+
+API 文档: https://help.aliyun.com/zh/model-studio/qwen-api-via-openai-chat-completions
+控制台: https://bailian.console.aliyun.com/
 """
 
 import base64
 import json
 import logging
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from typing import List, Optional, Dict, Any
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict, Any, Union
 from pathlib import Path
 
 import requests
@@ -16,11 +23,15 @@ import requests
 logger = logging.getLogger(__name__)
 
 
+# 阿里云百炼 API 配置
+DASHSCOPE_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
+
+
 @dataclass
 class VLMMessages:
     """VLM 消息结构"""
     role: str  # user / assistant
-    content: str  # 文本或 image URL
+    content: Union[str, List[Dict]]  # plain text, or a list of OpenAI-style content parts (text / image_url dicts)
 
 
 @dataclass
@@ -30,23 +41,29 @@ class VLMResponse:
     raw: dict
 
 
-class BaseVLM(ABC):
-    """视觉模型基类"""
+class BailianVLMClient:
+    """阿里云百炼 VLM 客户端
     
-    @abstractmethod
-    def chat(self, messages: List[VLMMessages], **kwargs) -> VLMResponse:
-        """发送对话请求"""
-        pass
+    阿里云百炼平台的视觉模型客户端，支持 qwen-vl 系列。
+    使用 OpenAI Chat Completions 兼容格式。
     
-    @abstractmethod
-    def analyze_image(self, image_path: str, prompt: str, **kwargs) -> str:
-        """分析图片"""
-        pass
-
-
-class QwenVL2Client(BaseVLM):
-    """Qwen-VL2 客户端"""
+    使用方式:
+        client = BailianVLMClient(api_key="your-api-key")
+        
+        # 方式1: 文本对话
+        result = client.chat([VLMMessages(role="user", content="你好")])
+        
+        # 方式2: 图文对话
+        result = client.analyze_image(
+            image_path="screenshot.png",
+            prompt="描述这张图片的内容"
+        )
+        
+        # 方式3: 分析微信截图
+        chat_info = client.analyze_chat_screenshot("wechat.png")
+    """
     
+    # 系统提示词 - 微信 UI 识别专家
     SYSTEM_PROMPT = """你是一个专业的 Windows 微信客户端 UI 识别助手。
 你的任务是根据截图准确识别微信界面中的元素。
 
@@ -63,15 +80,42 @@ class QwenVL2Client(BaseVLM):
 
     def __init__(
         self,
-        api_base: str = "http://localhost:8000/v1",
-        api_key: str = "",
-        model_name: str = "Qwen-VL2",
+        api_key: str = None,
+        model_name: str = "qwen-vl-latest",
+        base_url: str = DASHSCOPE_BASE_URL,
         max_tokens: int = 2048,
         temperature: float = 0.7
     ):
-        self.api_base = api_base.rstrip("/")
+        """
+        初始化阿里云百炼 VLM 客户端
+        
+        Args:
+            api_key: 阿里云百炼 API Key
+                    可从环境变量 ALIBABA_CLOUD_API_KEY 或 DASHSCOPE_API_KEY 获取
+                    或从 https://bailian.console.aliyun.com/ 获取
+            model_name: 模型名称
+                - qwen-vl-latest (推荐，VL 理解)
+                - qwen-vl2-7b
+                - qwen-vl2-72b
+                - qwen2-vl-72b-instruct
+                - qwen2.5-vl-72b-instruct
+                - qwen-omni-series (全模态)
+            base_url: API 基础地址（OpenAI 兼容格式）
+            max_tokens: 最大生成 token 数
+            temperature: 生成温度
+        """
+        import os
+        
+        # 获取 API Key
+        if not api_key:
+            api_key = os.environ.get("ALIBABA_CLOUD_API_KEY") or os.environ.get("DASHSCOPE_API_KEY", "")
+        
+        if not api_key:
+            logger.warning("未提供阿里云百炼 API Key，请设置 ALIBABA_CLOUD_API_KEY 环境变量")
+        
         self.api_key = api_key
         self.model_name = model_name
+        self.base_url = base_url.rstrip("/")
         self.max_tokens = max_tokens
         self.temperature = temperature
     
@@ -80,10 +124,57 @@ class QwenVL2Client(BaseVLM):
         with open(image_path, "rb") as f:
             return base64.b64encode(f.read()).decode("utf-8")
     
-    def chat(self, messages: List[VLMMessages], **kwargs) -> VLMResponse:
-        """发送对话请求"""
+    def _get_image_media_type(self, image_path: str) -> str:
+        """根据文件扩展名获取 media type"""
+        ext = Path(image_path).suffix.lower()
+        mime_types = {
+            ".jpg": "image/jpeg",
+            ".jpeg": "image/jpeg",
+            ".png": "image/png",
+            ".gif": "image/gif",
+            ".webp": "image/webp",
+            ".bmp": "image/bmp",
+        }
+        return mime_types.get(ext, "image/jpeg")
+    
+    def chat(
+        self,
+        messages: List[VLMMessages],
+        system_prompt: str = None,
+        **kwargs
+    ) -> VLMResponse:
+        """发送对话请求
+        
+        Args:
+            messages: 消息列表
+            system_prompt: 系统提示词（可选）
+            **kwargs: 其他参数（max_tokens, temperature 等）
+        
+        Returns:
+            VLMResponse 对象，包含 text 属性（解析后的文本）和 raw 属性（原始响应）
+        
+        示例:
+            # 纯文本对话
+            client.chat([VLMMessages(role="user", content="你好")])
+            
+            # 图文对话
+            client.chat([
+                VLMMessages(role="user", content=[
+                    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
+                    {"type": "text", "text": "描述图片内容"}
+                ])
+            ])
+        """
         # 构造消息格式
         formatted_messages = []
+        
+        # 添加系统提示
+        if system_prompt:
+            formatted_messages.append({
+                "role": "system",
+                "content": system_prompt
+            })
+        
         for msg in messages:
             if isinstance(msg.content, str):
                 formatted_messages.append({
@@ -97,13 +188,14 @@ class QwenVL2Client(BaseVLM):
                     "content": msg.content
                 })
         
-        # 添加系统提示
-        if not any(m.role == "system" for m in messages):
+        # 如果没有系统提示，使用默认的
+        if not system_prompt and not any(m.get("role") == "system" for m in formatted_messages):
             formatted_messages.insert(0, {
                 "role": "system",
                 "content": self.SYSTEM_PROMPT
             })
         
+        # 构建请求
         payload = {
             "model": self.model_name,
             "messages": formatted_messages,
@@ -112,14 +204,13 @@ class QwenVL2Client(BaseVLM):
         }
         
         headers = {
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}",
         }
-        if self.api_key:
-            headers["Authorization"] = f"Bearer {self.api_key}"
         
         try:
             resp = requests.post(
-                f"{self.api_base}/chat/completions",
+                f"{self.base_url}/chat/completions",
                 headers=headers,
                 json=payload,
                 timeout=60
@@ -127,6 +218,8 @@ class QwenVL2Client(BaseVLM):
             resp.raise_for_status()
             data = resp.json()
             
+            logger.debug(f"VLM API 响应: {json.dumps(data, ensure_ascii=False)[:500]}")
+            
             return VLMResponse(
                 text=data["choices"][0]["message"]["content"],
                 raw=data
@@ -136,18 +229,35 @@ class QwenVL2Client(BaseVLM):
             raise VLMError(f"VLM 请求失败: {e}")
     
     def analyze_image(self, image_path: str, prompt: str, **kwargs) -> str:
-        """分析图片"""
+        """分析单张图片
+        
+        Args:
+            image_path: Local image file path; it must exist on disk (remote URLs are rejected by the existence check)
+            prompt: 分析提示词
+            **kwargs: 其他参数
+        
+        Returns:
+            分析结果文本
+        
+        示例:
+            result = client.analyze_image(
+                "screenshot.png",
+                "识别图片中的所有文字内容"
+            )
+        """
         if not Path(image_path).exists():
             raise VLMError(f"图片不存在: {image_path}")
         
-        # 构造多模态消息
+        # 编码图片为 base64
         image_data = self._encode_image(image_path)
+        media_type = self._get_image_media_type(image_path)
         
+        # 构造多模态消息
         content = [
             {
                 "type": "image_url",
                 "image_url": {
-                    "url": f"data:image/jpeg;base64,{image_data}"
+                    "url": f"data:{media_type};base64,{image_data}"
                 }
             },
             {
@@ -162,28 +272,43 @@ class QwenVL2Client(BaseVLM):
         return response.text
     
     def analyze_chat_screenshot(self, screenshot_path: str) -> Dict[str, Any]:
-        """分析聊天窗口截图
+        """分析微信聊天窗口截图
+        
+        这是最常用的功能 - 分析微信截图，提取聊天信息。
+        
+        Args:
+            screenshot_path: 截图文件路径
         
         Returns:
-            解析后的聊天信息，包含：
-            - messages: 消息列表
-            - current_chat: 当前聊天对象
+            解析后的聊天信息字典，包含：
+            - current_chat: 当前聊天对象名称
             - has_new_message: 是否有新消息
+            - messages: 消息列表，每条消息包含：
+                - sender: 发送者
+                - content: 消息内容
+                - time: 时间
+                - is_self: 是否是自己发送的
+        
+        示例:
+            result = client.analyze_chat_screenshot("wechat_chat.png")
+            if result.get("has_new_message"):
+                for msg in result["messages"]:
+                    print(f"{msg['sender']}: {msg['content']}")
         """
-        prompt = """请分析这个微信聊天截图，返回 JSON 格式：
+        prompt = """请分析这个微信聊天截图，返回严格的 JSON 格式，不要包含其他内容：
 {
-    "current_chat": "当前聊天对象名称",
-    "has_new_message": true/false,
+    "current_chat": "当前聊天对象名称（如果是群聊，返回群名）",
+    "has_new_message": true或false（根据是否有未读标记判断）,
     "messages": [
         {
-            "sender": "发送者",
-            "content": "消息内容",
-            "time": "时间",
-            "is_self": true/false
+            "sender": "发送者昵称",
+            "content": "消息内容（图片用[图片]表示，语音用[语音]表示，视频用[视频]表示，文件用[文件]表示）",
+            "time": "时间字符串，如10:30",
+            "is_self": true或false（是否是自己发送的消息）
         }
     ]
 }
-只返回 JSON，不要其他内容。"""
+只返回 JSON，不要其他文字。"""
         
         result = self.analyze_image(screenshot_path, prompt)
         
@@ -194,25 +319,33 @@ class QwenVL2Client(BaseVLM):
             end = result.rfind("}") + 1
             if start >= 0 and end > start:
                 json_str = result[start:end]
-                return json.loads(json_str)
+                parsed = json.loads(json_str)
+                logger.info(f"聊天截图解析成功: {parsed.get('current_chat', 'unknown')}, {len(parsed.get('messages', []))} 条消息")
+                return parsed
             else:
+                logger.warning(f"无法从响应中提取 JSON: {result[:200]}")
                 return {"raw": result}
-        except json.JSONDecodeError:
+        except json.JSONDecodeError as e:
+            logger.warning(f"JSON 解析失败: {e}, 原始响应: {result[:200]}")
             return {"raw": result}
     
     def detect_ui_elements(self, screenshot_path: str) -> Dict[str, Any]:
-        """检测 UI 元素位置
+        """检测 UI 元素位置和类型
+        
+        Args:
+            screenshot_path: 截图文件路径
         
         Returns:
-            UI 元素字典，包含类型和位置
+            UI 元素字典，包含 elements 列表
         """
-        prompt = """请分析这个微信界面截图，标注关键 UI 元素的位置：
+        prompt = """请分析这个微信界面截图，标注关键 UI 元素的位置和类型，返回 JSON 格式：
 {
     "elements": [
         {
-            "type": "button/input/chat_list/...",
-            "name": "元素名称",
-            "bounds": {"x": 0, "y": 0, "width": 100, "height": 50}
+            "type": "元素类型（button/input/chat_list/message_area/sidebar/title_bar等）",
+            "name": "元素名称或描述",
+            "bounds": {"x": 0, "y": 0, "width": 100, "height": 50},
+            "clickable": true或false
         }
     ]
 }
@@ -228,6 +361,55 @@ class QwenVL2Client(BaseVLM):
             return {"raw": result}
         except json.JSONDecodeError:
             return {"raw": result}
+    
+    def recognize_text(self, screenshot_path: str) -> str:
+        """识别图片中的所有文字
+        
+        Args:
+            screenshot_path: 截图文件路径
+        
+        Returns:
+            识别出的所有文字内容
+        """
+        prompt = """请识别图片中的所有文字内容，按原顺序输出，保持格式。"""
+        return self.analyze_image(screenshot_path, prompt)
+    
+    def check_for_new_messages(self, screenshot_path: str) -> bool:
+        """快速检查是否有新消息
+        
+        Args:
+            screenshot_path: 截图文件路径
+        
+        Returns:
+            True 如果有未读消息红色标记，False 否则
+        """
+        prompt = """快速判断：这张微信截图中是否有未读消息的红点或数字标记？
+只返回 true 或 false。"""
+        result = self.analyze_image(screenshot_path, prompt).strip().lower()
+        return "true" in result and "false" not in result
+
+
+class QwenVL2Client(BailianVLMClient):
+    """Qwen-VL2 客户端（向后兼容）
+    
+    兼容旧的接口，内部使用 BailianVLMClient
+    """
+    
+    def __init__(
+        self,
+        api_base: str = DASHSCOPE_BASE_URL,
+        api_key: str = "",
+        model_name: str = "qwen-vl-latest",
+        max_tokens: int = 2048,
+        temperature: float = 0.7
+    ):
+        super().__init__(
+            api_key=api_key,
+            model_name=model_name,
+            base_url=api_base,
+            max_tokens=max_tokens,
+            temperature=temperature
+        )
 
 
 class VLMError(Exception):
@@ -236,20 +418,51 @@ class VLMError(Exception):
 
 
 # 工厂函数
-def create_vlm_client(config: dict) -> BaseVLM:
-    """创建 VLM 客户端"""
-    model_type = config.get("model_type", "qwen-vl2").lower()
+def create_vlm_client(config: dict) -> BailianVLMClient:
+    """创建 VLM 客户端
     
-    if model_type == "qwen-vl2":
-        return QwenVL2Client(
-            api_base=config.get("api_base", "http://localhost:8000/v1"),
+    Args:
+        config: 配置字典，包含：
+            - model_type: 模型类型（bailian / qwen-vl）
+            - api_key: API Key（可选，从环境变量读取）
+            - model_name: 模型名称
+            - api_base: API 基础地址
+    
+    Returns:
+        BailianVLMClient 实例
+    """
+    model_type = config.get("model_type", "bailian").lower()
+    
+    if model_type in ("bailian", "qwen-vl", "qwen", "aliyun"):
+        return BailianVLMClient(
             api_key=config.get("api_key", ""),
-            model_name=config.get("model_name", "Qwen-VL2"),
+            model_name=config.get("model_name", "qwen-vl-latest"),
+            base_url=config.get("api_base", DASHSCOPE_BASE_URL),
             max_tokens=config.get("max_tokens", 2048),
             temperature=config.get("temperature", 0.7)
         )
     elif model_type == "gpt-4v":
-        # GPT-4V 客户端（待实现）
-        raise NotImplementedError("GPT-4V 客户端待实现")
+        raise NotImplementedError("GPT-4V 客户端请使用 OpenAI 兼容接口")
     else:
         raise ValueError(f"不支持的 VLM 类型: {model_type}")
+
+
+# 直接使用函数
+def analyze_wechat_screenshot(screenshot_path: str, api_key: str = None) -> Dict[str, Any]:
+    """快捷函数：分析微信截图
+    
+    这是一个便捷函数，可以直接分析微信截图。
+    
+    Args:
+        screenshot_path: 截图文件路径
+        api_key: 阿里云百炼 API Key（可选，从环境变量读取）
+    
+    Returns:
+        解析后的聊天信息
+    
+    示例:
+        result = analyze_wechat_screenshot("wechat.png")
+        print(result["messages"])
+    """
+    client = BailianVLMClient(api_key=api_key)
+    return client.analyze_chat_screenshot(screenshot_path)