Python使用DrissionPage实现上传文件的实战指南

Python使用DrissionPage实现上传文件的实战指南

DrissionPage是一个基于Python的网页自动化工具，结合了selenium和requests的优点。以下是使用DrissionPage实现文件上传的完整实战指南。

一、环境准备

1. 安装DrissionPage

pip install DrissionPage

2. 导入必要模块

from DrissionPage import ChromiumPage, ChromiumOptions
import time
import os

二、基本文件上传方法

1. 通过input标签上传文件

最常见的上传文件方式是通过<input type="file">元素：

def upload_via_input():
    """Upload one file through an ``<input type="file">`` element.

    Opens the example upload page, hands a local file path to the file
    input, submits the form and prints a message when the success text
    shows up on the page.  The browser is closed even if a step raises.
    """
    page = ChromiumPage()
    try:
        page.get('https://example.com/upload')

        # Locate the file input.  Equivalent alternatives:
        #   page.ele('#file-upload')                    (id shortcut)
        #   page.ele('xpath://input[@type="file"]')     (XPath)
        file_input = page.ele('tag:input@type=file')

        # Windows example path; on Linux/macOS use a POSIX-style path such
        # as '/home/user/documents/example.jpg'.
        file_path = r'C:\Users\test\Desktop\example.jpg'
        file_input.input(file_path)

        # Submit the form.
        submit_btn = page.ele('tag:button@type=submit')
        submit_btn.click()

        # Give the upload a moment before looking for the result.
        page.wait(3)

        # A failed lookup yields a falsy object, so a plain truth test works.
        if page.ele('text:上传成功', timeout=5):
            print("文件上传成功！")
    finally:
        # Always release the browser, including on errors above.
        page.quit()

2. 多文件上传

def upload_multiple_files():
    """Attach several local files to a single file input in one call."""
    browser = ChromiumPage()
    browser.get('https://example.com/multi-upload')

    upload_field = browser.ele('tag:input@type=file')

    # Every file that should be attached.
    file_paths = [r'C:\file1.jpg', r'C:\file2.pdf', r'C:\file3.txt']

    # The paths are handed over as one newline-separated string.
    joined_paths = '\n'.join(file_paths)
    upload_field.input(joined_paths)

    print(f"已选择 {len(file_paths)} 个文件")
    browser.wait(3)
    browser.quit()

三、处理复杂上传场景

1. 非input元素的文件上传（如div拖拽）

def upload_via_drag_drop():
    """Upload to a drag-and-drop zone.

    Prefers a hidden ``<input type="file">`` when the page has one;
    otherwise dispatches a synthetic ``drop`` event on ``.drop-zone`` via
    JavaScript.  The browser is closed even if a step raises.
    """
    # Fallback script: builds an in-memory File (placeholder content, not
    # the local file) and fires a drop event on the drop zone.
    js_script = """
        // 创建文件对象
        const dataTransfer = new DataTransfer();

        // 创建一个文件对象（注意：这里需要实际文件，简化示例）
        // 实际应用中可能需要更复杂的处理
        const file = new File(['content'], 'filename.jpg', {type: 'image/jpeg'});
        dataTransfer.items.add(file);

        // 触发拖放事件
        const dropEvent = new DragEvent('drop', {
            dataTransfer: dataTransfer
        });

        document.querySelector('.drop-zone').dispatchEvent(dropEvent);
        """

    page = ChromiumPage()
    try:
        page.get('https://example.com/drag-drop-upload')

        file_path = r'C:\test\file.jpg'

        # Option 1: drop zones are often backed by a hidden file input.
        hidden_input = page.ele('tag:input@type=file', timeout=2)
        if hidden_input:
            hidden_input.input(file_path)
        else:
            # Option 2: simulate the drop in the page itself.
            page.run_js(js_script)

        page.wait(2)
    finally:
        page.quit()

2. 使用autoit处理Windows文件选择对话框

def upload_with_autoit():
    """Drive the native Windows file-open dialog with autoit."""
    import autoit
    from DrissionPage import ChromiumPage

    # Window title of the file dialog that both autoit calls target.
    dialog_title = "打开"

    browser = ChromiumPage()
    browser.get('https://example.com/upload')

    # Clicking the page's upload button opens the system dialog.
    browser.ele('#upload-button').click()

    # Fixed pause to let the dialog appear.
    time.sleep(2)

    # Fill in the file name field, then press the dialog's button.
    autoit.control_set_text(dialog_title, "Edit1", r"C:\test\file.jpg")
    autoit.control_click(dialog_title, "Button1")

    # Fixed pause for the upload, then close the browser.
    browser.wait(5)
    browser.quit()

四、实际项目示例

1. 上传文件到网盘

class CloudStorageUploader:
    """Upload files to a cloud-drive web UI through one browser page.

    A ``ChromiumPage`` is created per instance and reused by every method;
    call ``close()`` when finished.
    """

    def __init__(self, username, password):
        """Open a browser page and store the credentials.

        Args:
            username: Account name typed into the login form.
            password: Password typed into the login form.
        """
        self.page = ChromiumPage()
        self.username = username
        self.password = password
        self.logged_in = False  # set to True by login()

    def login(self):
        """Fill in and submit the login form, then mark the session as logged in."""
        self.page.get('https://cloud.example.com/login')

        # Enter the credentials.
        self.page.ele('#username').input(self.username)
        self.page.ele('#password').input(self.password)

        # Submit the form.
        self.page.ele('tag:button@type=submit').click()

        # NOTE(review): success is assumed after a fixed 3 s pause; nothing
        # on the page is inspected to confirm the login worked.
        self.page.wait(3)
        self.logged_in = True
        print("登录成功")

    def upload_file(self, file_path, target_folder="My Files"):
        """Upload one file, optionally into a specific folder.

        Args:
            file_path: Local path of the file to upload.
            target_folder: Folder to pick in ``#folder-select``; the default
                value skips folder selection.

        Returns:
            The text of the ``.upload-result`` element.

        Raises:
            Exception: If no file input is found on the upload page.
            TimeoutError: If completion is not detected in time.
        """
        if not self.logged_in:
            self.login()

        self.page.get('https://cloud.example.com/upload')

        # Only touch the folder selector for a non-default folder.
        if target_folder != "My Files":
            folder_select = self.page.ele('#folder-select')
            folder_select.select(target_folder)

        file_input = self.page.ele('tag:input@type=file', timeout=10)
        if not file_input:
            raise Exception("未找到文件上传输入框")

        file_input.input(file_path)

        # Block until the page signals completion (or time out).
        self._wait_for_upload_complete()

        return self.page.ele('.upload-result').text

    def upload_multiple_with_progress(self, file_paths, timeout=None):
        """Upload several files in batch mode and print progress.

        Args:
            file_paths: Local paths of the files to upload.
            timeout: Maximum number of seconds to poll for completion.
                ``None`` (default) polls without a limit.

        Raises:
            TimeoutError: If ``timeout`` is given and elapses before every
                file is reported as completed.
        """
        # Same precondition as upload_file(): make sure we are logged in.
        if not self.logged_in:
            self.login()

        self.page.get('https://cloud.example.com/batch-upload')

        # Switch the page into batch mode.
        batch_toggle = self.page.ele('#batch-mode')
        batch_toggle.click()

        # Hand over all paths as one newline-separated string.
        file_input = self.page.ele('#batch-file-input')
        file_input.input('\n'.join(file_paths))

        uploaded_count = 0
        total_files = len(file_paths)
        deadline = None if timeout is None else time.monotonic() + timeout

        while uploaded_count < total_files:
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError("批量上传超时")

            progress = self.page.ele('.progress-text').text
            print(f"上传进度: {progress}")

            # Count the items the page marks as completed.
            completed = self.page.eles('.file-item.completed')
            uploaded_count = len(completed)

            time.sleep(1)

        print("所有文件上传完成！")

    def _wait_for_upload_complete(self, timeout=60):
        """Poll the page until the upload is reported complete.

        Args:
            timeout: Maximum number of seconds to keep polling.

        Returns:
            True once the progress bar value is ``'100'`` or the success
            text is present.

        Raises:
            TimeoutError: If neither signal appears within ``timeout``.
        """
        start_time = time.time()

        while time.time() - start_time < timeout:
            # Signal 1: progress bar reached 100.
            progress_bar = self.page.ele('.progress-bar', timeout=1)
            if progress_bar:
                progress = progress_bar.attr('value')
                if progress == '100':
                    print("上传完成")
                    return True

            # Signal 2: success message on the page.
            if self.page.ele('text:上传成功', timeout=1):
                return True

            time.sleep(1)

        raise TimeoutError("上传超时")

    def close(self):
        """Close the browser."""
        self.page.quit()


# 使用示例
def main():
    """Demo: upload one document, then a batch of photos, and always close."""
    uploader = CloudStorageUploader('your_username', 'your_password')

    # Photos for the batch step.
    files_to_upload = [
        r'C:\照片\vacation1.jpg',
        r'C:\照片\vacation2.jpg',
        r'C:\照片\vacation3.jpg',
    ]

    try:
        # Single file into a named folder.
        outcome = uploader.upload_file(r'C:\重要文档\report.pdf', '工作文档')
        print(f"上传结果: {outcome}")

        # Batch upload with progress output.
        uploader.upload_multiple_with_progress(files_to_upload)
    except Exception as err:
        print(f"上传失败: {err}")
    finally:
        uploader.close()


# Run the demo only when executed as a script.
if __name__ == "__main__":
    main()

2. 自动化测试中的文件上传

import unittest
from DrissionPage import ChromiumPage


class FileUploadTests(unittest.TestCase):
    """Browser-driven test cases for a file upload page."""

    def setUp(self):
        """Open a fresh browser on the upload test page before each test."""
        self.page = ChromiumPage()
        self.page.get('http://localhost:8080/upload-test')

    def test_single_file_upload(self):
        """A valid file uploads and its name is shown."""
        file_input = self.page.ele('#fileInput')
        file_input.input('test_data/test_image.jpg')

        self.page.ele('#submitBtn').click()

        # A failed lookup returns a falsy placeholder rather than None, so
        # assert on truthiness instead of "is not None".
        success_msg = self.page.ele('.success-message', timeout=5)
        self.assertTrue(success_msg, "上传成功消息未显示")

        # The displayed file name must contain the uploaded file's name.
        filename_display = self.page.ele('#filename').text
        self.assertIn('test_image.jpg', filename_display)

    def test_file_type_validation(self):
        """An unsupported file type produces an error message."""
        file_input = self.page.ele('#fileInput')
        file_input.input('test_data/invalid.exe')

        self.page.ele('#submitBtn').click()

        error_msg = self.page.ele('.error-message', timeout=5)
        self.assertTrue(error_msg, "文件类型错误提示未显示")

    def test_file_size_limit(self):
        """A file above the 10 MB limit produces a size error."""
        import os
        import tempfile

        # Build the oversized fixture in the temp directory so the test does
        # not depend on test_data/ existing, and remove it afterwards.
        fd, large_file = tempfile.mkstemp(suffix='.bin')
        self.addCleanup(os.remove, large_file)
        with os.fdopen(fd, 'wb') as f:
            f.write(b'0' * (11 * 1024 * 1024))  # 11 MB

        file_input = self.page.ele('#fileInput')
        file_input.input(large_file)

        self.page.ele('#submitBtn').click()

        size_error = self.page.ele('text:文件大小不能超过10MB', timeout=5)
        self.assertTrue(size_error)

    def tearDown(self):
        """Close the browser after each test."""
        self.page.quit()


# Run the test suite when executed as a script.
if __name__ == '__main__':
    unittest.main()

五、高级技巧和问题解决

1. 处理动态加载的元素

def upload_to_dynamic_form():
    """Upload through a form whose upload area is rendered dynamically.

    Waits for the form, optionally clicks the button that reveals the
    upload area, waits for that area, then fills its file input.
    """
    page = ChromiumPage()
    page.get('https://example.com/dynamic-form')

    # Wait for the dynamically loaded form.
    page.wait.ele_displayed('#dynamicUploadForm', timeout=10)

    # Some pages need a click before the upload area is shown.
    show_upload_btn = page.ele('#showUploadArea')
    if show_upload_btn:
        show_upload_btn.click()

    # Wait for the upload area, then fetch it with ele().  The wait call's
    # return value is not used as an element because it may be a plain bool
    # depending on the DrissionPage version.
    page.wait.ele_displayed('#uploadArea', timeout=5)
    upload_area = page.ele('#uploadArea')

    file_input = upload_area.ele('tag:input@type=file')
    file_input.input('/path/to/file.pdf')

    # NOTE(review): the browser is not closed here, presumably so the upload
    # can proceed; add page.quit() once completion can be detected.

2. 处理iframe中的上传

def upload_in_iframe():
    """Upload through a file input that lives inside an iframe."""
    page = ChromiumPage()
    page.get('https://example.com/page-with-iframe')

    # get_frame() returns a frame object that is searched directly.  A CSS
    # selector needs the 'css:' prefix; a bare string without a locator
    # prefix is not interpreted as CSS by DrissionPage.
    iframe = page.get_frame('css:iframe#uploadFrame')

    # Work inside the iframe through the frame object.
    file_input = iframe.ele('tag:input@type=file')
    file_input.input('/path/to/file.jpg')

    # No "switch back" step: the page object keeps addressing the main
    # document, and ChromiumPage has no Selenium-style switch_to API.

    # NOTE(review): the browser is not closed here, presumably so the upload
    # can proceed; add page.quit() once completion can be detected.

3. 设置上传超时和重试

def upload_with_retry(file_path, max_retries=3):
    """Upload a file, retrying the whole flow on failure.

    Args:
        file_path: Local path of the file to upload.
        max_retries: Maximum number of attempts.

    Returns:
        True as soon as one attempt completes, False when every attempt
        failed (or when ``max_retries`` is not positive).
    """
    page = ChromiumPage()
    try:
        for attempt in range(max_retries):
            try:
                page.get('https://example.com/upload')
                file_input = page.ele('tag:input@type=file', timeout=10)
                file_input.input(file_path)

                # The wait reports a timeout through a falsy return value,
                # so turn that into an error to trigger the retry path.
                if not page.wait.ele_displayed('.upload-complete', timeout=30):
                    raise TimeoutError("等待上传完成超时")

                print("上传成功")
                return True

            except Exception as e:
                print(f"上传尝试 {attempt + 1} 失败: {e}")
                if attempt < max_retries - 1:
                    print("重试中...")
                    time.sleep(2)
                else:
                    print("上传失败，已达最大重试次数")

        return False
    finally:
        # Runs on every exit path, so the browser is always closed.
        page.quit()

4. 验证文件上传结果

def verify_upload(file_path):
    """Upload a file and check whether the page confirms it.

    Two independent checks are run; either one passing counts as success:
    the success text appearing, or the file's base name appearing in an
    ``<li>`` of ``#fileList``.

    Args:
        file_path: Local path of the file to upload.

    Returns:
        True if at least one check passed, otherwise False.
    """
    page = ChromiumPage()
    try:
        # Perform the upload.
        page.get('https://example.com/upload')
        file_input = page.ele('#fileInput')
        file_input.input(file_path)
        page.ele('#uploadBtn').click()

        verification_passed = False

        # Check 1: success message.
        if page.ele('text:上传成功', timeout=10):
            print("通过成功消息验证")
            verification_passed = True

        # Check 2: file name listed on the page.
        file_list = page.ele('#fileList')
        if file_list:
            file_name = os.path.basename(file_path)
            if any(file_name in item.text for item in file_list.eles('tag:li')):
                print("通过文件列表验证")
                verification_passed = True

        # A third option, not implemented here, is to inspect the server's
        # API response when the page exposes one.

        if verification_passed:
            print("文件上传验证成功")
            return True

        print("文件上传验证失败")
        return False
    finally:
        # Close the browser whether verification passed, failed or raised.
        page.quit()

六、最佳实践和注意事项

1. 路径处理最佳实践

def handle_file_paths(files=None):
    """Resolve upload candidates to absolute path strings, portably.

    Args:
        files: Optional iterable of paths (``str`` or ``Path``).  When
            omitted, three example files under the user's ``Documents``
            directory are used.

    Returns:
        A list with the absolute path (as ``str``) of every candidate that
        exists, in input order.  Missing files are reported and skipped.
    """
    from pathlib import Path

    if files is None:
        # Path.home() keeps the example independent of the operating system.
        base_dir = Path.home() / 'Documents'
        files = [
            base_dir / 'photos' / 'vacation.jpg',
            base_dir / 'work' / 'report.pdf',
            base_dir / 'data' / 'dataset.csv',
        ]

    ready_paths = []
    for file_path in files:
        file_path = Path(file_path)
        if not file_path.exists():
            print(f"文件不存在: {file_path}")
            continue

        # str(Path) already uses the platform's separators, so the string is
        # used as-is; doubling backslashes is only needed when writing a
        # path inside a non-raw source literal.
        abs_path = str(file_path.absolute())
        print(f"准备上传: {abs_path}")
        ready_paths.append(abs_path)

    return ready_paths

2. 配置浏览器选项

def configure_browser():
    """Create a ChromiumPage from explicitly configured browser options.

    Returns:
        A ``ChromiumPage`` started with the options set below.
    """
    co = ChromiumOptions()

    # Keep image loading enabled and mute the browser's audio.
    co.no_imgs(False)
    co.mute(True)

    # Default download directory, if downloads are needed.
    download_path = r'C:\Downloads'
    co.set_pref('download.default_directory', download_path)

    # ChromiumPage takes the options object as its first positional
    # argument; it has no ``chromium_options`` keyword.
    page = ChromiumPage(co)

    return page

3. 错误处理和日志记录

import logging
from datetime import datetime

def setup_logging():
    """Configure root logging to a dated file plus the console.

    Returns:
        The logger named after this module.
    """
    log_file = f'upload_log_{datetime.now().strftime("%Y%m%d")}.log'
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            # Explicit UTF-8 so non-ASCII log messages are written the same
            # way on every platform instead of using the locale default.
            logging.FileHandler(log_file, encoding='utf-8'),
            logging.StreamHandler(),
        ],
    )
    return logging.getLogger(__name__)

def safe_upload(file_path, logger):
    """Upload a file with size validation, logging and guaranteed cleanup.

    Args:
        file_path: Local path of the file to upload.
        logger: Logger used for progress and error messages.

    Returns:
        True when the success marker appears on the page; False when the
        file is missing or too large, the wait times out, or any step
        raises.
    """
    page = None
    try:
        logger.info(f"开始上传文件: {file_path}")

        # Validate the local file first so no browser is started for a file
        # that cannot be uploaded anyway.
        file_size = os.path.getsize(file_path)
        if file_size > 50 * 1024 * 1024:  # 50 MB limit
            logger.error(f"文件过大: {file_size} bytes")
            return False

        page = ChromiumPage()
        page.get('https://example.com/upload')

        file_input = page.ele('tag:input@type=file')
        file_input.input(file_path)

        # The wait signals a timeout with a falsy return value; without this
        # check a timed-out upload would be logged as a success.
        if not page.wait.ele_displayed('.upload-success', timeout=60):
            logger.error("等待上传完成超时")
            return False

        logger.info("文件上传成功")
        return True

    except Exception as e:
        logger.error(f"上传失败: {str(e)}", exc_info=True)
        return False

    finally:
        if page is not None:
            page.quit()

七、总结

使用DrissionPage进行文件上传的关键点：

1. 识别上传元素：大多数情况下是<input type="file">元素
2. 使用input()方法：直接向input元素输入文件路径
3. 处理复杂场景：拖拽上传、iframe、动态加载等
4. 添加等待机制：确保元素加载完成
5. 错误处理和重试：提高脚本的健壮性
6. 验证上传结果：确保文件真正上传成功

通过上述示例和方法，你可以处理大多数文件上传场景。根据具体的网站实现，可能需要调整选择器、等待时间和交互方式。