用Python下载woocommerce导出的产品数据

从 WooCommerce 导出了产品数据之后，需要把这些数据（包括图片）全部下载到本地。

需求：根据产品名称在本地为每个产品创建一个文件夹；在产品文件夹中，把主图和详情中的图片分别保存到各自的子文件夹里；同时在产品文件夹中生成一个包含商品详情及其他各项信息的 HTML 文件。

import html
import os
import re
import shutil
from urllib.parse import quote, unquote, urljoin, urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Site root used to resolve relative image URLs
base_url = 'https://shop.xxxxx.com'

# Spreadsheet exported from WooCommerce (replace with your own file path)
file_path = r'D:\1_Products\aicaigoulist\productsList.xlsx'

# Load every product row into a DataFrame
products_data = pd.read_excel(file_path)

# Create a directory
def create_dir(path):
    """Create ``path`` (and any missing parents) if it does not exist yet.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(path, exist_ok=True)

# Download an image
def download_image(image_url, save_path, headers, timeout=30):
    """Download one image and write it to ``save_path``.

    Args:
        image_url: Absolute ``http(s)`` URL, or a URL relative to the
            module-level ``base_url``.
        save_path: Destination file path. The file is only created when the
            server answers with HTTP 200.
        headers: Request headers passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up, so one
            stalled connection cannot hang the whole run.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    # Relative URLs are resolved against the shop's base URL.
    if not image_url.startswith(('http://', 'https://')):
        image_url = urljoin(base_url, image_url)

    # The context manager returns the connection to the pool even on errors.
    with requests.get(image_url, stream=True, headers=headers, timeout=timeout) as response:
        if response.status_code != 200:
            print(f'Skipped {image_url}: HTTP {response.status_code}')
            return
        with open(save_path, 'wb') as file:
            # iter_content decodes gzip/deflate transfer encodings; copying
            # response.raw directly would store the still-compressed bytes.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file.write(chunk)

# Request headers: present the script as a desktop browser
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}

# Root output directory; one sub-directory per product is created inside it
base_dir = 'products/'
create_dir(base_dir)

# Process every product: create its folder, download the main and detail
# images into separate sub-folders, and write a self-contained
# product_info.html next to them.

# Characters that cannot appear in a Windows file name (also covers "/" on POSIX).
_UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _cell_text(value):
    """Return a spreadsheet cell as text, mapping missing (NaN/None) cells to ''."""
    return '' if pd.isna(value) else str(value)


def _safe_name(text, fallback=''):
    """Replace characters that are illegal in file names; return fallback if nothing is left."""
    cleaned = _UNSAFE_NAME_CHARS.sub('_', text).strip().rstrip('. ')
    return cleaned or fallback


def _local_image_name(url):
    """Return the on-disk file name for an image URL ('' when the URL path has none)."""
    # Take the basename first so an encoded "%2F" cannot introduce a path separator.
    return _safe_name(unquote(os.path.basename(urlparse(url).path)))


for index, row in products_data.iterrows():
    product_name = _cell_text(row['名称'])
    # The raw name may contain path separators or other illegal characters.
    product_dir = os.path.join(base_dir, _safe_name(product_name, f'product_{index}'))
    html_file_path = os.path.join(product_dir, 'product_info.html')

    # The HTML file is written last, so its presence marks a finished product.
    if os.path.exists(html_file_path):
        continue

    # One directory per product, with separate folders for the two image kinds.
    create_dir(product_dir)
    main_images_dir = os.path.join(product_dir, 'main_images')
    details_images_dir = os.path.join(product_dir, 'details_images')
    create_dir(main_images_dir)
    create_dir(details_images_dir)

    # Main images: the cell holds zero or more comma-separated URLs.
    for main_url in _cell_text(row['图片']).split(','):
        main_url = main_url.strip()
        main_image_name = _local_image_name(main_url) if main_url else ''
        if not main_image_name:
            continue  # empty entry, or a URL without a file name
        download_image(main_url, os.path.join(main_images_dir, main_image_name), headers)

    # Detail images: download every <img> in the description and point its
    # src at the local copy.
    description = row['描述']
    details_html = ''
    if pd.notna(description):
        soup = BeautifulSoup(str(description), 'html.parser')
        for img in soup.find_all('img'):
            img_url = (img.get('src') or '').strip()
            if not img_url:
                continue  # e.g. lazy-loaded images that carry no src attribute
            # Relative URLs are resolved against the shop's base URL.
            if not img_url.startswith(('http://', 'https://')):
                img_url = urljoin(base_url, img_url)
            if not img_url.startswith(('http://', 'https://')):
                continue  # data: URIs and other schemes cannot be downloaded
            img_name = _local_image_name(img_url)
            if not img_name:
                continue
            download_image(img_url, os.path.join(details_images_dir, img_name), headers)
            # HTML paths always use "/", and the name is re-quoted so the
            # browser resolves it to exactly the file written above.
            img['src'] = f'details_images/{quote(img_name)}'
        details_html = soup.prettify()

    # Plain-text fields are escaped; missing cells render as empty text.
    title = html.escape(product_name)
    price = html.escape(_cell_text(row['常规售价']))
    categories = html.escape(_cell_text(row['分类']))
    tags = html.escape(_cell_text(row['标签']))
    # NOTE(review): the short description is presumably HTML (as in
    # WooCommerce exports), so it is embedded unescaped - confirm.
    short_description = _cell_text(row['简短描述'])

    # Build the HTML page. The charset declaration makes browsers decode the
    # UTF-8 file correctly when it is opened from disk.
    html_content = f"""
    <html>
    <head>
        <meta charset="utf-8">
        <title>{title}</title>
        <style>
            body {{
                text-align: center;
            }}
            .container {{
                width: 80%;
                margin: auto;
                text-align: left;
            }}
            .container img {{
                display: block;
                margin: auto;
            }}
        </style>
    </head>
    <body>
        <div class="container">
            <h1>{title}</h1>
            <p><strong>常规售价:</strong> {price}</p>
            <p><strong>分类:</strong> {categories}</p>
            <p><strong>标签:</strong> {tags}</p>
    """

    # Product attributes: the columns "属性 1" .. "属性 8" are checked.
    for i in range(1, 9):
        attr_name = row.get(f'属性 {i} 名称')
        attr_value = row.get(f'属性 {i} 值')
        if pd.notna(attr_name) and pd.notna(attr_value):
            html_content += (
                f"<p><strong>{html.escape(str(attr_name))}:</strong> "
                f"{html.escape(str(attr_value))}</p>"
            )

    # Short description and the full product details.
    html_content += f"""
            <p><strong>简短描述:</strong> {short_description}</p>
            <div><strong>产品详情:</strong> {details_html}</div>
        </div>
    </body>
    </html>
    """

    # Save the HTML file.
    with open(html_file_path, 'w', encoding='utf-8') as file:
        file.write(html_content)

本文作者：𝙕𝙆𝘾𝙊𝙄

文章名称：用Python下载woocommerce导出的产品数据

文章链接：https://www.zkcoi.com/365up/program/3065.html

本站资源仅供个人学习交流，请于下载后24小时内删除，不允许用于商业用途，否则法律问题自行承担。