从 WooCommerce 导出了产品的数据,需要全部下载到本地。
需求:基于产品名称在本地创建产品文件夹,然后在产品文件夹中把主图和详情中的图片分别放进各自的子文件夹,同时在产品文件夹中生成一个包含商品详情和各项信息的 HTML 文件。
"""Download product images and build local HTML pages from a WooCommerce export.

For every row of the exported spreadsheet a folder named after the product is
created under ``base_dir``. Main images go to ``main_images/``, images found
in the description go to ``details_images/``, and a ``product_info.html`` page
summarising the product is written next to them.
"""
import html
import logging
import os
import re
import shutil
from urllib.parse import quote, unquote, urljoin, urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

# Path of the exported product spreadsheet (replace with your own file path).
file_path = r'D:\1_Products\aicaigoulist\productsList.xlsx'

# Base URL used to resolve relative image links.
base_url = 'https://shop.xxxxx.com'

# Request headers sent with every image download.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# Root directory that receives one sub-folder per product.
base_dir = 'products/'

# Seconds to wait for the server before giving up on a download.
REQUEST_TIMEOUT = 30

# Characters that are not allowed in Windows file names (the export path above
# is a Windows path), plus ASCII control characters.
_INVALID_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def create_dir(path: str) -> None:
    """Create ``path`` (and any missing parents) if it does not exist yet."""
    os.makedirs(path, exist_ok=True)


def _safe_name(name: object) -> str:
    """Return ``name`` as a string that is usable as a single path component.

    Invalid characters are replaced by ``_``; surrounding whitespace and
    trailing dots are removed. The result may be empty.
    """
    cleaned = _INVALID_NAME_CHARS.sub('_', str(name))
    return cleaned.strip().rstrip('. ')


def _text(value: object) -> str:
    """Return ``value`` as HTML-escaped text, or ``''`` for a missing cell."""
    if pd.isna(value):
        return ''
    return html.escape(str(value))


def _local_image_name(image_url: str) -> str:
    """Derive a local file name from the path part of ``image_url``.

    Percent-escapes are decoded so that the file on disk carries the real
    name; the result may be empty (e.g. for a URL ending in ``/``).
    """
    return _safe_name(unquote(os.path.basename(urlparse(image_url).path)))


def download_image(image_url: str, save_path: str, headers: dict) -> bool:
    """Download ``image_url`` to ``save_path``.

    Relative URLs are resolved against ``base_url``. Failures are logged and
    reported through the return value instead of aborting the whole run.

    Args:
        image_url: Absolute or relative URL of the image.
        save_path: Destination file path.
        headers: HTTP headers sent with the request.

    Returns:
        ``True`` if the image was saved, ``False`` otherwise.
    """
    # urljoin leaves absolute URLs untouched and resolves relative ones.
    image_url = urljoin(base_url, image_url)
    # Download into a temporary file first so an interrupted transfer never
    # leaves a truncated image under the final name.
    partial_path = save_path + '.part'
    try:
        with requests.get(image_url, stream=True, headers=headers,
                          timeout=REQUEST_TIMEOUT) as response:
            if response.status_code != 200:
                logger.warning('Skipping %s: HTTP %s',
                               image_url, response.status_code)
                return False
            with open(partial_path, 'wb') as file:
                # iter_content applies gzip/deflate decoding, unlike
                # reading response.raw directly.
                for chunk in response.iter_content(chunk_size=65536):
                    file.write(chunk)
        shutil.move(partial_path, save_path)
    except (requests.RequestException, OSError) as error:
        logger.warning('Failed to download %s: %s', image_url, error)
        if os.path.exists(partial_path):
            os.remove(partial_path)
        return False
    return True


def _download_main_images(row: pd.Series, target_dir: str) -> None:
    """Download the comma-separated main image URLs of ``row``."""
    images_field = row.get('图片')
    if pd.isna(images_field):
        return
    # The export is assumed to separate multiple main image URLs by commas.
    for raw_url in str(images_field).split(','):
        image_url = raw_url.strip()
        if not image_url:
            continue
        image_name = _local_image_name(image_url)
        if not image_name:
            logger.warning('Cannot derive a file name from %s', image_url)
            continue
        download_image(image_url, os.path.join(target_dir, image_name), headers)


def _localize_detail_images(description: str, target_dir: str) -> BeautifulSoup:
    """Download images referenced in ``description`` and point them locally.

    Returns the parsed description; the ``src`` of every successfully
    downloaded image is rewritten to its copy under ``details_images/``.
    """
    soup = BeautifulSoup(description, 'html.parser')
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            continue  # <img> without a src attribute: nothing to download
        image_url = urljoin(base_url, src.strip())
        if urlparse(image_url).scheme not in ('http', 'https'):
            continue  # e.g. inline data: URIs
        image_name = _local_image_name(image_url)
        if not image_name:
            continue
        if download_image(image_url, os.path.join(target_dir, image_name), headers):
            # Always use '/' in HTML, and re-encode the decoded file name.
            img['src'] = 'details_images/' + quote(image_name)
            # Browsers prefer srcset over src, which would keep loading the
            # remote copies.
            img.attrs.pop('srcset', None)
            img.attrs.pop('sizes', None)
    return soup


def _build_html(row: pd.Series, details_html: str) -> str:
    """Assemble the product page for ``row``."""
    title = _text(row.get('名称'))
    price = _text(row.get('常规售价'))
    categories = _text(row.get('分类'))
    tags = _text(row.get('标签'))
    # The short description is inserted as-is (not escaped), as in the
    # original script; it is expected to contain HTML markup.
    short_description = row.get('简短描述')
    short_description = '' if pd.isna(short_description) else str(short_description)

    parts = [f"""
<html>
<head>
<meta charset="utf-8">
<title>{title}</title>
<style>
body {{ text-align: center; }}
.container {{ width: 80%; margin: auto; text-align: left; }}
.container img {{ display: block; margin: auto; }}
</style>
</head>
<body>
<div class="container">
<h1>{title}</h1>
<p><strong>常规售价:</strong> {price}</p>
<p><strong>分类:</strong> {categories}</p>
<p><strong>标签:</strong> {tags}</p>
"""]

    # Attribute columns 1..8 of the export.
    for i in range(1, 9):
        attr_name = row.get(f'属性 {i} 名称')
        attr_value = row.get(f'属性 {i} 值')
        if pd.notna(attr_name) and pd.notna(attr_value):
            parts.append(
                f"<p><strong>{_text(attr_name)}:</strong> {_text(attr_value)}</p>"
            )

    # Short description and product details.
    parts.append(f"""
<p><strong>简短描述:</strong> {short_description}</p>
<div><strong>产品详情:</strong> {details_html}</div>
</div>
</body>
</html>
""")
    return ''.join(parts)


def process_product(row: pd.Series) -> None:
    """Download the images of one product row and write its HTML page."""
    product_name = row.get('名称')
    folder_name = '' if pd.isna(product_name) else _safe_name(product_name)
    if not folder_name:
        logger.warning('Skipping a row without a usable product name')
        return

    product_dir = os.path.join(base_dir, folder_name)
    html_file_path = os.path.join(product_dir, 'product_info.html')

    # The HTML page is written last, so its presence marks a processed product.
    if os.path.exists(html_file_path):
        return

    # Separate folders for main images and description images.
    main_images_dir = os.path.join(product_dir, 'main_images')
    details_images_dir = os.path.join(product_dir, 'details_images')
    create_dir(main_images_dir)
    create_dir(details_images_dir)

    _download_main_images(row, main_images_dir)

    description = row.get('描述')
    details_html = ''
    if pd.notna(description):
        soup = _localize_detail_images(str(description), details_images_dir)
        details_html = soup.prettify()

    with open(html_file_path, 'w', encoding='utf-8') as file:
        file.write(_build_html(row, details_html))


def main() -> None:
    """Read the product export and process every row."""
    logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
    products_data = pd.read_excel(file_path)
    create_dir(base_dir)
    for _, row in products_data.iterrows():
        process_product(row)


if __name__ == '__main__':
    main()
本文作者:𝙕𝙆𝘾𝙊𝙄
文章名称:用Python下载woocommerce导出的产品数据
文章链接:https://www.zkcoi.com/365up/program/3065.html
本站资源仅供个人学习交流,请于下载后24小时内删除,不允许用于商业用途,否则法律问题自行承担。