Python采集商品评论接口及JSON数据返回示例
下面我将提供一个完整的Python示例,用于采集商品评论数据,并展示典型的JSON返回格式。
示例代码:采集商品评论
import requests | |
import json | |
from typing import List, Dict | |
def fetch_product_reviews(product_id: str, page: int = 1, page_size: int = 10) -> Dict:
    """Fetch one page of reviews for a product from the reviews API.

    Args:
        product_id: Identifier of the product whose reviews are requested.
        page: Page number, sent as the ``page`` query parameter.
        page_size: Reviews per page, sent as the ``page_size`` query parameter.

    Returns:
        The decoded JSON body on success. If the request fails, the server
        answers with an error status, or the body is not valid JSON, a dict
        of the form ``{'success': False, 'error': <message>, 'data': None}``.
    """
    # Placeholder endpoint -- replace with the real API endpoint in actual use.
    url = f"https://api.example.com/products/{product_id}/reviews"
    params = {
        'page': page,
        'page_size': page_size,
        # Other parameters (sort order, time range, ...) may be needed here.
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'application/json',
        # If authentication is required, add for example:
        # 'Authorization': 'Bearer your_access_token'
    }
    try:
        response = requests.get(url, headers=headers, params=params, timeout=10)
        response.raise_for_status()  # turn 4xx/5xx answers into exceptions
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"请求失败: {e}")
        return {
            'success': False,
            'error': str(e),
            'data': None,
        }
# Example usage
if __name__ == "__main__":
    product_id = "123456"  # replace with a real product ID
    reviews_data = fetch_product_reviews(product_id=product_id, page=1, page_size=5)
    # Pretty-print the result, keeping non-ASCII characters readable.
    formatted = json.dumps(reviews_data, ensure_ascii=False, indent=2)
    print(formatted)
典型JSON返回格式
以下是商品评论API可能返回的JSON数据结构示例:
{
  "success": true,
  "code": 200,
  "message": "请求成功",
  "data": {
    "product_id": "123456",
    "product_name": "示例商品名称",
    "total_reviews": 1256,
    "average_rating": 4.5,
    "rating_distribution": {
      "5": 789,
      "4": 321,
      "3": 102,
      "2": 32,
      "1": 12
    },
    "reviews": [
      {
        "review_id": "r1001",
        "user": {
          "user_id": "u2001",
          "nickname": "用户昵称",
          "avatar": "https://example.com/avatar/u2001.jpg",
          "level": "VIP3"
        },
        "rating": 5,
        "content": "非常满意的一次购物,商品质量很好,物流也很快!",
        "images": [
          "https://example.com/reviews/1001/1.jpg",
          "https://example.com/reviews/1001/2.jpg"
        ],
        "created_at": "2023-05-15T14:30:22Z",
        "likes": 24,
        "replies": [
          {
            "reply_id": "rp3001",
            "user": {
              "user_id": "seller01",
              "nickname": "商家回复",
              "type": "seller"
            },
            "content": "感谢您的支持,我们会继续努力提供优质服务!",
            "created_at": "2023-05-15T16:45:10Z"
          }
        ]
      },
      {
        "review_id": "r1002",
        "user": {
          "user_id": "u2002",
          "nickname": "另一个用户",
          "avatar": "https://example.com/avatar/u2002.jpg"
        },
        "rating": 4,
        "content": "商品不错,但包装可以更好一些。",
        "created_at": "2023-05-14T09:12:33Z",
        "likes": 5,
        "replies": []
      }
    ],
    "page_info": {
      "current_page": 1,
      "page_size": 5,
      "total_pages": 252,
      "has_next": true
    }
  }
}
关键字段说明
- 基础信息:
  - `success`: 请求是否成功
  - `code`: 状态码
  - `message`: 状态信息
- 商品信息:
  - `product_id`: 商品唯一标识
  - `product_name`: 商品名称
  - `total_reviews`: 总评论数
  - `average_rating`: 平均评分
- 评分分布:
  - `rating_distribution`: 各星级评论数量统计
- 评论列表:
  - `review_id`: 评论ID
  - `user`: 评论用户信息
  - `rating`: 评分(1-5)
  - `content`: 评论内容
  - `images`: 评论附带的图片
  - `created_at`: 评论时间
  - `likes`: 点赞数
  - `replies`: 商家或其他用户的回复
- 分页信息:
  - `current_page`: 当前页码
  - `page_size`: 每页条数
  - `total_pages`: 总页数
  - `has_next`: 是否有下一页
实际应用建议
- 错误处理: 添加更完善的错误处理机制
- 限流控制: 遵守API的速率限制
- 数据存储: 将采集的数据保存到数据库或文件
- 反爬策略: 如果遇到反爬,可能需要添加随机延迟、使用代理等
- 认证: 许多API需要API key或OAuth认证
扩展:保存到JSON文件
def save_reviews_to_file(data: Dict, filename: str):
    """Save review data to a UTF-8 encoded, pretty-printed JSON file.

    The data is serialized to a string before the file is opened, so a
    serialization error cannot truncate an existing file or leave a
    partially written one behind.

    Args:
        data: JSON-serializable review data.
        filename: Path of the file to write; an existing file is overwritten.

    Returns:
        None. Success or failure is reported on stdout; exceptions are not
        propagated to the caller (best-effort save).
    """
    try:
        # Serialize first: opening with 'w' truncates the target immediately.
        serialized = json.dumps(data, indent=2, ensure_ascii=False)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print(f"数据已成功保存到 {filename}")
    except Exception as e:  # deliberate best-effort boundary: report, don't raise
        print(f"保存文件失败: {e}")
# Usage example: fetch the first page of reviews, then persist it to disk.
reviews_data = fetch_product_reviews(product_id="123456")
save_reviews_to_file(data=reviews_data, filename="product_reviews.json")
希望这个示例能帮助你开始采集商品评论数据!根据实际API的不同,你可能需要调整请求参数和数据处理逻辑。