文章目录
- 前言
- 一、首先分析网页
- 二、编写代码
- 总结
前言
这次主要是获取快手小店后台的评价数据和小额打款数据。采用的是selenium,因为据说这个网站的反爬比较麻烦,而使用selenium可以直接无视大部分网站的反爬。因为这篇的主题是selenium的使用,所以我会着重分析selenium需要定位的元素。至于获取到网页源码后的数据提取我会略过,大家可以自己根据代码分析。先来看看效果吧。
编写代码
1.分析网页
首先肯定是登录,找到“我是店员”的标签。然后找到手机号、密码和登录按钮的标签,这个网页并没有验证码。登录以后找到评价管理。然后是搜索参数“评价时间”,分为开始时间和结束时间。结束时间的标签就在开始时间标签的下方,我就不单独截图了。这里要注意的是,这两个参数不是在这里输入的,需要在点击其中一个后弹出来的界面里输入。还要注意的是,你必须先模拟点击两个日期,然后再在上面的输入框内输入,不然它会将你输入的内容重置。另外,输入框必须先模拟输入ctrl+a全选再输入,不然上面淡淡的“开始日期”这些占位文字还会在。这两点必须要注意!!! 这些输入好后我们就可以点击确定,然后点击筛选。接下来就是获取每一页的评价了。因为有很多页,所以需要模拟点击下一页。当到最后一页时,下一页这个标签会多出来一个disabled属性,可以据此判断翻页是否结束。当然,每次筛选最多也只会出现100页,你也可以使用固定次数的for循环,但这种方法有一个缺点:当你搜索的时间范围比较短时,评价数量可能不够100页,所以并不推荐。评价管理完毕后就到了小额打款。首先点击小额打款,然后点击打款记录。但这里要注意的是,点击小额打款后出现的页面里并没有打款记录,需要点击“立即使用”后才会出现打款记录,而且需要先最大化浏览器再点击确认,不然是无法点击到“立即使用”的。进入打款记录后,这里要输入的搜索参数有两个,分别是申请时间和成功时间。这里的输入方法和评价管理的参数输入方法是一样的,我就不重复了。
2.编写代码
这次我写的注释比较详细,我就不一步步介绍每一段代码的作用了。直接上总的代码。
#coding:utf-8
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from lxml import etree
from queue import Queue
from openpyxl import Workbook
import time
import threading
import re
class DDThread(threading.Thread):
    """Worker thread that parses queued review-page HTML with ``jx_dd``.

    Each parsed page is appended to ``self.dd_data`` in the order it was
    taken from the queue. Pages for which ``jx_dd`` returns ``False`` are
    skipped.
    """

    def __init__(self, queue):
        """Remember the queue to drain and start with an empty result list.

        Args:
            queue: A ``queue.Queue`` holding page-source strings.
        """
        super().__init__()
        self.dd_queue = queue
        self.dd_data = []

    def run(self):
        """Parse queued pages until the queue has nothing left."""
        # Function-scope import: the file only imports ``Queue`` at the top.
        from queue import Empty

        while True:
            try:
                # get_nowait() never blocks. The previous empty()/get() pair
                # could hang forever if another consumer took the last item
                # between the two calls.
                html = self.dd_queue.get_nowait()
            except Empty:
                break
            data = jx_dd(html)
            if data is not False:
                self.dd_data.append(data)
class DKThread(threading.Thread):
    """Worker thread that parses queued payout-record HTML with ``jx_dk``.

    Each parsed page is appended to ``self.dk_data`` in the order it was
    taken from the queue. Pages for which ``jx_dk`` returns ``False`` are
    skipped.
    """

    def __init__(self, queue):
        """Remember the queue to drain and start with an empty result list.

        Args:
            queue: A ``queue.Queue`` holding page-source strings.
        """
        super().__init__()
        self.dk_queue = queue
        self.dk_data = []

    def run(self):
        """Parse queued pages until the queue has nothing left."""
        # Function-scope import: the file only imports ``Queue`` at the top.
        from queue import Empty

        while True:
            try:
                # get_nowait() never blocks. The previous empty()/get() pair
                # could hang forever if another consumer took the last item
                # between the two calls.
                html = self.dk_queue.get_nowait()
            except Empty:
                break
            data = jx_dk(html)
            if data is not False:
                self.dk_data.append(data)
def jx_dd(html):
    """Parse one review-management page into a list of review records.

    Args:
        html: Page source of a review-management page.

    Returns:
        ``{page: records}``, where ``page`` is the text of the highlighted
        pager item and ``records`` is a list of dicts, one per review entry.
        Returns ``False`` (after printing '无数据') when the page cannot be
        parsed or contains no review entries.
    """

    def optional(nodes):
        # Follow-up review fields may be absent; '无' marks "none".
        return nodes[0] if nodes else '无'

    def stars(row, position):
        # The rating is rendered as one "star" div per point; count them and
        # render the score as that many asterisks.
        found = row.xpath(
            f'.//*[@class="td star-rating"]/div/div[{position}]/div[@class="star"]')
        return '*' * len(found)

    try:
        xp = etree.HTML(html)
        # Current page number, taken from the highlighted pager item.
        page = xp.xpath('//*[@class="number active"]/text()')[0]
        dd_date_one = []
        # Walk the entries actually present instead of assuming exactly 10:
        # a short last page used to raise IndexError and be dropped entirely.
        for row in xp.xpath('//div[@class="comment-list"]/div'):
            order_no = row.xpath('.//*[@class="oid-value"]/text()')
            if not order_no:
                # No order number: this child div is not a review entry.
                continue
            dd_date_one.append({
                # Order number
                '订单号': order_no[0],
                # Buyer nickname
                '买家昵称': row.xpath('.//*[@class="nickname"]/text()')[0],
                # Buyer ID
                '买家ID': row.xpath('.//*[@class="userid"]/text()')[0],
                # Product name
                '商品': row.xpath('.//*[@class="good-name"]/text()')[0],
                # Quantity / SKU description
                '数量': row.xpath('.//*[@class="sku-desc"]/text()')[0],
                # Price
                '价格': row.xpath('.//*[@class="sku-price"]/text()')[0],
                # Review time
                '评论时间': row.xpath(
                    './/*[@class="main-comment"]/div[@class="date"]/text()')[0],
                # Review text
                '评论': row.xpath(
                    './/*[@class="main-comment"]/div[@class="content"]/text()')[0],
                # Follow-up review time (optional)
                '追评时间': optional(row.xpath(
                    './/*[@class="attach-comment"]/div[@class="date"]/text()')),
                # Follow-up review text (optional)
                '追评': optional(row.xpath(
                    './/*[@class="attach-comment"]/div[@class="content"]/text()')),
                # Ratings: product quality, service attitude, logistics
                '商品质量': stars(row, 1),
                '服务态度': stars(row, 2),
                '物流服务': stars(row, 3),
            })
        if not dd_date_one:
            # A page without any review entry counts as "no data".
            print('无数据')
            return False
        print('订单第', page, '页已解析完成')
        return {page: dd_date_one}
    except Exception:
        # Best effort: an unparsable page is reported and skipped.
        # Exception (not BaseException) so Ctrl+C still interrupts the run.
        print('无数据')
        return False
def jx_dk(html, page_size=20):
    """Parse one page of the small-payout record table.

    Args:
        html: Page source of a payout-record page.
        page_size: Number of rows on every page except the last one.

    Returns:
        ``{page: records}``, where ``page`` is the text of the active pager
        item and ``records`` is a list of dicts, one per table row.
        Returns ``False`` (after printing '小额打款无数据') when the page
        cannot be parsed.
    """
    # XPath prefix shared by every cell lookup in the record table.
    row_prefix = (
        '//div[@class="ant-table ant-table-default ant-table-bordered '
        'ant-table-scroll-position-left"]//tbody[@class="ant-table-tbody"]'
    )
    # (output key, XPath relative to the row, default when absent).
    # A default of None marks a required cell.
    columns = (
        ('订单编号', 'td[1]/a/text()', None),     # order number
        ('商品ID', 'td[2]/a/text()', None),       # product ID
        ('买家ID', 'td[3]/text()', None),         # buyer ID
        ('打款类型', 'td[4]/text()', None),       # payout type
        ('打款金额', 'td[5]/span/text()', None),  # payout amount
        ('留言', 'td[6]/text()', '无'),           # message (optional)
        ('申请时间', 'td[7]/text()', None),       # application time
        ('成功时间', 'td[8]/text()', None),       # success time
        ('打款状态', 'td[9]/text()', None),       # payout status
        ('操作人', 'td[10]/text()', None),        # operator
        ('审批人', 'td[11]/text()', None),        # approver
    )
    try:
        xp = etree.HTML(html)
        # Total record count: first run of digits in the pager summary text.
        total_text = xp.xpath('//*[@class="ant-pagination-total-text"]/text()')[0]
        total_records = int(re.search(r'\d+', total_text).group())
        # Total page count: second-to-last pager item.
        total_page = xp.xpath(
            '//ul[@unselectable="unselectable"]/li[last()-1]/a/text()')[0]
        # Current page number.
        page = xp.xpath(
            '//*[contains(@class, "ant-pagination-item-active")]/a/text()')[0]
        if page != total_page:
            row_count = page_size
        else:
            # The last page holds whatever is left after the full pages.
            row_count = total_records - page_size * (int(total_page) - 1)

        dk_date_one = []
        for i in range(1, row_count + 1):
            record = {}
            for key, cell_path, default in columns:
                found = xp.xpath(f'{row_prefix}/tr[{i}]/{cell_path}')
                if found or default is None:
                    # A missing required cell raises IndexError, which
                    # rejects the whole page below.
                    record[key] = found[0]
                else:
                    record[key] = default
            dk_date_one.append(record)
        print('小额打款第', page, '页已解析完成')
        return {page: dk_date_one}
    except Exception:
        # Best effort: an unparsable page is reported and skipped.
        # Exception (not BaseException) so Ctrl+C still interrupts the run.
        print('小额打款无数据')
        return False
def get_html(url, account='你的账号', password='你的密码', max_comment_pages=100):
    """Drive Chrome through the shop backend and collect page sources.

    Logs in as a shop clerk, filters the review-management list by a fixed
    date range and pages through it. Then opens the small-payout records,
    filters them by fixed application/success time ranges and pages through
    those too.

    Args:
        url: Login entry URL to open first.
        account: Phone number typed into the login form.
        password: Password typed into the login form.
        max_comment_pages: Upper bound on review pages to collect.

    Returns:
        ``(dd_data_que, dk_data_que)``: two ``Queue`` objects holding the
        page sources of the review pages and of the payout-record pages.

    Raises:
        selenium.common.exceptions.WebDriverException: if an element of the
            login or filter steps cannot be located or used. The browser is
            closed before the exception propagates.
    """
    # Function-scope imports: both come from the selenium package the file
    # already depends on.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    # Launch Chrome.
    wd = webdriver.Chrome()

    def find(xpath):
        # find_element(By.XPATH, ...) works on Selenium 3 and 4, whereas
        # find_element_by_xpath was removed in Selenium 4.3.
        return wd.find_element(By.XPATH, xpath)

    def click(xpath, pause=0):
        # Click the element, then optionally wait for the page to react.
        find(xpath).click()
        if pause:
            time.sleep(pause)

    def retype(xpath, text, pause=0):
        # Select the whole field with Ctrl+A first so the typed text
        # replaces the current content instead of being appended to it.
        box = find(xpath)
        box.send_keys(Keys.CONTROL, 'a')
        box.send_keys(text)
        if pause:
            time.sleep(pause)

    try:
        # Open the login page.
        wd.get(url)
        time.sleep(3)
        # Choose "I am a shop clerk".
        click('//*[@id="root"]//div[@class="choose-account-type"]/div[2]', 1)
        # Enter phone number and password, then submit.
        find('//*[@placeholder="请输入手机号"]').send_keys(account)
        time.sleep(1)
        find('//*[@placeholder="请输入密码"]').send_keys(password)
        time.sleep(1)
        click('//*[@class="ant-btn"]', 1)

        # ---- Review management ----
        wd.get('https://s.kwaixiaodian.com/order/comments')
        time.sleep(2)
        # Clicking the range field opens the popup with the real inputs.
        click('//*[@class="el-date-editor el-range-editor el-input__inner '
              'el-date-editor--datetimerange"]//*[@placeholder="开始时间"]', 2)
        picker_input = '//*[@class="el-date-range-picker__editor el-input el-input--small"]'
        retype(picker_input + '/*[@placeholder="开始日期"]', '2021-03-11')
        retype(picker_input + '/*[@placeholder="开始时间"]', '00:00:00')
        retype(picker_input + '/*[@placeholder="结束日期"]', '2021-04-11')
        retype(picker_input + '/*[@placeholder="结束时间"]', '23:59:59')
        # Confirm the picker, then apply the filter.
        click('//*[@class="el-button el-picker-panel__link-btn '
              'el-button--default el-button--mini is-plain"]')
        click('//*[@type="submit"]', 2)

        # Queue the first page, then keep clicking "next".
        dd_data_que = Queue()
        dd_data_que.put(wd.page_source)
        for _ in range(max_comment_pages - 1):
            dd_next_page = find('//button[@class ="btn-next"]')
            if not dd_next_page.is_enabled():
                # The button is disabled on the last page. Clicking on
                # would only queue duplicates of it.
                break
            dd_next_page.click()
            time.sleep(2)
            dd_data_que.put(wd.page_source)

        # ---- Small payouts ----
        wd.get('https://s.kwaixiaodian.com/refund/small-transfer')
        time.sleep(2)
        # The confirm button is only clickable in a maximized window.
        wd.maximize_window()
        click('//button[@class="ant-btn ant-btn-primary ant-btn-lg"]')
        # Switch to the payout-record tab.
        click('//*[@class="ant-tabs-nav ant-tabs-nav-animated"]/div/div[2]', 2)

        # Application-time range.
        click('//*[@class="ant-form-item-control has-success"]'
              '//*[@placeholder="开始日期"]', 2)
        retype('//*[@class="ant-calendar-date-input-wrap"]/*[@placeholder="开始日期"]',
               '2020-12-02 00:00:00', 2)
        retype('//*[@class="ant-calendar-date-input-wrap"]/*[@placeholder="结束日期"]',
               '2021-03-25 23:59:59', 2)
        click('//*[@class="ant-calendar-ok-btn"]', 2)

        # Success-time range: click two dates in the calendar first,
        # otherwise the widget resets the typed values.
        click('//*[@id="RecordForm_reachTime"]//*[@placeholder="开始日期"]', 2)
        click('//*[@class="ant-calendar-tbody"]/tr[2]/td[1]', 2)
        click('//*[@class="ant-calendar-tbody"]/tr[2]/td[2]')
        retype('//*[@class="ant-calendar-input-wrap"]//*[@placeholder="开始日期"]',
               '2021-08-10 16:25:33', 2)
        retype('//*[@class="ant-calendar-input-wrap"]//*[@placeholder="结束日期"]',
               '2021-08-27 16:25:33', 2)
        click('//*[@class="ant-calendar-ok-btn"]', 2)
        # Apply the filter.
        click('//*[@style="text-align: right; padding: 0px 12px;"]'
              '//*[@class="ant-btn ant-btn-primary"]', 2)

        # Queue the first page, then page forward until the lookup fails.
        dk_data_que = Queue()
        dk_data_que.put(wd.page_source)
        try:
            while True:
                # Matches the "next page" item only while it has tabindex="0".
                click('//*[@title="下一页" and @tabindex="0"]')
                # Wait for the new page BEFORE capturing it. Capturing first
                # could queue the previous page again.
                time.sleep(2)
                dk_data_que.put(wd.page_source)
        except WebDriverException:
            print('结束翻页,关闭浏览器')
    finally:
        # Always close the browser, even when a step above failed.
        wd.quit()
    # Hand back both queues of page sources.
    return (dd_data_que, dk_data_que)
def write_dd(datas):
    """Write parsed review pages to the workbook '评价管理.xlsx'.

    Args:
        datas: Iterable of single-entry dicts ``{page: records}``, where
            ``records`` is a list of review dicts keyed by the column
            titles listed in ``fields`` below.
    """
    # Column titles double as the keys of each review record. The final
    # column holds the page number the record came from.
    fields = ['订单号', '买家昵称', '买家ID', '商品', '数量', '价格', '评论时间',
              '评论', '追评时间', '追评', '商品质量', '服务态度', '物流服务']

    book = Workbook()
    sheet = book.create_sheet('评价管理', 0)

    # Header row.
    for column, title in enumerate(fields + ['页数'], start=1):
        sheet.cell(row=1, column=column, value=title)

    # Data rows start right below the header.
    row_number = 2
    for page_map in datas:
        page, records = list(page_map.items())[0]
        for record in records:
            values = [record[name] for name in fields]
            values.append(page)
            for column, value in enumerate(values, start=1):
                sheet.cell(row=row_number, column=column, value=value)
            row_number += 1

    book.save('评价管理.xlsx')
def write_dk(datas):
    """Write parsed payout-record pages to the workbook '小额打款.xlsx'.

    Args:
        datas: Iterable of single-entry dicts ``{page: records}``, where
            ``records`` is a list of payout dicts keyed by the column
            titles listed in ``fields`` below.
    """
    # Column titles double as the keys of each payout record. The final
    # column holds the page number the record came from.
    fields = ['订单编号', '商品ID', '买家ID', '打款类型', '打款金额', '留言',
              '申请时间', '成功时间', '打款状态', '操作人', '审批人']

    book = Workbook()
    sheet = book.create_sheet('小额打款', 0)

    # Header row.
    for column, title in enumerate(fields + ['页数'], start=1):
        sheet.cell(row=1, column=column, value=title)

    # Data rows start right below the header.
    row_number = 2
    for page_map in datas:
        page, records = list(page_map.items())[0]
        for record in records:
            values = [record[name] for name in fields]
            values.append(page)
            for column, value in enumerate(values, start=1):
                sheet.cell(row=row_number, column=column, value=value)
            row_number += 1

    book.save('小额打款.xlsx')
if __name__ == '__main__':
    # Collect the page sources of both sections through the browser.
    url = 'https://s.kwaixiaodian.com/refund/small-transfer'
    review_pages, payout_pages = get_html(url)

    # Parse both queues in parallel, one worker thread per section.
    review_worker = DDThread(review_pages)
    payout_worker = DKThread(payout_pages)
    workers = (review_worker, payout_worker)
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    dd_data = review_worker.dd_data  # parsed review pages
    dk_data = payout_worker.dk_data  # parsed payout pages

    # Only write a workbook when its section produced data.
    if dd_data:
        write_dd(dd_data)
    if dk_data:
        write_dk(dk_data)
总结
写了这么多篇selenium的使用,相信大家看到这里,对selenium的使用应该已经比较得心应手了。selenium的使用难度很低,唯一的难点就是元素的定位,这一点只要把bs4或者xpath用得比较熟练就没有什么问题了。