最大的问题就是处理登录和验证码,登录之后获取(get)内容就简单了

作者:H我步行带风 原文地址:https://blog.csdn.net/a2272062968/article/details/115376592

示例源码:

from selenium import webdriver

from selenium.webdriver.common.action_chains import ActionChains

import requests

import time

import numpy

import cv2

import os

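'''
Run this script on a reasonably fast connection; otherwise resources may fail
to load, which can abort the script or leave the scraped titles and answers
empty.

question_list_url  URL of the problem list (must be the programming-problem
                   list; other problem types would need similar handling,
                   which is not implemented here)

file_name          name of the output file; the output is written as Markdown

access_interval    delay in seconds between problem pages, to be tuned to your
                   network speed; requesting pages too quickly triggers a
                   warning page, although the script retries when that happens
'''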

_LOGIN_URL = 'https://pintia.cn/auth/login'


def _solve_slider_captcha(web):
    """Make one attempt at the slider captcha shown on the login page.

    Downloads the captcha background and the draggable piece, finds where the
    piece matches the background best, and drags the slider handle
    horizontally by the corresponding offset.  Writes ``bg.jpg`` and
    ``front.jpg`` to the current working directory as a side effect.
    """
    # Background image of the captcha.
    bg_img_src = web.find_element_by_xpath(
        '/html/body/div[3]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/img[1]').get_attribute('src')
    # Draggable puzzle piece.
    front_img_src = web.find_element_by_xpath(
        '/html/body/div[3]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/img[2]').get_attribute('src')

    # Save both images to disk so OpenCV can load them.
    with open("bg.jpg", mode="wb") as f:
        f.write(requests.get(bg_img_src).content)
    with open("front.jpg", mode="wb") as f:
        f.write(requests.get(front_img_src).content)

    # Template matching is done on single-channel (grayscale) images.
    bg = cv2.cvtColor(cv2.imread("bg.jpg"), cv2.COLOR_BGR2GRAY)
    front = cv2.cvtColor(cv2.imread("front.jpg"), cv2.COLOR_BGR2GRAY)
    # Keep only the rows of the piece that contain at least one non-zero pixel.
    front = front[front.any(1)]

    result = cv2.matchTemplate(bg, front, cv2.TM_CCOEFF_NORMED)
    # unravel_index yields (row, column); the column of the best match is the
    # horizontal distance the slider has to travel.
    _best_row, best_col = numpy.unravel_index(numpy.argmax(result), result.shape)

    # Slider handle to drag.
    handle = web.find_element_by_xpath('/html/body/div[3]/div[2]/div/div/div[2]/div/div[2]/div[2]')
    # 0.946 is the author's empirical scale factor (presumably image pixels vs.
    # on-screen pixels -- TODO confirm).  The offset is made an explicit int.
    ActionChains(web).drag_and_drop_by_offset(
        handle, xoffset=int(best_col // 0.946), yoffset=0).perform()


def _log_in(web):
    """Submit the login form and retry the slider captcha up to five times."""
    web.get(_LOGIN_URL)
    account_input = web.find_element_by_xpath(
        '/html/body/div[1]/div[3]/div/div[2]/form/div[1]/div[1]/div/div/div[1]/input')
    password_input = web.find_element_by_xpath(
        '/html/body/div[1]/div[3]/div/div[2]/form/div[1]/div[2]/div/div/div[1]/input')
    # PTA account and password (placeholders -- replace with real credentials).
    account_input.send_keys('xxxx@qq.com')
    password_input.send_keys('xxxx')
    # Locate the login button and click it.
    web.find_element_by_xpath('/html/body/div[1]/div[3]/div/div[2]/form/div[2]/button/div/div').click()

    # Template matching is not 100% accurate, so the captcha is attempted
    # several times (the attempt count can be adjusted).
    for _ in range(5):
        # Give the captcha time to load; tune to the network speed.
        time.sleep(3)
        print('暂时url:' + web.current_url)
        # Leaving the login URL means the verification has passed.
        if web.current_url != _LOGIN_URL:
            break
        _solve_slider_captcha(web)


def _collect_problem_links(web, question_list_url):
    """Open the problem list page and return the link of every problem row."""
    web.get(question_list_url)
    rows = web.find_elements_by_xpath('/html/body/div/div[3]/div[3]/div/div[3]/table//tbody/tr')
    return [row.find_element_by_xpath('td[3]/a').get_attribute('href') for row in rows]


def _save_problem(web, problem, out_file, access_interval):
    """Append one problem's title and submitted code to *out_file* as Markdown.

    Requesting pages too quickly makes the site show a warning page instead of
    the problem, so the page is retried up to five times.  Returns True when
    the entry was written, False when every attempt failed.
    """
    for _ in range(5):
        try:
            web.get(problem)
            # Wait between requests; going too fast triggers the warning page.
            time.sleep(access_interval)
            # Problem title and the code held in the editor textarea.
            tm_title = web.find_element_by_css_selector(
                "[class='text-center black-3 text-4 font-weight-bold my-3']").text
            mycode = web.find_element_by_css_selector('textarea').get_attribute('value')
            print('标题:' + tm_title)
            print(mycode)
            # Markdown entry: bold title followed by a fenced code block.
            # A plain '\n' is used because text mode already translates it to
            # the platform line ending (os.linesep would give '\r\r\n' on
            # Windows).
            out_file.write('**' + tm_title + '**\n' + '```\n' + mycode + '\n```' + '\n')
            return True
        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C still stops the script.
            print('retrying ' + str(problem) + ': ' + repr(exc))
    print('giving up on ' + str(problem))
    return False


def Programming_questions(question_list_url, file_name, access_interval, start_index=0):
    """Log in to PTA (pintia.cn) and dump a problem set's code to a Markdown file.

    The function drives Chrome through Selenium: it logs in (solving the
    slider captcha with OpenCV template matching), collects every problem link
    from the list page, then visits each problem and appends its title and the
    code found in the page's textarea to *file_name*.

    NOTE: this uses the Selenium 3 style API (``find_element_by_*`` and a
    positional chromedriver path), which newer Selenium releases have removed.

    Args:
        question_list_url: URL of the problem list page (programming problems).
        file_name: Output file; entries are appended, encoded as UTF-8.
        access_interval: Seconds to wait after loading each problem page.
        start_index: Number of leading problems to skip, for resuming after an
            interrupted run.  The default 0 processes every problem.
    """
    # Create the WebDriver, pointing at the local chromedriver executable.
    web = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
    try:
        web.implicitly_wait(5)
        _log_in(web)
        problems_href = _collect_problem_links(web, question_list_url)

        # The with-block closes the file even when a page visit raises.
        with open(file_name, 'a', encoding='utf-8') as filePro:
            for count, problem in enumerate(problems_href, start=1):
                if count > start_index:
                    _save_problem(web, problem, filePro, access_interval)
                print('--------------------------------实行数目'+str(count)+'---------------------------------------')
    finally:
        # Always shut down the browser and the chromedriver process.
        web.quit()

# Script entry point: scrape the sample programming problem set into test2.md,
# waiting 2.5 seconds on each problem page.
if __name__ == '__main__':
    Programming_questions(
        question_list_url='https://pintia.cn/problem-sets/1371739727887736832/problems/type/7',
        file_name='test2.md',
        access_interval=2.5,
    )