当前位置:网站首页>Bili Bili video crawling source code sharing

Bili Bili video crawling source code sharing

2021-02-23 17:48:14 A young man of mystery

background :

   I happened to find an instructor on Bilibili ("B station") who has a very good course (Python teaching videos). My workplace network blocks access to video sites, so I tried downloading and saving the videos instead. After a period of research I finally finished the code; feel free to use it if you need it. In the future I plan to extend and optimize it, with the goal of letting the user enter a video name on a front-end page and using that as the crawl condition for the download.

 

First version:

#_author_='Lucky';
#date: 2021/2/18
import win32gui
import win32con
import win32api
import sys,os
import pynput,time
from time import sleep
from pywinauto import application
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

def man_dowload(url):
    """Download one video through the resolver page at ``api_url``.

    Uses the module-level ``bili_browser`` driver: opens ``api_url``, enters
    ``url`` into the address box, submits it, Alt-clicks the resulting MP4
    link and then calls ``save_as_window()`` to handle the "Save as" dialog.

    Any exception is printed and swallowed on purpose so that a failure on
    one video does not stop the caller from moving on to the next one.

    :param url: Address of the video page to download
    :return: None
    """
    # NOTE(review): the text inside these two XPath literals looks
    # machine-translated; confirm it against the live page before relying on it.
    address_box_xpath = "//*[@placeholder=' Enter the address ']"
    mp4_link_xpath = "//a[contains(text(),'MP4 Address ')]"
    try:
        bili_browser.get(api_url)
        sleep(25)
        bili_browser.find_element_by_xpath(address_box_xpath).clear()
        sleep(5)
        bili_browser.find_element_by_xpath(address_box_xpath).send_keys(url)
        sleep(3)
        bili_browser.find_element_by_id('button-1').click()
        element2 = bili_browser.find_element_by_xpath(mp4_link_xpath)
        sleep(2)
        # Press ALT, click the link and release ALT in ONE performed chain.
        # Previously the key_up action was built but never performed, so the
        # ALT key was left logically held down after the click.
        (ActionChains(bili_browser)
         .key_down(Keys.ALT)
         .click(element2)
         .key_up(Keys.ALT)
         .perform())
        save_as_window()
    except Exception as e:
        # Best effort: report the problem and let the caller continue.
        print(e)
# The chrome_options_setting function below can be reused as-is; it does not need to be changed.
def chrome_options_setting(web_driver):
    """
    Make sure Chrome's "Ask where to save each file before downloading"
    toggle on the downloads settings page is switched on.

    The toggle sits behind several nested shadow roots, so the function walks
    down host element by host element and clicks the toggle only when its
    ``aria-pressed`` attribute reads "false".

    :param web_driver: Browser driver
    :return: None
    """
    def _shadow_root_of(host):
        # Ask the browser for the shadow root attached to a host element.
        return web_driver.execute_script("return arguments[0].shadowRoot", host)

    web_driver.get("chrome://settings/downloads")
    time.sleep(2)

    current_root = _shadow_root_of(web_driver.find_element_by_xpath("//settings-ui"))

    # XPath selectors cannot be used beneath a shadowRoot node, hence the
    # id / CSS lookups from here on.
    main_host = current_root.find_element_by_id("container").find_element_by_id("main")
    current_root = _shadow_root_of(main_host)

    nested_hosts = (
        "settings-basic-page[role='main']",
        "settings-downloads-page",
        "settings-toggle-button",
    )
    for host_selector in nested_hosts:
        current_root = _shadow_root_of(
            current_root.find_element_by_css_selector(host_selector))

    toggle_selector = "#outerRow > cr-toggle[aria-describedby='sub-label-text']"
    pressed = current_root.find_element_by_css_selector(
        toggle_selector).get_attribute("aria-pressed")
    if pressed == "false":
        current_root.find_element_by_css_selector(toggle_selector).click()
# The following function looks up the video title using the XPath generated in the main loop, and returns it for save_as_window to use as the file name
def file_name_save():
    """Fetch the title of the current video from its page.

    Reads the module-level ``url`` and ``file_name_xpath`` and drives the
    module-level ``bili_browser``: opens ``url``, waits, then takes the text
    of the element matched by ``file_name_xpath``.

    :return: The element text, or None when anything goes wrong (the
             exception is printed, not raised)
    """
    try:
        bili_browser.get(url)
        sleep(15)
        title_element = bili_browser.find_element_by_xpath(file_name_xpath)
        video_title = title_element.text
        print(video_title)
    except Exception as err:
        print(err)
        return None
    return video_title
# The following function types the file name into the "Save as" dialog and confirms the save
def save_as_window():
    """Enter the video title in the "Save as" dialog and confirm it.

    The title comes from ``file_name_save()``. When no title could be
    fetched, the dialog's pre-filled file name is left untouched and the
    save is still confirmed, instead of handing a None value to the edit
    control.

    :return: None
    """
    end_file_name = file_name_save()

    # NOTE(review): the dialog title and button caption literals below look
    # machine-translated; confirm them against the real dialog on the target
    # system.
    app = application.Application().connect(title_re=u" Save as ", class_name="#32770")
    save_as_spec = app.window(title=u" Save as ", class_name="#32770")
    # save_as_spec.print_control_identifiers() lists the dialog's controls
    # when the "Edit" lookup below needs debugging.

    if end_file_name:
        # Windows does not allow these characters in a file name; a title
        # containing one of them would be rejected by the dialog.
        for forbidden in '\\/:*?"<>|':
            end_file_name = end_file_name.replace(forbidden, "_")
        edit = save_as_spec["Edit"]
        # Set the edit control's text directly. Simulating keyboard input
        # with edit.type_keys(name, with_spaces=True) is an alternative.
        edit.set_text(end_file_name)

    app[' Save as '][' preservation (&S)'].click()

"""
|
| ComboBox - 'notes.txt' (L536, T675, R1188, B700)
| ['ComboBox', ' Save as ComboBox', 'ComboBox0', 'ComboBox1', ' Save as ComboBox0', ' Save as ComboBox1']
| child_window(title="notes.txt", class_name="ComboBox")
| |
| | Edit - 'notes.txt' (L539, T678, R1168, B697)
| | [' Save as Edit', 'Edit', ' Save as Edit0', ' Save as Edit1', 'Edit0', 'Edit1']
| | child_window(title="notes.txt", class_name="Edit")
|
| Edit - 'notes.txt' (L539, T678, R1168, B697)
| [' Save as Edit', 'Edit', ' Save as Edit0', ' Save as Edit1', 'Edit0', 'Edit1']
| child_window(title="notes.txt", class_name="Edit")
"""


if __name__ == "__main__":
    # bili_browser, api_url, url and file_name_xpath are deliberately
    # module-level names: man_dowload() and file_name_save() read them as
    # globals.
    bili_browser = webdriver.Chrome()
    try:
        api_url = 'https://xbeibeix.com/api/bilibili'
        bili_browser.maximize_window()
        bili_browser.get(api_url)
        # Make Chrome ask where to save each file before downloading, so
        # that the "Save as" dialog appears and the file can be renamed.
        chrome_options_setting(web_driver=bili_browser)
        time.sleep(3)
        # Part numbers (?p=N) of the multi-part video to download.
        for i in range(442, 634):
            url = 'https://www.bilibili.com/video/BV197411G75w?p=' + str(i)
            # XPath of the title of part i in the page's part list.
            file_name_xpath = '//*[@id="multi_page"]/div[2]/ul/li[%d]/a/div/div[1]/span[2]' % i
            print(file_name_xpath)
            # Downloads the part and handles the Windows 10 "Save as"
            # window, changing the name of the saved file.
            man_dowload(url)
            time.sleep(5)
    finally:
        # Always shut the browser down, even when setup or a download raised,
        # so no browser/driver process is left behind.
        bili_browser.quit()

版权声明
本文为[A young man of mystery]所创,转载请带上原文链接,感谢
https://chowdera.com/2021/02/20210223174634695u.html

随机推荐