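# 58同城 (58.com) listing scraper: fetches https://cd.58.com/jianzhi pages and stores the results in SQLite.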
import requests
from lxml import etree
import sqlite3
from time import sleep
# Request headers shared by every HTTP call in this script: a desktop
# Chrome User-Agent string is sent instead of the library default.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
    ),
}
def main():
    """Scrape the listings, echo them to stdout, then persist them to SQLite."""
    scraped_rows = judgment_url()
    print(scraped_rows)
    # The database file is created next to the working directory.
    saveDataDB(scraped_rows, '58同城.db')
def _first_or_blank(values):
    """Return the first item of an XPath result list, or ' ' when it is empty."""
    return values[0] if values else ' '


def _parse_detail(tree):
    """Build one job record from a parsed detail page.

    Returns ``[title, price, address_one, address_two, work_time,
    description]`` (all strings), or ``None`` when the page has no title
    heading and should be skipped.
    """
    title_name = tree.xpath('//*[@id="content"]//div/h1/text()')
    if not title_name:
        return None

    price_money = tree.xpath('//*[@id="content"]//div[2]/span[1]/text()')
    address = tree.xpath('//*[@id="content"]//div[5]/ul/li[3]/div/span/a/text()')
    work_time = tree.xpath('//*[@id="content"]/div[1]/div[2]/div[1]/span[2]/text()')
    describe = tree.xpath('//*[@id="content"]/div[1]/div[2]/p/text()')

    # Exactly two address parts fill both columns; anything else keeps only
    # the first part (if any) and pads the second column with a blank.
    if len(address) == 2:
        address_one, address_two = address
    else:
        address_one, address_two = _first_or_blank(address), ' '

    return [
        title_name[0],
        _first_or_blank(price_money),
        address_one,
        address_two,
        _first_or_blank(work_time),
        " ".join(describe),
    ]


def judgment_url(max_pages=1):
    """Scrape job listings and return one record per detail page.

    Listing pages ``https://cd.58.com/jianzhi/pn<N>/`` are fetched for
    ``N = 1 .. max_pages``; every linked detail page is downloaded and parsed.

    Args:
        max_pages: Number of listing pages to walk. Defaults to 1.

    Returns:
        A list of records, each a list of six strings:
        ``[title, price, address_one, address_two, work_time, description]``.
        Fields missing from a page are stored as ``' '``; pages without a
        title heading are skipped.
    """
    listing_root = 'https://cd.58.com/jianzhi/pn'
    data_list = []  # accumulated across all pages

    for page_no in range(1, max_pages + 1):
        page_url = listing_root + str(page_no) + '/'
        # A timeout keeps one stalled connection from hanging the whole run.
        listing_html = requests.get(url=page_url, headers=headers, timeout=10).text
        listing_tree = etree.HTML(listing_html)
        if listing_tree is None:
            # Guard in case the parser yields no document for this page.
            continue

        headings = listing_tree.xpath('//div[@class="items"]/div/div[@class="item1"]/h2')
        for heading in headings:
            hrefs = heading.xpath('./a/@href')
            if not hrefs:
                continue
            href = hrefs[0]
            # Links that do not already carry the scheme get "https:" prepended.
            detail_url = href if href.startswith("https:") else "https:" + href

            reply = requests.get(url=detail_url, headers=headers, timeout=10)
            print(reply.status_code)
            try:
                detail_tree = etree.HTML(reply.text)
            except ValueError as e:
                print(e)
                continue

            record = _parse_detail(detail_tree) if detail_tree is not None else None
            if record is None:
                continue

            print('正在爬取中.........')
            data_list.append(record)
            sleep(1)  # pause between successfully scraped detail pages

    return data_list
def saveDataDB(get_data, dbpath):
    """Insert scraped records into the ``tongcheng`` table of a SQLite file.

    Args:
        get_data: Iterable of records, each holding six values in the order
            title_name, price_money, address_one, address_two, work_time,
            work_describe.
        dbpath: Path of the SQLite database file; the table is created via
            ``init_db`` before inserting.

    The rows in ``get_data`` are not modified. All inserts are committed
    together; if any insert fails, the transaction is rolled back and the
    exception propagates.
    """
    init_db(dbpath)
    # Placeholders let sqlite3 bind the values, so quotes or other special
    # characters in scraped text cannot break (or inject into) the statement.
    sql = (
        'insert into tongcheng '
        '(title_name, price_money, address_one, address_two, work_time, work_describe) '
        'values (?, ?, ?, ?, ?, ?)'
    )
    conn = sqlite3.connect(dbpath)
    try:
        # `with conn` commits on success and rolls back on an exception.
        with conn:
            for row in get_data:
                print(row)
                conn.execute(sql, tuple(row))
    finally:
        conn.close()
def init_db(dbpath):
    """Ensure the ``tongcheng`` table exists in the SQLite file at *dbpath*.

    Safe to call repeatedly: the table is only created when it is missing,
    so re-running the script against an existing database does not fail.
    """
    sql = '''
        create table if not exists tongcheng (
            id integer primary key autoincrement not null,
            title_name varchar,
            price_money real,
            address_one char(50),
            address_two char(50),
            work_time varchar,
            work_describe TEXT
        );
    '''
    conn = sqlite3.connect(dbpath)
    try:
        # `with conn` commits the DDL on success and rolls back on error.
        with conn:
            conn.execute(sql)
    finally:
        conn.close()
# Script entry point: run the scraper only when executed directly, then
# report completion on stdout.
if __name__ == "__main__":
    main()
    print("Over!!")
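# 文章评论 ("article comments") -- appears to be a leftover heading from the web page this script was copied from.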