逆天邪神小说下载

Sat 11 October 2025

使用 Python 自动爬取网页小说并生成 TXT 文件

由于该脚本依赖第三方库,请先安装 BeautifulSoup(beautifulsoup4)与 requests。

pip3 install beautifulsoup4
pip3 install requests

然后任意创建一个文件夹(或者直接使用根目录),把下面的脚本保存为 17549.py 放进去。

在终端输入 python3 17549.py(如果创建了文件夹,记得先 cd 进去),脚本就会开始爬取……

Python 爬网页小说脚本

# -*- coding:UTF-8 -*-
import sys
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

class downloader(object):
    """Scrape a novel's chapter index and chapter pages into plain text.

    Typical use: construct with the URL of the chapter-index page, call
    ``get_name_address_list()`` to fill ``title``, ``names``, ``urls`` and
    ``nums``, then call ``get_one_text()`` for each entry of ``urls`` and pass
    the result to ``writer()``.
    """

    # Seconds to wait on the server before giving up, so that a stalled
    # connection cannot hang the whole run.
    REQUEST_TIMEOUT = 10

    # Markup fragments left in a rendered <p> element and the plain text that
    # replaces each of them. The last entry is a literal backslash-p, kept
    # from the original script (written escaped so it is a valid literal).
    _TAG_REPLACEMENTS = (
        ('</div>', '\n'),
        ('<br/>', '\n'),
        ('<p>', '\n'),
        ('</p>', '\n'),
        ('<\\p>', '\n'),
    )

    def __init__(self, url: str) -> None:
        """Remember the index-page URL and start with empty chapter data.

        Args:
            url: URL of the page that lists all chapters.
        """
        self.target = url  # chapter-index page
        self.names = []    # chapter names, in page order
        self.urls = []     # chapter links, parallel to ``names``
        self.nums = 0      # number of chapters found
        self.title = ""    # novel title

    def get_one_text(self, url_i: str) -> str:
        """Download one chapter page and return its body as plain text.

        Args:
            url_i: Chapter link as found on the index page. It is resolved
                against ``self.target``, so site-relative paths and absolute
                URLs both work.

        Returns:
            A leading space followed by every ``<p>`` element of the page
            except the first and the last one, with paragraph and line-break
            tags turned into newlines.

        Raises:
            IndexError: If the page contains no ``<p>`` element at all.
        """
        chapter_url = urljoin(self.target, url_i)
        response = requests.get(url=chapter_url, timeout=self.REQUEST_TIMEOUT)
        # Let requests guess the charset from the body instead of trusting
        # the HTTP header.
        response.encoding = response.apparent_encoding

        soup = BeautifulSoup(response.text, features='html.parser')
        paragraphs = soup.find_all('p')
        if not paragraphs:
            raise IndexError(
                'no <p> elements found in {}'.format(chapter_url))

        # The first and last <p> are skipped, as the original script did
        # (presumably site boilerplate rather than chapter text).
        text = ' ' + ''.join(str(p) for p in paragraphs[1:-1])
        for fragment, replacement in self._TAG_REPLACEMENTS:
            text = text.replace(fragment, replacement)
        return text

    def get_name_address_list(self) -> None:
        """Fetch the index page and record the title and all chapter links.

        Fills ``self.title`` from the first ``<h1>`` and rebuilds
        ``self.names`` / ``self.urls`` from the ``<li>`` entries inside the
        ``<div id="play_0">`` element; ``self.nums`` is set to the number of
        chapters recorded. The lists are rebuilt on every call, so calling
        this twice does not duplicate chapters. Prints the chapter count.

        Raises:
            AttributeError: If the page has no ``<h1>`` element.
            IndexError: If the page has no ``<div id="play_0">`` element.
        """
        response = requests.get(self.target, timeout=self.REQUEST_TIMEOUT)
        response.encoding = response.apparent_encoding
        soup = BeautifulSoup(response.text, features='html.parser')

        self.title = soup.find('h1').text
        chapter_box = soup.find_all('div', attrs={"id": "play_0"})[0]

        names = []
        urls = []
        for item in chapter_box.find_all('li'):
            anchor = item.find('a')
            if anchor is None:
                continue  # list entry without a link: nothing to download
            href = anchor.get('href')
            if href is None:
                continue
            # ``.string`` is None when the anchor holds more than one child
            # node; fall back to the concatenated text in that case.
            name = anchor.string
            if name is None:
                name = anchor.get_text()
            names.append(name)
            urls.append(href)

        self.names = names
        self.urls = urls
        self.nums = len(names)
        print("共:" + str(self.nums) + "章")

    def writer(self, name: str, path: str, text: str) -> None:
        """Append one chapter to the output file.

        Args:
            name: Chapter heading, written on its own line.
            path: File to append to; created if it does not exist.
            text: Chapter body, written after the heading and followed by
                two newlines.
        """
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            f.write(text)
            f.write('\n\n')

 

def main():
    """Download every chapter of the configured novel into ``<title>.txt``.

    Chapters are appended to the output file in index order. A chapter whose
    page cannot be fetched or parsed is reported and skipped so that one bad
    page does not abort the whole download.
    """
    dl = downloader("https://www.nitianxieshen.com/zhuxian/")
    dl.get_name_address_list()

    print('《'+dl.title+'》开始下载:')
    out_path = dl.title + '.txt'
    total = dl.nums
    for done, (name, url) in enumerate(zip(dl.names, dl.urls), start=1):
        time.sleep(0.2)  # short pause between requests
        try:
            dl.writer(name, out_path, dl.get_one_text(url))
        except (IndexError, requests.RequestException) as e:
            # Start a fresh line so the error is not overwritten by the
            # progress display, and say which chapter was skipped.
            print('\n' + str(name) + ': ' + repr(e))
        # '\r' goes first so the whole progress line is redrawn in place;
        # ``done`` counts finished chapters, so the last update shows 100%.
        sys.stdout.write(
            '\r已下载:%.3f%% 当前第:%d 章' % (done / total * 100, done))
        sys.stdout.flush()
    # The progress line has no trailing newline; move past it first.
    print('\n' + dl.title + '下载完成')


if __name__ == "__main__":
    main()

Category: 11