none.gif

AsCenSion

GF  2021-04-16 18:05

Iwara爬虫,科学GHS

又闲得慌,写个Iwara爬虫,欢迎大佬指点

复制代码
  1. import requests
  2. from lxml import etree
  3. import os
  4. import re
  5. from subprocess import call
  6. #################################
  7. # 函数定义
  8. # 创建文件夹
  9. def mkdir(path):
  10.     # 判断是否存在文件夹如果不存在则创建为文件夹
  11.     # 如果路径不存在会创建这个路径
  12.     folder = os.path.exists(path)
  13.     if not folder:
  14.         os.makedirs(path)
  15. # 使用IDM下载资源
  16. def idm_dld(idm_url, idm_path, idm_name):
  17.     IDMPath = "C:\\Green\\IDM 6.36.5"
  18.     os.chdir(IDMPath)
  19.     IDM = "IDMan.exe"
  20.     call([IDM, '/d', idm_url, '/p', idm_path, '/f', idm_name, '/a'])
  21.     call([IDM, '/s'])
  22. # 提取视频源
  23. def get_source_url(url):
  24.     url_api = url.split('videos', 1)[0] + 'api/video' + url.split('videos', 1)[1]
  25.     response = requests.get(url_api)
  26.     js = response.json()
  27.     for item in js:
  28.         if item['resolution'] == 'Source':
  29.             source_url = 'https:' + item['uri']
  30.             return source_url
  31. # 从页面提取视频网址
  32. def get_urls(search_url):
  33.     if search_url == '':
  34.         return []
  35.     requests.get(search_url)
  36.     web_urls = etree.HTML(requests.get(search_url).text).xpath('//div[@class="field-item even"]/a/@href')
  37.     for i in range(len(web_urls)):
  38.         web_urls[i] = 'https://ecchi.iwara.tv' + web_urls[i]
  39.     return web_urls
  40. # 下载多个网页的视频
  41. # 在expire_year-expire_month之前的视频不会下载
  42. def videos_dld(web_urls, dld_path, expire_year, expire_month):
  43.     # 对每个网址
  44.     for i in range(len(web_urls)):
  45.         print(str(i + 1) + '/' + str(len(web_urls)) + '\t' + web_urls[i])
  46.         if web_urls[i].find('videos') >= 0:
  47.             source_url = get_source_url(web_urls[i])
  48.             if source_url is None:
  49.                 continue
  50.             file_name = re.search('file=(.*?)&op', source_url).group(1)
  51.             author = etree.HTML(requests.get(web_urls[i]).text).xpath('//a[@class="username"]/text()')[0]
  52.             print('\t' + 'Author: ' + author)
  53.             print('\t' + source_url)
  54.             file_name = file_name.replace('%2F', '-')
  55.             print('\t' + 'New name: ' + file_name)
  56.             file_path = dld_path + '\\' + author
  57.             mkdir(file_path)
  58.             local_name = file_path + '\\' + file_name
  59.             old_file_name = file_name.split('-')[3]
  60.             print('\t' + 'Old name: ' + old_file_name)
  61.             old_local_name = file_path + '\\' + old_file_name
  62.             # 判断日期
  63.             if ((int(file_name.split('-')[0]) == expire_year) & (int(file_name.split('-')[1]) >= expire_month) | (int(file_name.split('-')[0]) > expire_year)):
  64.                 # 下载
  65.                 if (not os.path.isfile(local_name)) & (not os.path.isfile(old_local_name)):
  66.                     print('\tState: no such file, to be downloaded')
  67.                     idm_dld(source_url, file_path, file_name)
  68.                 elif os.path.isfile(old_local_name) & os.path.isfile(local_name):
  69.                     print('\tState: old and new file exist')
  70.                     os.remove(old_local_name)
  71.                 elif os.path.isfile(old_local_name):
  72.                     print('\tState: old file exist')
  73.                     os.rename(old_local_name, local_name)
  74.                 else:
  75.                     print('\tState: new file exist')
  76.             else:
  77.                 print('\tEarly date')
  78.                 break
  79. # 整合版
  80. def iwara_dld(dld_path, search_url, web_urls, expire_year, expire_month):
  81.     search_urls = get_urls(search_url)
  82.     # print('Download videos from search page')
  83.     videos_dld(search_urls, dld_path, expire_year, expire_month)
  84.     # print('Download videos directly from urls')
  85.     videos_dld(web_urls, dld_path, 2000, 0)
  86. ##################################
  87. # 主程序
  88. ##################################
  89. def main():
  90.     # 设置
  91.     # local_path = "D:\\Another\\temp\\ANOTHER_RUBBISH\\VIDEOS\\Iwara"
  92.     local_path = "C:\\Another\\temp\\iwara"
  93.     # 搜索页面URL
  94.     url = ''
  95.     # 视频网页.
  96.     urls = []
  97.     # 下载该日期之后的视频
  98.     year = 2020
  99.     month = 3
  100.     print('------------------------------------')
  101.     print('Iwara Downloader V1.0')
  102.     print('------------------------------------')
  103.     print('Default Settings:')
  104.     print('Download Directory: %s' % (local_path))
  105.     print('Year: %s' % str(year))
  106.     print('Month: %s' % str(month))
  107.     while True:
  108.         cmd = 0
  109.         try:
  110.             print('------------------------------------\n------------------------------------\nAvailable Functions:')
  111.             print('1.Download Video Directly From Link')
  112.             print('2.Download All Videos In The Searching Page')
  113.             print('3.Change Download Settings')
  114.             print('4.View Download Settings')
  115.             print('5.Quit')
  116.             cmd = int(input('\nChoose Desired Function:'))
  117.         except:
  118.             pass
  119.         if cmd == 1:
  120.             url = ''
  121.             urls = []
  122.             urls.append(input('\nDownload Video Directly From Link (Enter To Skip):'))
  123.             iwara_dld(local_path, url, urls, year, month)
  124.         elif cmd == 2:
  125.             url = ''
  126.             urls = []
  127.             url = input('\nDownload All Videos In The Searching Page (Enter To Skip):')
  128.             iwara_dld(local_path, url, urls, year, month)
  129.         elif cmd == 3:
  130.             try:
  131.                 local_path = input('\nSet Download Directory:')
  132.                 mkdir(local_path)
  133.                 year = int(input('\nSet Year'))
  134.                 month = int(input('\nSet Month'))
  135.             except:
  136.                 local_path = "C:\\Another\\temp\\iwara"
  137.                 year = 2020
  138.                 month = 3
  139.         elif cmd == 4:
  140.             print('\nView Download Settings :')
  141.             print('Download Directory: %s' % (local_path))
  142.             print('Year: %s' % str(year))
  143.             print('Month: %s' % str(month))
  144.         elif cmd == 5:
  145.             print('\nQuit')
  146.             break
  147.         print('Finished\n')
  148. if __name__ == '__main__':
  149.     main()


为加快下载速度调用了IDM,需要把IDM本体安装到“C:\Green\IDM 6.36.5”文件夹中,运行爬虫的时候需要保证IDM已经打开。

度盘IDM链接:
链接: https://pan.baidu.com/s/1ZsjNK8kgrLHxb7AlNZ8gEw
提取码: 9msh
解压后运行“!绿化.bat”就行了

打包的.exe文件:
链接: https://pan.baidu.com/s/1FAoyJEOZHW_rl-xo9BtAFA
提取码: qkdm

使用截图:





欢迎打赏
此帖售价 0 SP币,已有 67 人购买
若发现会员采用欺骗的方法获取财富,请立刻举报,我们会对会员处以2-N倍的罚金,严重者封掉ID!

277949.jpg

大吗H

B1F  2021-04-16 21:48
(只要是脸蛋好看的女孩子都是我喜欢的类型)
用了这个下BT能变快吗?楼主