Mzitu Image Downloader, Improved Version


The previous version only downloaded thumbnails; after this improvement it downloads the full-size images.

Reprint notice: this is reposted from a piece by forum member yh6788; please contact me if it infringes any rights.
I read through the author's code carefully and learned a lot from it, so I am reposting it here for everyone to study.
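The core of the improvement is where the image URL comes from: the front-page post list only exposes thumbnail URLs, so the script first collects the album links from the list page and then pulls the full-size image out of each album page's "main-image" block. Here is a minimal sketch of that two-step idea, distilled from the script below and assuming the site still uses a postlist container on the index and a main-image block on album pages:

import re
import requests
from bs4 import BeautifulSoup

HEADERS = {'Referer': 'https://www.mzitu.com/',
           'User-Agent': 'Mozilla/5.0'}

# Step 1: collect album links from the index page (the index only shows thumbnails).
index = BeautifulSoup(requests.get('https://www.mzitu.com/', headers=HEADERS).text, 'lxml')
albums = re.findall(r'<a href="([^"]+\d)"', str(index.find(class_='postlist')))

# Step 2: the full-size image URL sits on the album page itself, inside the "main-image" block.
for album_url in albums[:1]:
    album = BeautifulSoup(requests.get(album_url, headers=HEADERS).text, 'lxml')
    print(re.findall(r'src="([^"]+\.jpg)"', str(album.find(class_='main-image'))))

The full script follows.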

import requests
import os
import re
from bs4 import BeautifulSoup
import time


def url_open(url):
    """Fetch the index page and return the album URLs found in the post list."""
    headers = {
        'Referer': 'https://www.mzitu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    # The album links live inside the element with class "postlist".
    postlist = soup.find(class_='postlist')
    # Album URLs end in digits, so match hrefs whose last character is a digit.
    urls = re.findall(r'<a href="([^"]+\d)"', str(postlist))
    return urls

def url_open2(url):
    """Fetch a URL (used for the image files themselves) and return the raw response."""
    headers = {
        'Referer': 'https://www.mzitu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    response = requests.get(url, headers=headers)
    return response



def save_jpg(jpg, x):
    """Download one image URL and save it as <x>_<original filename> in the current directory."""
    filename = str(x) + '_' + jpg.split('/')[-1]
    print('Saving file ' + filename)
    print('=' * 50)
    with open(filename, 'wb') as f:
        img = url_open2(jpg).content
        f.write(img)

def find_imgs(url):
    """Walk through an album: save the full-size image on each page, then follow the "next" link."""
    os.system('cls')  # Windows-only: clear the console
    x = 0
    headers = {
        'Referer': 'https://www.mzitu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    # Follows the "next" link page after page; quit with Ctrl+C, or the loop
    # stops on its own if the expected markup is no longer found.
    while True:
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'lxml')
        # The full-size image and the link to the next page both sit inside the "main-image" block.
        main = soup.find(class_='main-image')
        if main is None:
            break
        main_html = str(main)
        imgs = re.findall(r'src="([^"]+\.jpg)"', main_html)
        nexts = re.findall(r'href="([^"]+\d)"', main_html)
        if not imgs or not nexts:
            break
        img_url = imgs[0]
        next_url = nexts[0]
        print()
        print('Processing image: %s \nNext page: %s' % (img_url, next_url))
        save_jpg(img_url, x)
        url = next_url
        x += 1


def url_open1(htmls):
    """Process each album URL in turn."""
    print(htmls)
    for url in htmls:
        find_imgs(url)
        print('Press Ctrl+C to quit.')
        time.sleep(555)



def download_new(folder='OOXX'):
    """Recreate the download folder and start the crawl (the shell commands are Windows-only)."""
    os.system('cls')
    os.system(r'del /q OOXX\*.*')
    os.system('rmdir OOXX')
    os.mkdir(folder)
    os.chdir(folder)
    print(url)  # uses the module-level url set under __main__
    htmls = url_open(url)
    url_open1(htmls)

if __name__ == '__main__':
    url = 'http://www.mzitu.com/'
    #download_mm()
    download_new()
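
One thing to note if you run this outside a Windows console: cls, del, and rmdir are cmd.exe commands, so download_new will not clear or recreate the folder on other systems. A hedged, cross-platform alternative for just the folder handling is sketched below; the download logic is unchanged, and download_new_portable is only an illustrative name:

import os
import shutil

def download_new_portable(folder='OOXX'):
    # Remove any previous run's folder, then recreate it and work inside it.
    shutil.rmtree(folder, ignore_errors=True)
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    htmls = url_open(url)   # reuses url_open, url_open1 and the global url from the script above
    url_open1(htmls)

Also keep the Referer header when adapting the code: image hosts like this one commonly reject requests whose Referer does not point back to the site (anti-hotlinking), which is why every request in the script sends it.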
