寻找知乎最美小姐姐

~~Python 我不会，更不会讲。~~突然布置任务，就感觉挺突然…

网上也有教程。

0x01 获取图片

没啥好说的直接上源码：

##Get_imgs.py
import requests
from lxml import etree
import json
import time
import re

# Request headers sent with every HTTP call in this script: a desktop Chrome
# user-agent plus the cookie, which is left blank for the user to fill in.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.106 Safari/537.36'
    ),
    'cookie': '',  # Input your cookie
}

def get_img(url):
    """Download every JPEG embedded in one page of answers.

    Fetches ``url``, parses the response body as JSON, and walks the
    top-level ``data`` list. For each entry, the ``img src="..."`` URLs are
    pulled out of its HTML ``content``; every URL containing ``jpg`` is
    downloaded and written to ``row_img/<author>+<n>.jpg``. The ``row_img``
    directory must already exist, because ``open`` does not create it.

    Args:
        url: URL of an answers page whose JSON body has a ``data`` list of
            objects with ``author.name`` and ``content`` fields.

    Returns:
        None. The downloaded images are written to disk and each saved
        image is reported on stdout.
    """
    res = requests.get(url, headers=headers)
    answers = json.loads(res.text)['data']
    # The running index is stored on the function so it keeps increasing
    # across calls. Restarting at 1 for every page made authors that share a
    # display name on different pages overwrite each other's files.
    index = getattr(get_img, '_next_index', 1)
    for answer in answers:
        name = answer['author']['name']
        # Display names are free text; replace characters that are illegal in
        # file names so that open() cannot fail or escape row_img/.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
        for img in re.findall('img src="(.*?)"', answer['content'], re.S):
            if 'jpg' not in img:
                continue
            img_res = requests.get(img, headers=headers)
            # The context manager closes the file even if the write fails.
            with open('row_img/' + safe_name + '+' + str(index) + '.jpg', 'wb') as fp:
                fp.write(img_res.content)
            index += 1
            get_img._next_index = index
            print(name, img)

if __name__ == '__main__':
    # Answers-API URL for question 29024583; ``{}`` is the paging offset.
    # Each page holds 5 answers (limit=5), so the offset advances in steps
    # of 5 from 0 up to (but not including) 25000.
    url_template = 'https://www.zhihu.com/api/v4/questions/29024583/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_labeled%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%2A%5D.topics&limit=5&offset={}&platform=desktop&sort_by=default'
    for offset in range(0, 25000, 5):
        get_img(url_template.format(str(offset)))
        # Wait two seconds between pages.
        time.sleep(2)

urls 中的 29024583 是知乎问题的编号，可以换成自己想爬取的问题编号。运行前需要先在当前工作目录下创建 row_img 目录，并在 headers 的 cookie 字段中填入自己的知乎 cookie。

0x02 获取token

给小姐姐打分需要调用百度的人脸识别接口（仅作为学习使用）。先到百度 AI 开放平台（https://ai.baidu.com）注册登录并创建应用，将自己的 ak（API Key）和 sk（Secret Key）填入下面的代码中，运行后即可获得 token。返回结果中的 access_token 就是我们后续要用的 token，有效期为 30 天。

import requests
# Fill in the two credentials before running.
ak = ''
sk = ''
# Token endpoint with both credentials passed as query parameters.
host = (
    'https://aip.baidubce.com/oauth/2.0/token'
    '?grant_type=client_credentials&client_id={}&client_secret={}'
).format(ak, sk)
res = requests.post(host)
# Print the raw response body returned by the token endpoint.
print(res.text)

0x03 评分流程

将上一步得到的 token 填入代码中，并先手动创建 OK 目录，然后运行即可。脚本会删除没有检测到人脸、性别为男或颜值低于 60 分的照片，把满足条件的照片移动到 OK 目录中，重命名格式为“分数+原图片名”。

import base64
import json
import requests
import os
import shutil
import time

# Paste the access_token obtained in the previous step here.
token = ''
# Face-detect endpoint with the access token appended as a query parameter.
request_url = "https://aip.baidubce.com/rest/2.0/face/v3/detect" + "?access_token=" + token

def get_img_base(file):
    """Return the contents of ``file`` as base64-encoded bytes.

    Args:
        file: Path of the file to read; it is opened in binary mode.

    Returns:
        The ``bytes`` produced by ``base64.b64encode`` on the file contents.
    """
    with open(file, 'rb') as handle:
        raw = handle.read()
    return base64.b64encode(raw)

# Score every file in row_img/ with the face-detect endpoint.
#   * error_code 222202 -> the image is deleted (presumably "no face found";
#     confirm against the API's error-code table)
#   * gender 'male' or beauty below 60 -> the image is deleted
#   * anything else -> moved to OK/ as "<beauty>+<original name>"
# Both row_img/ and OK/ must already exist.
file_path = 'row_img'
list_paths = os.listdir(file_path)
for list_path in list_paths:
    img_path = file_path+'/'+list_path
    params = {
        'image':get_img_base(img_path),
        'image_type':'BASE64',
        'face_field':'age,beauty,gender'
    }
    res = requests.post(request_url,data=params)
    # Pause after every request, not only after a kept image: the delete
    # branches used to skip the sleep and fire the next request immediately
    # (the pause is presumably there to respect an API rate limit).
    time.sleep(1)
    json_result = json.loads(res.text)
    code = json_result.get('error_code')
    if code == 222202:
        print("Deleting >>>>>>> ",img_path)
        os.remove(img_path)
        continue
    if code != 0:
        # Any other API error: keep the image and report the failure instead
        # of silently swallowing it.
        print("Skipping >>>>>>> ",img_path,code,json_result.get('error_msg'))
        continue

    # Keep the try body to the response lookups only, so real bugs in the
    # file operations below are not masked.
    try:
        face = json_result['result']['face_list'][0]
        gender = face['gender']['type']
        beauty = face['beauty']
    except (KeyError, IndexError, TypeError):
        print("Skipping >>>>>>> ",img_path,"unexpected response")
        continue

    if gender == 'male':
        print("Deleting male >>>>>>> ",img_path)
        os.remove(img_path)
        continue
    if beauty < 60.0:
        print("Deleting low >>>>>>> ",img_path,beauty)
        os.remove(img_path)
        continue
    Newname = 'OK/'+str(beauty)+'+'+list_path
    shutil.move(img_path,Newname)
    print(img_path,beauty)