寻找知乎最美小姐姐

~~Python 我不会,更不会讲。~~突然布置任务,就感觉挺突然…

网上也有教程。

0x01 获取图片

没啥好说的,直接上源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
##Get_imgs.py
import requests
from lxml import etree
import json
import time
import re

# Browser-like User-Agent string sent with every request.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'

# HTTP headers shared by all requests made in this script.
headers = {
    'user-agent': _USER_AGENT,
    'cookie': '',  # paste your own cookie here before running
}

def get_img(url):
    """Download every JPG image referenced by one page of the answers API.

    Fetches ``url`` with the module-level ``headers``, parses the response
    body as JSON and, for each entry in its ``data`` list, extracts all
    ``img src`` URLs from the entry's HTML ``content``. Every URL that
    contains ``jpg`` is downloaded and written to
    ``row_img/<author name>+<counter>.jpg``.

    The ``row_img`` directory must already exist. The counter restarts at 1
    on every call.

    Args:
        url: URL of one page of the answers API.

    Raises:
        KeyError: if the JSON response has no ``data`` key or an entry lacks
            ``author``/``name``/``content``.
    """
    res = requests.get(url, headers=headers)
    json_data = json.loads(res.text)
    i = 1  # running counter over all images saved during this call
    for data in json_data['data']:
        author = data['author']['name']
        # A path separator inside the display name would make open() point
        # into a sub-directory that does not exist, so neutralise it.
        safe_author = author.replace('/', '_').replace('\\', '_')
        imgs = re.findall('img src="(.*?)"', data['content'], re.S)
        for img in imgs:
            if 'jpg' not in img:
                continue
            res_1 = requests.get(img, headers=headers)
            # The context manager closes the handle after each image; a bare
            # open() here leaked one file handle per download.
            with open('row_img/' + safe_author + '+' + str(i) + '.jpg', 'wb') as fp:
                fp.write(res_1.content)
            i = i + 1
            print(author, img)

if __name__ == '__main__':
    # Answers-API URL; the single `{}` placeholder is the paging offset.
    page_url = 'https://www.zhihu.com/api/v4/questions/29024583/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_labeled%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%2A%5D.topics&limit=5&offset={}&platform=desktop&sort_by=default'
    # Offsets 0, 5, ..., 24995: one request per page of 5 answers (limit=5).
    for offset in range(0, 25000, 5):
        get_img(page_url.format(offset))
        # Pause between pages so the requests are spaced 2 seconds apart.
        time.sleep(2)

urls 中的 29024583 是知乎问题的编号,可以换成自己想抓取的问题的编号。

0x02 获取token

给小姐姐打分需要调用百度的人脸检测接口(仅作为学习使用)。先去百度 AI 开放平台注册登录,然后将自己的 ak 和 sk 填入下面的代码,运行即可获得 token。返回结果中的 access_token 就是我们后续要用的 token,有效期为 30 天。

1
2
3
4
5
6
import requests
# Credentials of your own application; fill both in before running.
ak = ''
sk = ''

# Token endpoint with placeholders for client_id (ak) and client_secret (sk).
_token_url_template = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={}&client_secret={}'
host = _token_url_template.format(ak, sk)

# POST to the endpoint and dump the raw response body.
res = requests.post(host)
print(res.text)

0x03 评分流程

将上一步得到的 token 放入代码中,运行即可。程序会把满足条件(性别不是男性且颜值分数不低于 60)的照片移动到 OK 目录中,重命名格式为"分数+原图片名";其余照片会被删除。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import base64
import json
import requests
import os
import shutil
import time

# Access token obtained in the previous step; fill in before running.
token = ''

# Face-detect endpoint with the access token appended as a query parameter.
_detect_endpoint = "https://aip.baidubce.com/rest/2.0/face/v3/detect"
request_url = _detect_endpoint + "?access_token=" + token

def get_img_base(file):
    """Return the Base64 encoding (as bytes) of the file at path ``file``."""
    with open(file, 'rb') as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes)

# Score every image in `row_img` with the face-detect API: delete images
# that are rejected, and move accepted ones to `OK/<beauty>+<original name>`.
file_path = 'row_img'
# shutil.move() into 'OK/' fails when the directory is missing, so create it
# up front instead of crashing on the first image that passes the filters.
os.makedirs('OK', exist_ok=True)
list_paths = os.listdir(file_path)
for list_path in list_paths:
    img_path = file_path+'/'+list_path
    params = {
        'image': get_img_base(img_path),
        'image_type': 'BASE64',
        'face_field': 'age,beauty,gender',
    }
    res = requests.post(request_url, data=params)
    json_result = json.loads(res.text)
    code = json_result['error_code']
    # NOTE(review): 222202 is presumably the API's "no face found" code;
    # confirm against the service's error-code table.
    if code == 222202:
        print("Deleting >>>>>>> ", img_path)
        os.remove(img_path)
        continue

    # Only the response look-ups can raise KeyError/TypeError (e.g. `result`
    # is missing or None on an API error). Report the image rather than
    # skipping it silently, so failures such as an invalid token are visible.
    try:
        face = json_result['result']['face_list'][0]
        gender = face['gender']['type']
        beauty = face['beauty']
    except (KeyError, TypeError):
        print("Skipping >>>>>>> ", img_path, code, json_result.get('error_msg'))
        continue

    if gender == 'male':
        print("Deleting male >>>>>>> ", img_path)
        os.remove(img_path)
        continue
    if beauty < 60.0:
        print("Deleting low >>>>>>> ", img_path, beauty)
        os.remove(img_path)
        continue

    # Accepted: keep the picture, prefixing its name with the score.
    Newname = 'OK/'+str(beauty)+'+'+list_path
    shutil.move(img_path, Newname)
    print(img_path, beauty)
    time.sleep(1)

0x04 运行结果展示