看看新闻搜索接口反爬虫分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import hashlib
import time


def get_signe(sign_string):
    """Return the double-MD5 signature of *sign_string*.

    The site's client-side sign algorithm hashes the assembled query
    string twice: ``md5(md5(s))``, where each round produces a lowercase
    hex digest that is re-encoded as UTF-8 for the next round.

    :param sign_string: the sorted, salted query string to sign.
    :return: 32-character lowercase hex digest.
    """
    # Removed a leftover debug print(sign_string) that wrote the signed
    # string to stdout on every call.
    first_round = hashlib.md5(sign_string.encode("utf-8")).hexdigest()
    return hashlib.md5(first_round.encode("utf-8")).hexdigest()


# Request headers captured from a browser session against kankanews.com.
# NOTE(review): this dict (and the `params` dict just below) is shadowed
# by the re-definitions further down the file before any request is sent,
# so it is effectively documentation of the captured traffic.
headers = {
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:126.0) Gecko/20100101 Firefox/126.0",
"accept": "application/json, text/plain, */*",
"accept-language": "zh-CN",
"platform": "pc",
"version": "2.9.4",
"nonce": "b5mtrunz",
"timestamp": "1717752984",  # stale captured value; must be refreshed per request
"api-version": "v1",
"sign": "782d765a122609c4de153970eb896f14",  # stale; recomputed via get_sign before sending
"origin": "https://www.kankanews.com",
"referer": "https://www.kankanews.com/",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-site",
"priority": "u=1",
"te": "trailers"
}

# Example query parameters from a captured programme request.
# NOTE(review): shadowed by the search `params` defined later in the
# file; kept here as a record of the captured call.
params = {
"slit_program_id": "D1OwG002Eb0",
"date": "2024-06-06"
}


def get_sign(para, headers):
    """Build the ``sign`` value for a kankanews API request.

    Merges the URL query parameters with a handful of header fields,
    sorts them by key, joins them as a query string, appends a fixed
    salt, and double-MD5 hashes the result (see ``get_signe``).

    :param para: dict of URL query parameters for the request.
    :param headers: request headers; must contain 'version', 'platform',
        'nonce', 'api-version' and 'timestamp'.
    :return: 32-character lowercase hex signature.
    """
    enc_dict = dict(para)
    # Header fields that participate in the signature.  The signed key is
    # 'Api-Version' (capitalised) even though the header is sent as
    # 'api-version'; capitals sort before lowercase in ASCII, which
    # matters for the sorted join below.
    # Consistency fix: take 'version' from headers like every other
    # field instead of hard-coding '2.9.4' here.
    enc_dict['version'] = headers['version']
    enc_dict['platform'] = headers['platform']
    enc_dict['nonce'] = headers['nonce']
    enc_dict['Api-Version'] = headers['api-version']
    enc_dict['timestamp'] = headers['timestamp']

    # Sort by key (ASCII order) and build the query string, skipping any
    # dunder-prefixed keys.
    sorted_params = sorted(enc_dict.items())
    query_string = '&'.join(f"{key}={value}" for key, value in sorted_params if not key.startswith('__'))

    # Fixed salt appended by the site's JS (looks like an MD5 digest;
    # constant lifted from the obfuscated client code — do not change).
    query_string += '&28c8edde3d61a0411511d3b1866f0636'
    return get_signe(query_string)


def get_sign_hash_from_dict():
    """Recompute the signature for one captured request (sanity check).

    Runs a fixed, already-merged parameter dict captured from the
    browser through the same sort / join / salt / double-MD5 pipeline,
    so the result can be compared with the 'sign' header seen on the
    wire.
    """
    captured = {
        "Api-Version": "v1",
        "date": "2024-06-07",
        "nonce": "mdfrdapo",
        "platform": "pc",
        "slit_program_id": "q0zwqDMQ7vm",
        "timestamp": 1718178547,
        "version": "2.9.4",
    }
    pairs = [f"{key}={value}" for key, value in sorted(captured.items()) if not key.startswith('__')]
    signed = '&'.join(pairs) + '&28c8edde3d61a0411511d3b1866f0636'
    return get_signe(signed)


# NOTE(review): this import belongs at the top of the file with the others.
import requests

# Fresh copy of the captured browser headers; 'timestamp' and 'sign' are
# placeholders here and are overwritten below before the request is sent.
headers = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:126.0) Gecko/20100101 Firefox/126.0",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN",
    "platform": "pc",
    "version": "2.9.4",
    "nonce": "b5mtrunz",
    "timestamp": "1718179787",
    "api-version": "v1",
    "sign": "782d765a122609c4de153970eb896f14",
    "origin": "https://www.kankanews.com",
    "referer": "https://www.kankanews.com/",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "priority": "u=1",
    "te": "trailers",
}

url = 'https://kapi.kankanews.com/content/pc/news/search'
params = {
    'keyword': 'china',
    'type': '3',
    'size': '20',
    'page': '2',
}

# Stamp the request with the current time and re-sign it; the server
# validates 'sign' against the exact params + header fields sent.
timestamp = int(time.time())
headers['timestamp'] = str(timestamp)
sign = get_sign(para=params, headers=headers)
headers['sign'] = sign

response = requests.get(url, headers=headers, params=params)
print(response.text)
print(response)


看看新闻搜索接口反爬虫分析
https://kingjem.github.io/2024/06/27/看看新闻搜索/
作者
Ruhai
发布于
2024年6月27日
许可协议