In HW you constantly run into scenarios with N targets, and collecting them one by one is hard enough.

The 2023 HW went further: only 100%-owned (wholly controlled) subsidiaries counted, which makes the job even harder.

:::success
The approach:

  1. ICP filing (备案) lookup
  2. Compile the full list of root domains
  3. All-in FOFA download

:::
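End to end, it's three scripts run in order: ICP_checks.py → domain_get.py → domain_fofa.py, all shown below.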

1. ICP filing lookup

Single-target script (ICP_checker.py):
```python
# -*- coding: utf-8 -*-
import re
import os
import cv2
import time
import base64
import hashlib
import requests
import openpyxl as xl
from openpyxl.styles import Alignment

os.environ['no_proxy'] = '*'


def query_base():
    print("版本:V2.1.6 可用测试:2023-2-26\n")
    print("项目地址:https://github.com/wongzeon/ICP-Checker\n")
    while True:
        try:
            info = input("请完整输入公司全称 / 域名以查询备案信息:\n\n").replace(" ", "").replace("https://www.", "").replace("http://www.", "").replace("http://", "")
            # Reject empty input and strip special characters; only - . () survive,
            # for domains and company names respectively
            if info == "":
                raise ValueError("InputNone")
            info = re.sub("[^\\u4e00-\\u9fa5-A-Za-z0-9,-.()()]", "", info)
            input_zh = re.compile(u'[\u4e00-\u9fa5]')
            zh_match = input_zh.search(info)
            if zh_match:
                info_result = info
            else:
                # Check that the suffix is a type eligible for ICP filing (list as of 2022/01/06)
                # TODO some special domains, e.g. 51.la, can also be filed -- likely special cases
                input_url = re.compile(
                    r'([^.]+)(?:\.(?:GOV\.cn|ORG\.cn|AC\.cn|MIL\.cn|NET\.cn|EDU\.cn|COM\.cn|BJ\.cn|TJ\.cn|SH\.cn|CQ\.cn|HE\.cn|SX\.cn|NM\.cn|LN\.cn|JL\.cn|HL\.cn|JS\.cn|ZJ\.cn|AH\.cn|FJ\.cn|JX\.cn|SD\.cn|HA\.cn|HB\.cn|HN\.cn|GD\.cn|GX\.cn|HI\.cn|SC\.cn|GZ\.cn|YN\.cn|XZ\.cn|SN\.cn|GS\.cn|QH\.cn|NX\.cn|XJ\.cn|TW\.cn|HK\.cn|MO\.cn|cn|REN|WANG|CITIC|TOP|SOHU|XIN|COM|NET|CLUB|XYZ|VIP|SITE|SHOP|INK|INFO|MOBI|RED|PRO|KIM|LTD|GROUP|BIZ|AUTO|LINK|WORK|LAW|BEER|STORE|TECH|FUN|ONLINE|ART|DESIGN|WIKI|LOVE|CENTER|VIDEO|SOCIAL|TEAM|SHOW|COOL|ZONE|WORLD|TODAY|CITY|CHAT|COMPANY|LIVE|FUND|GOLD|PLUS|GURU|RUN|PUB|EMAIL|LIFE|CO|FASHION|FIT|LUXE|YOGA|BAIDU|CLOUD|HOST|SPACE|PRESS|WEBSITE|ARCHI|ASIA|BIO|BLACK|BLUE|GREEN|LOTTO|ORGANIC|PET|PINK|POKER|PROMO|SKI|VOTE|VOTO|ICU))',
                    flags=re.IGNORECASE)
                info_result = input_url.search(info)
                if info_result is None:
                    if info.split(".")[0] == "":
                        raise ValueError("OnlyDomainInput")
                    raise ValueError("ValidType")
                else:
                    info_result = info_result.group()
            info_data = {'pageNum': '1', 'pageSize': '40', 'unitName': info_result}
            return info_data
        except ValueError as e:
            if str(e) == 'InputNone' or str(e) == 'OnlyDomainInput':
                print("\n ************** 请正确输入域名 **************\n")
            else:
                print("\n*** 该域名不支持备案,请查阅:http://xn--fiq8ituh5mn9d1qbc28lu5dusc.xn--vuq861b/ ***\n")


def get_cookies():
    cookie_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32'}
    err_num = 0
    while err_num < 3:
        try:
            cookie = requests.utils.dict_from_cookiejar(requests.get('https://beian.miit.gov.cn/', headers=cookie_headers).cookies)['__jsluid_s']
            return cookie
        except:
            err_num += 1
            time.sleep(3)
    return -1


def get_token():
    timeStamp = round(time.time() * 1000)
    authSecret = 'testtest' + str(timeStamp)
    authKey = hashlib.md5(authSecret.encode(encoding='UTF-8')).hexdigest()
    auth_data = {'authKey': authKey, 'timeStamp': timeStamp}
    url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/auth'
    try:
        t_response = requests.post(url=url, data=auth_data, headers=base_header).json()
        token = t_response['params']['bussiness']
    except:
        return -1
    return token


def get_check_pic(token):
    url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImage'
    base_header['Accept'] = 'application/json, text/plain, */*'
    base_header.update({'Content-Length': '0', 'token': token})
    try:
        p_request = requests.post(url=url, data='', headers=base_header).json()
        p_uuid = p_request['params']['uuid']
        big_image = p_request['params']['bigImage']
        small_image = p_request['params']['smallImage']
    except:
        return -1
    # Decode the captcha images, write them to disk, and locate the slider gap
    with open('bigImage.jpg', 'wb') as f:
        f.write(base64.b64decode(big_image))
    with open('smallImage.jpg', 'wb') as f:
        f.write(base64.b64decode(small_image))
    background_image = cv2.imread('bigImage.jpg', cv2.COLOR_GRAY2RGB)
    fill_image = cv2.imread('smallImage.jpg', cv2.COLOR_GRAY2RGB)
    position_match = cv2.matchTemplate(background_image, fill_image, cv2.TM_CCOEFF_NORMED)
    max_loc = cv2.minMaxLoc(position_match)[3][0]
    mouse_length = max_loc + 1
    os.remove('bigImage.jpg')
    os.remove('smallImage.jpg')
    check_data = {'key': p_uuid, 'value': mouse_length}
    return check_data


def get_sign(check_data, token):
    check_url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/checkImage'
    base_header.update({'Content-Length': '60', 'token': token, 'Content-Type': 'application/json'})
    try:
        pic_sign = requests.post(check_url, json=check_data, headers=base_header).json()
        sign = pic_sign['params']
    except:
        return -1
    return sign


def get_beian_info(info_data, p_uuid, token, sign):
    domain_list = []
    info_url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/icpAbbreviateInfo/queryByCondition'
    base_header.update({'Content-Length': '78', 'uuid': p_uuid, 'token': token, 'sign': sign})
    try:
        beian_info = requests.post(url=info_url, json=info_data, headers=base_header).json()
        if not beian_info["success"]:
            print(f'请求错误: CODE {beian_info["code"]} MSG {beian_info["msg"]}')
            return domain_list
        domain_total = beian_info['params']['total']
        page_total = beian_info['params']['lastPage']
        end_row = beian_info['params']['endRow']
        info = info_data['unitName']
        print(f"\n查询对象:{info} 共有 {domain_total} 个已备案域名\n")
        for i in range(0, page_total):
            print(f"正在查询第{i+1}页……\n")
            for k in range(0, end_row + 1):
                info_base = beian_info['params']['list'][k]
                domain_name = info_base['domain']
                domain_type = info_base['natureName']
                domain_licence = info_base['mainLicence']
                website_licence = info_base['serviceLicence']
                domain_status = info_base['limitAccess']
                domain_approve_date = info_base['updateRecordTime']
                domain_owner = info_base['unitName']
                try:
                    domain_content_approved = info_base['contentTypeName']
                    if domain_content_approved == "":
                        domain_content_approved = "无"
                except KeyError:
                    domain_content_approved = "无"
                row_data = domain_owner, domain_name, domain_licence, website_licence, domain_type, domain_content_approved, domain_status, domain_approve_date
                domain_list.append(row_data)
            info_data = {'pageNum': i + 2, 'pageSize': '40', 'unitName': info}
            if beian_info['params']['isLastPage'] is True:
                break
            else:
                beian_info = requests.post(info_url, json=info_data, headers=base_header).json()
                end_row = beian_info['params']['endRow']
                time.sleep(3)
    except Exception as e:
        print(f"意外错误: {e}")
        return domain_list
    return domain_list


def data_saver(domain_list):
    """
    Print the final result and save it to an Excel workbook, adjusting the sheet formatting.
    """
    # Total rows to write; an empty list means no filing record (or the lookup failed)
    total_row = len(domain_list)
    if total_row == 1:
        total_row = 0
    elif total_row == 0:
        return print("所查域名无备案\n")
    print(f"查询结果如下:\n\n{domain_list}\n")
    # On Windows save the workbook to the desktop; elsewhere save to the working directory
    if os.name == "nt":
        import winreg
        # If the user relocated the desktop, only User Shell Folders holds the real path
        subkey = r'Software\Microsoft\Windows\CurrentVersion\Explorer\User Shell Folders'
        key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, subkey, 0)
        desktop_raw = str(winreg.QueryValueEx(key, "Desktop")[0])
        if desktop_raw == "%USERPROFILE%\\Desktop":
            # Desktop was never relocated, so read the system default path instead
            subkey = r'Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders'
            key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, subkey, 0)
            desktop_raw = str(winreg.QueryValueEx(key, "Desktop")[0])
        desktop_path = desktop_raw.replace('\\', '/') + "/"
        file_path = f"{desktop_path}备案信息.xlsx"
    else:
        file_path = './备案信息.xlsx'
    # Append to the workbook if it exists; otherwise create it and set up the
    # title row, column widths, freeze panes and text alignment
    if os.path.exists(file_path):
        wb = xl.load_workbook(file_path)
        ws = wb['备案信息']
        max_row = ws.max_row
        start = max_row + 1
        total_row = total_row + start
        after_title = 0
    else:
        wb = xl.Workbook()
        ws = wb.active
        ws.title = "备案信息"
        title_list = ['域名主办方', '域名', '备案许可证号', '网站备案号', '域名类型', '网站前置审批项', '是否限制接入', '审核通过日期']
        for i in range(0, 8):
            ws.cell(1, i + 1).value = title_list[i]
        col_width = {'A': 45, 'B': 40, 'C': 22, 'D': 24, 'E': 9, 'F': 15, 'G': 13, 'H': 21}
        for k, v in col_width.items():
            ws.column_dimensions[k].width = v
        ws.freeze_panes = 'A2'
        start = 0
        after_title = 2
    # Write the query results
    for j in range(start, total_row + 1):
        for k in range(0, 8):
            try:
                ws.cell(j + after_title, k + 1).value = domain_list[j - start][k]
            except:
                continue
    # Center every cell
    for row in range(ws.max_row):
        for col in range(ws.max_column):
            ws.cell(row + 1, col + 1).alignment = Alignment(horizontal='center', vertical='center')
    try:
        wb.save(file_path)
    except PermissionError:
        print("** 备案信息登记表格已打开,无法写入文件。如需写入,请关闭文件后重新执行! **\n")
        return -1
    print(f"查询结果保存在:{file_path}\n")
    return 'OK'


def main():
    cookie = get_cookies()
    while True:
        info = query_base()
        try:
            global base_header
            base_header = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32',
                'Origin': 'https://beian.miit.gov.cn',
                'Referer': 'https://beian.miit.gov.cn/',
                'Cookie': f'__jsluid_s={cookie}'
            }
            # -1 means the corresponding step failed; anything else moves on to the next step
            if cookie != -1:
                token = get_token()
                if token != -1:
                    check_data = get_check_pic(token)
                    if check_data != -1:
                        sign = get_sign(check_data, token)
                        p_uuid = check_data['key']
                        if sign != -1:
                            domain_list = get_beian_info(info, p_uuid, token, sign)
                            data_saver(domain_list)
                        else:
                            raise ValueError("获取Sign遇到错误,请重试!")
                    else:
                        raise ValueError("计算图片缺口位置错误,请重试!")
                else:
                    raise ValueError("获取Token失败,如频繁失败请关闭程序后等待几分钟再试!")
            else:
                cookie = get_cookies()
                raise ValueError("获取Cookie失败,请重试!")
        except Exception as e:
            print(f'{e}\n')


if __name__ == '__main__':
    main()
```
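To use it, run python3 ICP_checker.py and type the company's full registered name or a domain at the prompt; results are appended to 备案信息.xlsx (on the desktop under Windows, in the working directory elsewhere).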

Batch script (ICP_checks.py):

```python
import re
import os
import cv2
import time
import base64
import hashlib
import requests
import openpyxl as xl
from openpyxl.styles import Alignment

os.environ['no_proxy'] = '*'


def read_targets_from_file(file_path='target.txt'):
    with open(file_path, 'r', encoding='utf-8') as file:
        targets = [line.strip() for line in file.readlines()]
    return targets


def main():
    targets = read_targets_from_file()

    for target in targets:
        print(f"正在查询:{target}")
        cookie = get_cookies()
        while True:
            try:
                global base_header
                base_header = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32',
                    'Origin': 'https://beian.miit.gov.cn',
                    'Referer': 'https://beian.miit.gov.cn/',
                    'Cookie': f'__jsluid_s={cookie}'
                }
                if cookie != -1:
                    token = get_token()
                    if token != -1:
                        check_data = get_check_pic(token)
                        if check_data != -1:
                            sign = get_sign(check_data, token)
                            p_uuid = check_data['key']
                            if sign != -1:
                                info_data = query_base(target)
                                domain_list = get_beian_info(info_data, p_uuid, token, sign)
                                data_saver(domain_list, target)
                                break
                            else:
                                raise ValueError("获取Sign遇到错误,请重试!")
                        else:
                            raise ValueError("计算图片缺口位置错误,请重试!")
                    else:
                        raise ValueError("获取Token失败,如频繁失败请关闭程序后等待几分钟再试!")
                else:
                    cookie = get_cookies()
                    raise ValueError("获取Cookie失败,请重试!")
            except Exception as e:
                print(f'{e}\n')
        time.sleep(10)  # wait 10 seconds before querying the next target


def query_base(target):
    # Strip the URL scheme and any characters not valid in a company name or domain
    info = target.replace(" ", "").replace("https://www.", "").replace("http://www.", "").replace("http://", "")
    info = re.sub("[^\\u4e00-\\u9fa5-A-Za-z0-9,-.()()]", "", info)
    info_data = {'pageNum': '1', 'pageSize': '40', 'unitName': info}
    return info_data


def get_cookies():
    cookie_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32'}
    err_num = 0
    while err_num < 3:
        try:
            cookie = requests.utils.dict_from_cookiejar(requests.get('https://beian.miit.gov.cn/', headers=cookie_headers).cookies)['__jsluid_s']
            return cookie
        except:
            err_num += 1
            time.sleep(3)
    return -1


def get_token():
    timeStamp = round(time.time() * 1000)
    authSecret = 'testtest' + str(timeStamp)
    authKey = hashlib.md5(authSecret.encode(encoding='UTF-8')).hexdigest()
    auth_data = {'authKey': authKey, 'timeStamp': timeStamp}
    url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/auth'
    try:
        t_response = requests.post(url=url, data=auth_data, headers=base_header).json()
        token = t_response['params']['bussiness']
    except:
        return -1
    return token


def get_check_pic(token):
    url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImage'
    base_header['Accept'] = 'application/json, text/plain, */*'
    base_header.update({'Content-Length': '0', 'token': token})
    try:
        p_request = requests.post(url=url, data='', headers=base_header).json()
        p_uuid = p_request['params']['uuid']
        big_image = p_request['params']['bigImage']
        small_image = p_request['params']['smallImage']
    except:
        return -1
    # Decode the captcha images, write them to disk, and locate the slider gap
    with open('bigImage.jpg', 'wb') as f:
        f.write(base64.b64decode(big_image))
    with open('smallImage.jpg', 'wb') as f:
        f.write(base64.b64decode(small_image))
    background_image = cv2.imread('bigImage.jpg', cv2.COLOR_GRAY2RGB)
    fill_image = cv2.imread('smallImage.jpg', cv2.COLOR_GRAY2RGB)
    position_match = cv2.matchTemplate(background_image, fill_image, cv2.TM_CCOEFF_NORMED)
    max_loc = cv2.minMaxLoc(position_match)[3][0]
    mouse_length = max_loc + 1
    os.remove('bigImage.jpg')
    os.remove('smallImage.jpg')
    check_data = {'key': p_uuid, 'value': mouse_length}
    return check_data


def get_sign(check_data, token):
    check_url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/checkImage'
    base_header.update({'Content-Length': '60', 'token': token, 'Content-Type': 'application/json'})
    try:
        pic_sign = requests.post(check_url, json=check_data, headers=base_header).json()
        sign = pic_sign['params']
    except:
        return -1
    return sign


def get_beian_info(info_data, p_uuid, token, sign):
    domain_list = []
    info_url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/icpAbbreviateInfo/queryByCondition'
    base_header.update({'Content-Length': '78', 'uuid': p_uuid, 'token': token, 'sign': sign})
    try:
        beian_info = requests.post(url=info_url, json=info_data, headers=base_header).json()
        if not beian_info["success"]:
            print(f'请求错误: CODE {beian_info["code"]} MSG {beian_info["msg"]}')
            return domain_list
        domain_total = beian_info['params']['total']
        page_total = beian_info['params']['lastPage']
        end_row = beian_info['params']['endRow']
        info = info_data['unitName']
        print(f"\n查询对象:{info} 共有 {domain_total} 个已备案域名\n")
        for i in range(0, page_total):
            print(f"正在查询第{i+1}页……\n")
            for k in range(0, end_row + 1):
                info_base = beian_info['params']['list'][k]
                domain_name = info_base['domain']
                domain_type = info_base['natureName']
                domain_licence = info_base['mainLicence']
                website_licence = info_base['serviceLicence']
                domain_status = info_base['limitAccess']
                domain_approve_date = info_base['updateRecordTime']
                domain_owner = info_base['unitName']
                try:
                    domain_content_approved = info_base['contentTypeName']
                    if domain_content_approved == "":
                        domain_content_approved = "无"
                except KeyError:
                    domain_content_approved = "无"
                row_data = domain_owner, domain_name, domain_licence, website_licence, domain_type, domain_content_approved, domain_status, domain_approve_date
                domain_list.append(row_data)
            info_data = {'pageNum': i + 2, 'pageSize': '40', 'unitName': info}
            if beian_info['params']['isLastPage'] is True:
                break
            else:
                beian_info = requests.post(info_url, json=info_data, headers=base_header).json()
                end_row = beian_info['params']['endRow']
                time.sleep(3)
    except Exception as e:
        print(f"意外错误: {e}")
        return domain_list
    return domain_list


# data_saver modified to take a `target` argument, used as the worksheet name
def data_saver(domain_list, target):
    total_row = len(domain_list)
    if total_row == 1:
        total_row = 0
    elif total_row == 0:
        return print("所查域名无备案\n")
    print(f"查询结果如下:\n\n{domain_list}\n")
    if os.name == "nt":
        import winreg
        subkey = r'Software\Microsoft\Windows\CurrentVersion\Explorer\User Shell Folders'
        key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, subkey, 0)
        desktop_raw = str(winreg.QueryValueEx(key, "Desktop")[0])
        if desktop_raw == "%USERPROFILE%\\Desktop":
            subkey = r'Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders'
            key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, subkey, 0)
            desktop_raw = str(winreg.QueryValueEx(key, "Desktop")[0])
        desktop_path = desktop_raw.replace('\\', '/') + "/"
        file_path = f"{desktop_path}备案信息.xlsx"
    else:
        file_path = './备案信息.xlsx'
    if os.path.exists(file_path):
        wb = xl.load_workbook(file_path)
    else:
        wb = xl.Workbook()
    ws = wb.create_sheet(target)
    title_list = ['域名主办方', '域名', '备案许可证号', '网站备案号', '域名类型', '网站前置审批项', '是否限制接入', '审核通过日期']
    for i in range(0, 8):
        ws.cell(1, i + 1).value = title_list[i]
    col_width = {'A': 45, 'B': 40, 'C': 22, 'D': 24, 'E': 9, 'F': 15, 'G': 13, 'H': 21}
    for k, v in col_width.items():
        ws.column_dimensions[k].width = v
    ws.freeze_panes = 'A2'
    start = 0
    after_title = 2
    for j in range(start, total_row + 1):
        for k in range(0, 8):
            try:
                ws.cell(j + after_title, k + 1).value = domain_list[j - start][k]
            except:
                continue
    for row in range(ws.max_row):
        for col in range(ws.max_column):
            ws.cell(row + 1, col + 1).alignment = Alignment(horizontal='center', vertical='center')
    try:
        wb.save(file_path)
    except PermissionError:
        print("** 备案信息登记表格已打开,无法写入文件。如需写入,请关闭文件后重新执行! **\n")
        return -1
    print(f"查询结果保存在:{file_path}\n")
    return 'OK'


if __name__ == '__main__':
    main()
```

Damn, it gets banned after running for a while, so put it behind a proxy:

```python
import re
import os
import cv2
import time
import base64
import hashlib
import requests
import openpyxl as xl
import threading
from openpyxl.styles import Alignment

proxies = {
    'http': 'http://username:passwd@x.x.x.x:port',
    'https': 'http://username:passwd@x.x.x.x:port',
}


def read_targets_from_file(file_path='target.txt'):
    with open(file_path, 'r', encoding='utf-8') as file:
        targets = [line.strip() for line in file.readlines()]
    return targets


def request_with_retry(method, url, proxies, timeout=10, retries=3, **kwargs):
    # Wrap requests.get/post with a timeout, the proxy config, and up to 3 retries
    while retries > 0:
        try:
            if method == 'get':
                response = requests.get(url, timeout=timeout, proxies=proxies, **kwargs)
            elif method == 'post':
                response = requests.post(url, timeout=timeout, proxies=proxies, **kwargs)
            else:
                raise ValueError('Invalid method')
            return response
        except requests.exceptions.RequestException as e:
            print(f"请求超时,正在重试: {e}")
            retries -= 1
            if retries == 0:
                raise


def process_target(target):
    print(f"正在查询:{target}")
    cookie = get_cookies()
    while True:
        try:
            # NOTE: this global is shared by all threads (see the caveat after this script)
            global base_header
            base_header = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32',
                'Origin': 'https://beian.miit.gov.cn',
                'Referer': 'https://beian.miit.gov.cn/',
                'Cookie': f'__jsluid_s={cookie}'
            }
            if cookie != -1:
                token = get_token()
                if token != -1:
                    check_data = get_check_pic(token)
                    if check_data != -1:
                        sign = get_sign(check_data, token)
                        p_uuid = check_data['key']
                        if sign != -1:
                            info_data = query_base(target)
                            domain_list = get_beian_info(info_data, p_uuid, token, sign)
                            data_saver(domain_list, target)
                            break
                        else:
                            raise ValueError("获取Sign遇到错误,请重试!")
                    else:
                        raise ValueError("计算图片缺口位置错误,请重试!")
                else:
                    raise ValueError("获取Token失败,如频繁失败请关闭程序后等待几分钟再试!")
            else:
                cookie = get_cookies()
                raise ValueError("获取Cookie失败,请重试!")
        except Exception as e:
            print(f'{e}\n')
    time.sleep(10)  # wait 10 seconds before querying the next target


def main():
    targets = read_targets_from_file()
    threads = []

    for target in targets:
        t = threading.Thread(target=process_target, args=(target,))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()


def query_base(target):
    info = target.replace(" ", "").replace("https://www.", "").replace("http://www.", "").replace("http://", "")
    info = re.sub("[^\\u4e00-\\u9fa5-A-Za-z0-9,-.()()]", "", info)
    info_data = {'pageNum': '1', 'pageSize': '40', 'unitName': info}
    return info_data


def get_cookies():
    cookie_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32'}
    err_num = 0
    while err_num < 3:
        try:
            cookie = requests.utils.dict_from_cookiejar(request_with_retry('get', 'https://beian.miit.gov.cn/', headers=cookie_headers, proxies=proxies).cookies)['__jsluid_s']
            return cookie
        except:
            err_num += 1
            time.sleep(3)
    return -1


def get_token():
    timeStamp = round(time.time() * 1000)
    authSecret = 'testtest' + str(timeStamp)
    authKey = hashlib.md5(authSecret.encode(encoding='UTF-8')).hexdigest()
    auth_data = {'authKey': authKey, 'timeStamp': timeStamp}
    url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/auth'
    try:
        t_response = request_with_retry('post', url, data=auth_data, headers=base_header, proxies=proxies).json()
        token = t_response['params']['bussiness']
    except:
        return -1
    return token


def get_check_pic(token):
    url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImage'
    base_header['Accept'] = 'application/json, text/plain, */*'
    base_header.update({'Content-Length': '0', 'token': token})
    try:
        p_request = request_with_retry('post', url, data='', headers=base_header, proxies=proxies).json()
        p_uuid = p_request['params']['uuid']
        big_image = p_request['params']['bigImage']
        small_image = p_request['params']['smallImage']
    except:
        return -1
    # Decode the captcha images, write them to disk, and locate the slider gap
    with open('bigImage.jpg', 'wb') as f:
        f.write(base64.b64decode(big_image))
    with open('smallImage.jpg', 'wb') as f:
        f.write(base64.b64decode(small_image))
    background_image = cv2.imread('bigImage.jpg', cv2.COLOR_GRAY2RGB)
    fill_image = cv2.imread('smallImage.jpg', cv2.COLOR_GRAY2RGB)
    position_match = cv2.matchTemplate(background_image, fill_image, cv2.TM_CCOEFF_NORMED)
    max_loc = cv2.minMaxLoc(position_match)[3][0]
    mouse_length = max_loc + 1
    os.remove('bigImage.jpg')
    os.remove('smallImage.jpg')
    check_data = {'key': p_uuid, 'value': mouse_length}
    return check_data


def get_sign(check_data, token):
    check_url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/checkImage'
    base_header.update({'Content-Length': '60', 'token': token, 'Content-Type': 'application/json'})
    try:
        pic_sign = request_with_retry('post', check_url, json=check_data, headers=base_header, proxies=proxies).json()
        sign = pic_sign['params']
    except:
        return -1
    return sign


def get_beian_info(info_data, p_uuid, token, sign):
    domain_list = []
    info_url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/icpAbbreviateInfo/queryByCondition'
    base_header.update({'Content-Length': '78', 'uuid': p_uuid, 'token': token, 'sign': sign})
    try:
        beian_info = request_with_retry('post', url=info_url, json=info_data, headers=base_header, proxies=proxies).json()
        if not beian_info["success"]:
            print(f'请求错误: CODE {beian_info["code"]} MSG {beian_info["msg"]}')
            return domain_list
        domain_total = beian_info['params']['total']
        page_total = beian_info['params']['lastPage']
        end_row = beian_info['params']['endRow']
        info = info_data['unitName']
        print(f"\n查询对象:{info} 共有 {domain_total} 个已备案域名\n")
        for i in range(0, page_total):
            print(f"正在查询第{i+1}页……\n")
            for k in range(0, end_row + 1):
                info_base = beian_info['params']['list'][k]
                domain_name = info_base['domain']
                domain_type = info_base['natureName']
                domain_licence = info_base['mainLicence']
                website_licence = info_base['serviceLicence']
                domain_status = info_base['limitAccess']
                domain_approve_date = info_base['updateRecordTime']
                domain_owner = info_base['unitName']
                try:
                    domain_content_approved = info_base['contentTypeName']
                    if domain_content_approved == "":
                        domain_content_approved = "无"
                except KeyError:
                    domain_content_approved = "无"
                row_data = domain_owner, domain_name, domain_licence, website_licence, domain_type, domain_content_approved, domain_status, domain_approve_date
                domain_list.append(row_data)
            info_data = {'pageNum': i + 2, 'pageSize': '40', 'unitName': info}
            if beian_info['params']['isLastPage'] is True:
                break
            else:
                beian_info = request_with_retry('post', info_url, json=info_data, headers=base_header, proxies=proxies).json()
                end_row = beian_info['params']['endRow']
                time.sleep(3)
    except Exception as e:
        print(f"意外错误: {e}")
        return domain_list
    return domain_list


def data_saver(domain_list, target):
    total_row = len(domain_list)
    if total_row == 1:
        total_row = 0
    elif total_row == 0:
        return print("所查域名无备案\n")
    print(f"查询结果如下:\n\n{domain_list}\n")
    if os.name == "nt":
        import winreg
        subkey = r'Software\Microsoft\Windows\CurrentVersion\Explorer\User Shell Folders'
        key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, subkey, 0)
        desktop_raw = str(winreg.QueryValueEx(key, "Desktop")[0])
        if desktop_raw == "%USERPROFILE%\\Desktop":
            subkey = r'Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders'
            key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, subkey, 0)
            desktop_raw = str(winreg.QueryValueEx(key, "Desktop")[0])
        desktop_path = desktop_raw.replace('\\', '/') + "/"
        file_path = f"{desktop_path}备案信息.xlsx"
    else:
        file_path = './备案信息.xlsx'
    if os.path.exists(file_path):
        wb = xl.load_workbook(file_path)
    else:
        wb = xl.Workbook()
    ws = wb.create_sheet(target)
    title_list = ['域名主办方', '域名', '备案许可证号', '网站备案号', '域名类型', '网站前置审批项', '是否限制接入', '审核通过日期']
    for i in range(0, 8):
        ws.cell(1, i + 1).value = title_list[i]
    col_width = {'A': 45, 'B': 40, 'C': 22, 'D': 24, 'E': 9, 'F': 15, 'G': 13, 'H': 21}
    for k, v in col_width.items():
        ws.column_dimensions[k].width = v
    ws.freeze_panes = 'A2'
    start = 0
    after_title = 2
    for j in range(start, total_row + 1):
        for k in range(0, 8):
            try:
                ws.cell(j + after_title, k + 1).value = domain_list[j - start][k]
            except:
                continue
    for row in range(ws.max_row):
        for col in range(ws.max_column):
            ws.cell(row + 1, col + 1).alignment = Alignment(horizontal='center', vertical='center')
    try:
        wb.save(file_path)
    except PermissionError:
        print("** 备案信息登记表格已打开,无法写入文件。如需写入,请关闭文件后重新执行! **\n")
        return -1
    print(f"查询结果保存在:{file_path}\n")
    return 'OK'


if __name__ == '__main__':
    main()
```
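One caveat with the threaded version: every thread mutates the same global base_header dict (and data_saver writes to the same workbook), so concurrent targets can clobber each other's token/sign/uuid headers. A minimal sketch of one way around the header race, using threading.local so each worker gets its own dict; this is my addition, not the original script:

```python
import threading

# Thread-local storage: each worker thread sees its own `headers` attribute,
# so one target's token/sign/uuid never overwrites another's mid-request.
_tls = threading.local()

def get_headers(cookie):
    if not hasattr(_tls, 'headers'):
        _tls.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32',
            'Origin': 'https://beian.miit.gov.cn',
            'Referer': 'https://beian.miit.gov.cn/',
        }
    _tls.headers['Cookie'] = f'__jsluid_s={cookie}'
    return _tls.headers
```

The helpers would then call get_headers(cookie) and update the returned dict instead of touching the module-level base_header; saving each target to its own workbook, or joining all threads before saving, sidesteps the xlsx race too.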

How to use it:

1. Put the full registered names of all the target companies into target.txt
2. Run python3 ICP_checks.py
3. Wait for the results; everything lands in a single Excel workbook, one sheet per target
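For reference, target.txt is just one company full name per line, something like this (the names here are placeholders; use your real targets):

```text
小米科技有限责任公司
北京百度网讯科技有限公司
```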

2. Compile all the root domains

With the filing workbook in hand, one script pulls every domain out of it:
```python
import openpyxl as xl

def extract_domains_from_excel(file_path='备案信息.xlsx'):
    wb = xl.load_workbook(file_path)
    domain_list = []
    for sheet in wb.worksheets:
        # Column B of every sheet holds the domain; skip the title row
        for row in sheet.iter_rows(min_row=2, min_col=2, max_col=2):
            domain = row[0].value
            if domain:
                domain_list.append(domain)
    return domain_list

def write_domains_to_file(domains, file_path='domain.txt'):
    with open(file_path, 'w', encoding='utf-8') as file:
        for domain in domains:
            file.write(f'{domain}\n')

if __name__ == '__main__':
    domains = extract_domains_from_excel()
    write_domains_to_file(domains)
    print("域名已提取并保存到 domain.txt\n")
```

python3 domain_get.py
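Since the same root domain can be filed by multiple subsidiaries and show up on several sheets, it's worth deduplicating domain.txt before handing it to fofa. A small sketch, my addition rather than part of the original flow:

```python
def dedupe_file(path='domain.txt'):
    # Drop duplicate lines while preserving the original order
    with open(path, encoding='utf-8') as f:
        seen, ordered = set(), []
        for line in f:
            domain = line.strip()
            if domain and domain not in seen:
                seen.add(domain)
                ordered.append(domain)
    with open(path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(ordered) + '\n')

if __name__ == '__main__':
    dedupe_file()
```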

3. All-in FOFA download

This step relies on the local fofa SDK (the command-line client) to do the downloading.
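If you're on the gofofa command-line client, it typically picks up credentials from the FOFA_CLIENT_URL environment variable, e.g. `export FOFA_CLIENT_URL='https://fofa.info/?email=you@example.com&key=yourkey'`; the exact variable name is an assumption here, so check your client's docs.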

It boils down to two commands: first confirm how many assets there are, then download them all.

command = f"fofa search 'domain=\"{domain}\"' --count"
command = f"fofa search 'domain=\"{domain}\"' -f ip,port,domain,link,title,certs_match,certs_expired --size {asset_count} --save {domain}.csv"

Batch download script:

```python
import subprocess

def read_domains_from_file(file_path='domain.txt'):
    with open(file_path, 'r', encoding='utf-8') as file:
        domains = [line.strip() for line in file.readlines()]
    return domains

def get_asset_count(domain):
    # Ask fofa how many results the query has before downloading
    command = f"fofa search 'domain=\"{domain}\"' --count"
    output = subprocess.check_output(command, shell=True).decode('utf-8').strip()
    return int(output)

def run_fofa_command(domain, asset_count):
    # Download every result and save it as <domain>.csv
    command = f"fofa search 'domain=\"{domain}\"' -f ip,port,domain,link,title,certs_match,certs_expired --size {asset_count} --save {domain}.csv"
    subprocess.run(command, shell=True)

if __name__ == '__main__':
    domains = read_domains_from_file()
    for domain in domains:
        print(f"正在处理域名:{domain}")
        asset_count = get_asset_count(domain)
        print(f"资产数量:{asset_count}")
        if asset_count > 0:
            run_fofa_command(domain, asset_count)
            print(f"已保存 {domain}.csv\n")
        else:
            print(f"跳过 {domain},没有资产\n")
```

A generated download command looks like this:

```
fofa search 'domain="chinamoney.com.cn"' -f ip,port,domain,link,title,certs_match,certs_expired --size 80 --save chinamoney.com.cn.csv
```

python3 domain_fofa.py

Then you'll have every target you can attack and every asset you can submit.

Other lookup platforms:

[xiaomi.com ICP filing lookup, Chinaz webmaster tools](https://micp.chinaz.com/xiaomi.com)

ICP filing lookup / batch domain filing-number query tool: 聚查网 (Jucha)

Chinaz query API:

https://apidatav2.chinaz.com/single/newicp?key=xxxxx&domain=chinaz.com
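A minimal sketch of calling that endpoint from Python; `key` is your own Chinaz API key, and since the response schema isn't pinned down here, the sketch just prints the raw JSON:

```python
import requests

def chinaz_icp(domain, key):
    # Query the Chinaz ICP endpoint shown above
    resp = requests.get('https://apidatav2.chinaz.com/single/newicp',
                        params={'key': key, 'domain': domain}, timeout=10)
    return resp.json()

if __name__ == '__main__':
    print(chinaz_icp('chinaz.com', 'xxxxx'))
```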

Other all-in-one tools:

[GitHub - wgpsec/ENScan_GO: a tool built on the APIs of the major enterprise-information platforms, solving the usual pain points of collecting intel on Chinese companies; one click gathers and exports ICP filings, apps, mini-programs, WeChat official accounts and more for holding companies](https://github.com/wgpsec/ENScan_GO)

GitHub - i11us0ry/AScan: a fork of wgpsec/ENScan_GO that keeps only the 爱企查 (Aiqicha) interface and adds recursion over outbound investments and subsidiaries