处理事件中的CVE
- 我采用爬虫抓取公共漏洞库中的CVE信息
- 同样的URL地址也不展示了
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import requests
import re
class cve_spider(object):
    """Scrape the description of one CVE from a public vulnerability site.

    The lookup takes two HTTP requests: a search by CVE id, then a fetch of
    the detail page linked from the search results.  ``run`` chains the
    steps and returns the extracted description text, or ``None`` when any
    step finds nothing.
    """

    def __init__(self):
        # Search URL template; "{}" is filled with the CVE id on each lookup
        # and must stay intact so the instance can be reused.
        self.url = "http://xxxxxx.org.cn/vulns?search_type=t_id&keyword={}&cvss_floor=&cvss_ceil=&pubtime_floor=&pubtime_ceil="
        self.headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"}

    def _search_url(self, cve):
        """Return the search URL for *cve* without modifying the template."""
        return self.url.format(cve)

    def get_data(self, cve):
        """Fetch the search-result page for *cve* and return it as text.

        :param cve: CVE identifier, e.g. ``"CVE-2017-9791"``
        :return: response body decoded as UTF-8
        """
        # Format into a local: assigning the result back to self.url would
        # destroy the placeholder and pin every later lookup to this CVE.
        response = requests.get(self._search_url(cve), headers=self.headers, timeout=5)
        return response.content.decode("utf-8")

    def parse_url(self, response, cve):
        """Find the detail-page link for *cve* in the search-result HTML.

        :param response: HTML text of the search-result page
        :param cve: CVE identifier that must follow the anchor tag
        :return: the href value *including* its surrounding double quotes
                 (``parse_data`` strips them), or ``None`` if not found
        """
        # The id is escaped so it is matched literally rather than as regex.
        anchor = re.search(r"""<a href=.*\s*{}""".format(re.escape(cve)), response)
        if anchor is None:
            return None
        # Take the first quoted value only.  A greedy ".*" would run to the
        # last quote and swallow any attributes that follow the href.
        quoted = re.search(r'"[^"]*"', anchor.group())
        return quoted.group() if quoted else None

    @staticmethod
    def _extract_description(html):
        """Cut the description text out of a detail page, or return None."""
        article = re.search(r"""<div class="row bug_article.*\s*.*\s*.*\s*</p>""", html)
        if article is None:
            return None
        parts = article.group().split("<p class='pad30T pad30B mrg0B' style='word-wrap: break-word;'>")
        if len(parts) < 2:
            return None
        # Fixed 25-character trim on both sides, kept from the original;
        # presumably page indentation plus the closing tag - TODO confirm
        # against the live markup.
        return parts[1][25:-25]

    def parse_data(self, url):
        """Download the detail page and return the vulnerability description.

        :param url: quoted site-relative path as returned by ``parse_url``
        :return: description text, or ``None`` when *url* is empty or the
                 page does not contain the expected markup
        """
        if not url:
            return None
        # url[1:-1] removes the quotes that parse_url leaves around the path.
        detail_url = "http://xxxxxx.org.cn{}".format(url[1:-1])
        response = requests.get(detail_url, headers=self.headers, timeout=10)
        return self._extract_description(response.content.decode("utf-8"))

    def run(self, cve):
        """Look up *cve* and return its description, or ``None``."""
        search_page = self.get_data(cve)
        detail_link = self.parse_url(search_page, cve)
        return self.parse_data(detail_link)
if __name__ == "__main__":
    # Demo run: look up one known CVE and print its description
    # (prints None when nothing could be extracted).
    cve = "CVE-2017-9791"
    spider = cve_spider()
    data = spider.run(cve)
    print(data)
将数据与模板整合
- 我列举了一部分数据的处理,当然还有一些我也没有处理到
- 将模板的数据读取出来
- 与分析的数据拼接
- 将拼接完成的数据和图,写入预处理文件夹
处理DOC文档
- 将预处理文件夹中的文件,读取出来,排版
- 通过python-docx库,import docx
1
2
3
4
5
6
7
8
9
10
11
12
13
def add_breaks(document, is_page):
    """Append a new paragraph containing a page break or a line break.

    :param document: document object to append to
    :param is_page: True inserts a page break, False inserts a line break
    :return: None
    """
    run = document.add_paragraph().add_run()
    if is_page:
        # A page break is requested by passing WD_BREAK.PAGE as the only
        # argument to add_break.
        run.add_break(docx.enum.text.WD_BREAK.PAGE)
    else:
        # Called with no argument, add_break inserts a line break.
        run.add_break()
接下来处理标题,小标题
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def _paragraph_alignment(str_position):
    """Map a position code to a WD_ALIGN_PARAGRAPH member (None if unknown)."""
    return {
        1: WD_ALIGN_PARAGRAPH.LEFT,      # left-aligned
        2: WD_ALIGN_PARAGRAPH.CENTER,    # centered
        3: WD_ALIGN_PARAGRAPH.RIGHT,     # right-aligned
        4: WD_ALIGN_PARAGRAPH.JUSTIFY,   # justified
    }.get(str_position)


def _format_black_run(run, font_name, font_size):
    """Give *run* the requested font and point size, in black."""
    run.font.name = font_name
    # font.name alone does not cover East Asian text, so the w:eastAsia
    # font attribute is set explicitly as well.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    run.font.color.rgb = RGBColor(0, 0, 0)
    run.font.size = Pt(font_size)


def write_docx(document, write_context, istitle, str_position, font_name, font_size):
    """Write one string into the document as a heading or a paragraph.

    :param document: document object
    :param write_context: the string to write
    :param istitle: 1 = heading style; 2 = centered paragraph with explicit
                    run formatting (sub-title); any other value = body text
    :param str_position: 1 left, 2 center, 3 right, 4 justify; other values
                         leave the alignment untouched (ignored when
                         ``istitle == 2``, which is always centered)
    :param font_name: font to use
    :param font_size: font size in points
    :return: None
    """
    if istitle == 1:
        heading = document.add_heading()
        alignment = _paragraph_alignment(str_position)
        if alignment is not None:
            heading.paragraph_format.alignment = alignment
        _format_black_run(heading.add_run(write_context), font_name, font_size)
    elif istitle == 2:
        paragraph = document.add_paragraph()
        paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
        _format_black_run(paragraph.add_run(write_context), font_name, font_size)
    else:
        paragraph = document.add_paragraph(write_context)
        paragraph_format = paragraph.paragraph_format
        # First-line indent of two characters; 406400 EMU equals 32 pt.
        paragraph_format.first_line_indent = 406400
        alignment = _paragraph_alignment(str_position)
        if alignment is not None:
            paragraph_format.alignment = alignment
        # NOTE: body text is formatted through the document-wide 'Normal'
        # style, so these settings affect every paragraph that uses that
        # style, not only the one just added.
        normal_style = document.styles['Normal']
        normal_style.font.name = font_name
        normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
        normal_style.font.size = Pt(font_size)  # e.g. 12 pt
        normal_style.font.color.rgb = RGBColor(0, 0, 0)


# --- Next section of the article: handling body text ---
1
2
3
4
5
6
def write_normal_text(document, txt_path):
    """Write every line of a text file into the document as body text.

    :param document: document object
    :param txt_path: path handed to ``read_txt``
    :return: None
    """
    # Assumes read_txt yields str lines that may end in a newline - TODO
    # confirm against read_txt.
    for line in read_txt(txt_path):
        # Strip only a trailing line ending.  Slicing off the last character
        # unconditionally would eat a real character from a line that has
        # no trailing newline (typically the last line of the file).
        # istitle=3 selects the body-text branch; 1 = left-aligned.
        write_docx(document, line.rstrip("\r\n"), 3, 1, u"宋体", 12)


# --- Next section of the article: handling images ---
1
2
3
4
5
6
7
8
def write_img(document, img_path, width_cm=14, height_cm=7.5):
    """Insert an image in its own centered paragraph at a fixed size.

    :param document: document object
    :param img_path: path of the image file to insert
    :param width_cm: picture width in centimetres (default 14)
    :param height_cm: picture height in centimetres (default 7.5)
    :return: None

    Both dimensions are forced, so the image's own aspect ratio is not
    preserved.
    """
    paragraph = document.add_paragraph()
    paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # Passing both width and height sizes the picture at insertion time,
    # equivalent to assigning inline_shape.width/height afterwards.
    paragraph.add_run().add_picture(img_path, width=Cm(width_cm), height=Cm(height_cm))
处理CSV,以表格形式写入DOCX
- 因为报告中,还有一些表,我处理成了CSV
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def write_excle(document, data_list, excel_size, merge_cells_list):
    """Write rows read from a CSV file into the document as a table.

    :param document: document object
    :param data_list: rows read from the CSV; row 0 is the header, and
                      every row needs at least as many entries as row 0
    :param excel_size: column widths in centimetres, one per column
    :param merge_cells_list: matrix of merge flags in table layout; cells
                             flagged 1 are merged with a flagged neighbour,
                             all other entries must be 0.  Row 0 is never
                             merged.  May be empty or None.
    :return: None
    """
    if not data_list:
        # Nothing to write; avoids an IndexError on data_list[0].
        return

    row_count = len(data_list)
    col_count = len(data_list[0])
    table = document.add_table(rows=row_count, cols=col_count)

    # Header row: width and text only, no explicit run formatting.
    for col in range(col_count):
        header_cell = table.cell(0, col)
        header_cell.width = Cm(excel_size[col])
        header_cell.text = data_list[0][col]

    # Data rows.
    for row in range(1, row_count):
        for col in range(col_count):
            cell = table.cell(row, col)
            # Set the width on the cell being filled, not on the header
            # cell of the same column.
            cell.width = Cm(excel_size[col])
            cell.text = data_list[row][col]
            for paragraph in cell.paragraphs:
                for run in paragraph.runs:
                    run.font.size = Pt(12)
                    run.font.name = u'宋体'
                    run._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    if merge_cells_list:
        flag_rows = len(merge_cells_list)
        flag_cols = len(merge_cells_list[0])
        # Row 0 (the header) is skipped.  The last row and last column are
        # visited too; the neighbour checks below guard the boundaries.
        for i in range(1, flag_rows):
            for j in range(flag_cols):
                if merge_cells_list[i][j] != 1:
                    continue
                # A flagged right-hand neighbour takes priority over a
                # flagged neighbour below.
                if j + 1 < flag_cols and merge_cells_list[i][j + 1] == 1:
                    table.cell(i, j).merge(table.cell(i, j + 1))
                elif i + 1 < flag_rows and merge_cells_list[i + 1][j] == 1:
                    table.cell(i, j).merge(table.cell(i + 1, j))

    table.style = 'Table Grid'
    table.autofit = True