处理事件中的CVE

  • 我采用爬虫抓取公共漏洞库中的CVE信息
  • 同样,具体的URL地址这里不做展示

    import requests
    import re

    class cve_spider(object):
        """Look up a CVE id on the vulnerability site and scrape its description.

        Flow: search page -> link to the detail page -> description text.
        Network errors raised by ``requests`` are not caught here.
        """

        def __init__(self):
            # Search URL template; "{}" is filled with the CVE id on each request.
            self.url = "http://xxxxxx.org.cn/vulns?search_type=t_id&keyword={}&cvss_floor=&cvss_ceil=&pubtime_floor=&pubtime_ceil="
            self.headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"}

        def get_data(self, cve):
            """Fetch the search-result page for ``cve`` and return it as text.

            The template in ``self.url`` is formatted into a local variable, so
            the same spider instance can be reused for several CVE ids.
            """
            search_url = self.url.format(cve)
            reply = requests.get(search_url, headers=self.headers, timeout=5)
            return reply.content.decode("utf-8")

        def parse_url(self, response, cve):
            """Find the detail-page link for ``cve`` in the search-result HTML.

            :param response: HTML text of the search-result page
            :param cve: CVE id that must follow the ``<a href=...`` tag
            :return: the href value *including* its surrounding double quotes
                     (``parse_data`` strips them), or None when nothing matches
            """
            # The id is escaped so it is always matched literally.
            anchor = re.search(
                r"""<a href=.*\s*{}""".format(re.escape(cve)), response
            )
            if anchor is None:
                return None
            # Non-greedy: take the first quoted value (the href itself), not
            # everything up to the last quote on the line.
            quoted = re.search(r"\".*?\"", anchor.group())
            if quoted is None:
                return None
            return quoted.group()

        def parse_data(self, url):
            """Download the detail page and extract the description text.

            :param url: quoted relative link as returned by ``parse_url``, or None
            :return: the description text, or None when ``url`` is empty or the
                     page does not contain the expected markup
            """
            if not url:
                return None
            # url[1:-1] drops the surrounding double quotes.
            detail_url = "http://xxxxxx.org.cn{}".format(url[1:-1])
            page = requests.get(detail_url, headers=self.headers, timeout=10).content.decode("utf-8")
            article = re.search(
                r"""<div class="row bug_article.*\s*.*\s*.*\s*</p>""", page
            )
            if article is None:
                return None
            pieces = article.group().split("<p class='pad30T pad30B mrg0B' style='word-wrap: break-word;'>")
            if len(pieces) < 2:
                return None
            # Fixed 25-character trim on both ends, kept from the original;
            # presumably removes padding/markup around the text - verify
            # against the live page.
            return pieces[1][25:-25]

        def run(self, cve):
            """Return the description for ``cve``, or None when it cannot be found."""
            search_page = self.get_data(cve)
            detail_link = self.parse_url(search_page, cve)
            return self.parse_data(detail_link)

    if __name__ == "__main__":
        # Demo run: look up one sample CVE id and print the scraped description.
        demo_spider = cve_spider()
        print(demo_spider.run("CVE-2017-9791"))

将数据与模板整合

  • 我列举了一部分数据的处理,当然还有一些我也没有处理到
  • 将模板的数据读取出来
  • 与分析的数据拼接
  • 将拼接完成的数据和图,写入预处理文件夹

处理DOC文档

  • 将预处理文件夹中的文件,读取出来,排版
  • 通过 python-docx 库实现(import docx)

    def add_breaks(document, is_page):
        """
        Append a new paragraph containing either a page break or a line break.

        :param document: document object the break is appended to
        :param is_page: truthy -> page break, falsy -> plain line break
        :return None
        """
        new_run = document.add_paragraph().add_run()
        if not is_page:
            # add_break() without an argument inserts an ordinary line break.
            new_run.add_break()
            return
        # A page break is requested by passing WD_BREAK.PAGE as the only argument.
        new_run.add_break(docx.enum.text.WD_BREAK.PAGE)

  • 接下来处理标题和小标题

    def write_docx(document, write_context, istitle, str_position, font_name, font_size):
        """
        Write one string into the document as a heading, a centered line or body text.

        :param document: document object the text is appended to
        :param write_context: string to write
        :param istitle: 1 -> heading, 2 -> centered paragraph (str_position is
                        ignored), any other value -> body paragraph
        :param str_position: 1 left, 2 center, 3 right; 4 (justified) is only
                             honoured for body paragraphs
        :param font_name: font to use
        :param font_size: font size, passed to Pt()
        :return None
        """
        def pick_alignment(options):
            # Return the alignment paired with str_position, or None if absent.
            for code, alignment in options:
                if str_position == code:
                    return alignment
            return None

        def format_run(run):
            # Order kept from the original: font name first, then the
            # east-Asian font mapping, then colour and size.
            run.font.name = font_name
            run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
            run.font.color.rgb = RGBColor(0,0,0)
            run.font.size = Pt(font_size)

        if istitle == 1:
            title = document.add_heading()
            chosen = pick_alignment((
                (1, WD_ALIGN_PARAGRAPH.LEFT),
                (2, WD_ALIGN_PARAGRAPH.CENTER),
                (3, WD_ALIGN_PARAGRAPH.RIGHT),
            ))
            if chosen is not None:
                title.paragraph_format.alignment = chosen
            format_run(title.add_run(write_context))
            return

        if istitle == 2:
            # Sub-title style: always centered, formatted on the run itself.
            centered = document.add_paragraph()
            centered.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
            format_run(centered.add_run(write_context))
            return

        # Body text: the string goes straight into the paragraph and the
        # formatting is applied to the document-wide 'Normal' style.
        body = document.add_paragraph(write_context)
        body_format = body.paragraph_format
        body_format.first_line_indent = 406400 # first-line indent (original note: two characters)
        chosen = pick_alignment((
            (1, WD_ALIGN_PARAGRAPH.LEFT),
            (2, WD_ALIGN_PARAGRAPH.CENTER),
            (3, WD_ALIGN_PARAGRAPH.RIGHT),
            (4, WD_ALIGN_PARAGRAPH.JUSTIFY),
        ))
        if chosen is not None:
            body_format.alignment = chosen
        normal_style = document.styles['Normal'] # 'Normal' is the body-text style
        normal_style.font.name = font_name
        normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
        normal_style.font.size = Pt(font_size)
        normal_style.font.color.rgb = RGBColor(0,0,0)
  • 处理正文

    def write_normal_text(document, txt_path):
        """
        Write each line of a text file into the document as a body paragraph.

        Lines come from ``read_txt(txt_path)``; each one is written through
        ``write_docx`` as body text (istitle=3), left aligned (str_position=1),
        in the "宋体" font at size 12.

        :param document: document object the paragraphs are appended to
        :param txt_path: path handed to ``read_txt``
        :return None
        """
        for line in read_txt(txt_path):
            # Assumes read_txt yields newline-terminated lines - TODO confirm.
            # Only the line ending is stripped, so a final line without a
            # trailing newline keeps its last character.
            write_docx(document, line.rstrip("\r\n"), 3, 1, u"宋体", 12)
  • 处理图片

    def write_img(document, img_path):
        """
        Append a centered paragraph holding the picture at ``img_path``.

        The picture is resized to a fixed 7.5 cm height and 14 cm width.

        :param document: document object the picture is appended to
        :param img_path: path of the image file to insert
        :return None
        """
        holder = document.add_paragraph()
        holder.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
        picture = holder.add_run().add_picture(img_path)
        picture.height = Cm(7.5)
        picture.width = Cm(14)

处理CSV,以表格形式写入DOCX

  • 因为报告中还有一些表格,我先把它们处理成了CSV

    def write_excle(document, data_list, excel_size, merge_cells_list):
        """
        Write rows read from a CSV file into the document as a table.

        :param document: document object the table is appended to
        :param data_list: list of rows (first row is the header); every cell
                          value is written as text
        :param excel_size: list of column widths, passed to Cm(), one per column
        :param merge_cells_list: merge mask indexed like the table; cells
                                 marked 1 are merged with a neighbouring 1,
                                 other entries should be 0. Falsy -> no merging
        :return None
        """
        # Nothing to write for an empty CSV (or an empty header row).
        if not data_list or not data_list[0]:
            return

        len_row = len(data_list)
        len_column = len(data_list[0])
        tb = document.add_table(rows=len_row, cols=len_column)

        # Header row.
        for col, title in enumerate(data_list[0]):
            header_cell = tb.cell(0, col)
            header_cell.width = Cm(excel_size[col])
            header_cell.text = title

        # Data rows: width is set on each row's own cell, then text and font.
        for row in range(1, len_row):
            for col in range(len_column):
                cell = tb.cell(row, col)
                cell.width = Cm(excel_size[col])
                cell.text = data_list[row][col]
                for par in cell.paragraphs:
                    for run in par.runs:
                        run.font.size = Pt(12)
                        run.font.name = u'宋体'
                        run._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

        if merge_cells_list:
            # NOTE(review): the loop bounds skip the first and last mask rows
            # and the last column as merge starting points, and a horizontal
            # merge takes precedence over a vertical one. Kept unchanged.
            for i in range(1, len(merge_cells_list)-1):
                for j in range(len(merge_cells_list[0])-1):
                    if merge_cells_list[i][j]==1 and merge_cells_list[i][j+1]==1:
                        tb.cell(i, j).merge(tb.cell(i, j+1))
                    elif merge_cells_list[i][j] == 1 and merge_cells_list[i+1][j]==1:
                        tb.cell(i, j).merge(tb.cell(i+1, j))
        tb.style = 'Table Grid'
        tb.autofit = True