import os
import sys
import copy
# Auto-install the third-party modules this script needs when they are missing.
# Only ImportError triggers an install; any other failure raised while importing
# is left to propagate instead of being hidden behind a pip call.
try:
    import pandas as pd
except ImportError:
    # pandas is used to generate the Excel sheet.
    os.system('pip install pandas')
    import pandas as pd
try:
    # The module name must be spelled "openpyxl"; a misspelled name here makes
    # the import fail every time and forces a pip run on every start.
    import openpyxl
except ImportError:
    # openpyxl is the module pandas depends on for Excel output.
    os.system('pip install openpyxl')
    import openpyxl
IS_DEBUG = True
class PageOwnerParser(object):
def __init__(self, pageowner_file, ps_file, func):
self.pageowner_file = pageowner_file
self.ps_file = ps_file
self.func = func
self.path = os.getcwd()
def parsePageOwner(self):
    """Accumulate, per pid, the size of every call stack mentioning ``self.func``.

    The page_owner file is read as segments separated by empty lines, one
    segment per call stack (presumably the output of the kernel's
    page_owner_sort tool -- verify against the producer of the file).
    Within a segment:

    * line 1: its first whitespace-separated token is the allocation count;
    * line 2: the text before the first comma ends with the page order, and
      the last token of the whole line is the pid;
    * every line (headers included) is searched for ``self.func``.

    The size of a segment is ``2**order * times * 4``, i.e. KB when pages
    are 4 KB (the factor 4 is hard-coded).

    Also prints the number of segments and the total size of all segments,
    whether or not they match ``self.func``.

    Returns:
        dict mapping pid (str, exactly as written in the file) to the summed
        size (int) of all matching segments of that pid. The same pid can
        appear in several segments (different stacks or orders), which is
        why the sizes are merged here.

    Raises:
        IndexError: a segment has fewer than two lines or an empty header.
        ValueError: the count or the order is not an integer.
    """
    segments = []
    current = []
    # 1. Split the file into segments on empty lines.
    with open(self.pageowner_file, "r") as f:
        for line in f:
            if line == '\n':
                if current:
                    segments.append(current)
                    current = []
            else:
                current.append(line.strip())
    # A file that does not end with an empty line still carries a pending
    # segment at this point; without this flush the last stack would be lost.
    if current:
        segments.append(current)

    # 2. Walk the segments: add every segment to the grand total and merge
    #    the sizes of the segments that mention the keyword, keyed by pid.
    size_by_pid = {}
    out_total = 0
    for stack in segments:
        times_text = stack[0].split()[0]
        header = stack[1]
        pid = header.split()[-1]
        order_text = header.split(",")[0].split()[-1]
        size = 2 ** int(order_text) * int(times_text) * 4
        out_total += size
        if any(self.func in text for text in stack):
            size_by_pid[pid] = size_by_pid.get(pid, 0) + size

    print("stack_number is ", len(segments))
    print("out_total is ", out_total)
    return size_by_pid
def parsePS(self):
    """Parse the ps output file into lookup tables keyed by spid and pid.

    The first two lines of the file are skipped; the original comments
    describe them as a date line followed by the column header, e.g.::

        Thu Jan 1 08:45:08 CST 1970
        USER PID SPID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND

    Each remaining line is split on whitespace; field 0 is taken as the
    user, field 1 as the pid, field 2 as the spid and field 11 as the
    command (only its first token, since the line is whitespace-split).
    Lines with fewer than 12 fields -- blank or truncated lines -- carry no
    usable record and are skipped instead of raising IndexError.

    Also prints the number of lines read and the sizes of the spid and pid
    tables.

    Returns:
        tuple ``(spid_dic, pid_dic, pid_cmdline_dic)`` where

        * ``spid_dic`` maps spid -> [user, pid, command]; if the same spid
          occurs again, another user/pid/command triple is appended to the
          same list;
        * ``pid_dic`` maps pid -> list of its spids in file order;
        * ``pid_cmdline_dic`` maps pid -> command of the first line seen
          for that pid.
    """
    with open(self.ps_file, "r") as f:
        ps_lines = f.readlines()
    print(">>>>>> ps's lines is ", len(ps_lines))

    spid_dic = {}
    pid_dic = {}
    pid_cmdline_dic = {}
    # Slicing (rather than removing two items) also copes with files that
    # have fewer than two lines.
    for row in ps_lines[2:]:
        fields = row.split()
        if len(fields) < 12:
            continue
        user, pid, spid, command = fields[0], fields[1], fields[2], fields[11]
        # {pid: command}, keeping the first command seen for the pid.
        pid_cmdline_dic.setdefault(pid, command)
        # {pid: [spid1, spid2, ...]}
        pid_dic.setdefault(pid, []).append(spid)
        # {spid: [user, pid, command]}
        spid_dic.setdefault(spid, []).extend((user, pid, command))

    print(">>>>>> spid_dic's number is ", len(spid_dic))
    print(">>>>>> pid_dic's lines is ", len(pid_dic))
    return spid_dic, pid_dic, pid_cmdline_dic
def calculatePIDMemSize(self, spid_size_dic, spid_dic, pid_dic, pid_cmdline_dic):
pid_statistic = []
pid_statistic_list = []
pid_size_dic = {}
# 未知进程用“unknown”表示
unknown = 0
# 1.遍历spid_size_dic获取spid, 以及spid的size
for spid in spid_size_dic:
spid_size = spid_size_dic[spid]
# spid_dic内容为:{spid:[user,pid,command]}
# pid_dic内容为:{pid:[spid1,spid2,spid3]}
if spid in spid_dic:
# 2.得到spid_dic中对应spid的pid
pid_spid = spid_dic[spid][1]
if pid_spid in pid_size_dic:
pid_size_dic[pid_spid] = pid_size_dic[pid_spid] + spid_size
else:
pid_size_dic[pid_spid] = spid_size
else:
unknown = unknown + spid_size
pid_size_dic['unknown'] = unknown
# 3.生成最终数据,数据格式�

高桐@BILL
- 粉丝: 4445
最新资源
- 软件测试工程师管理系统需求分析.doc
- 2022年计算机等级考试真题分析.doc
- 算法简介及程序的基本结构.pptx
- 如何做好网络时代8090员工的思想工作.ppt
- 球墨铸铁给水管施工方法胶圈接口.doc
- 基于单片机智能粮仓控制系统的研究.doc
- MATLAB蛛网模型.doc
- 基于PLC自动换刀系统设计说明书.doc
- (源码)基于Arduino的代码实验项目 Tinkercad Arduino.zip
- 女士相亲网站自我介绍.doc
- 中小型企业网络工程设计方案--OKK.doc
- 学生成绩管理系统c语言代码.doc
- 信息化管理与运作课件.ppt
- 项目管理系统培训材料v2.pptx
- 通信专业技术工作总结.doc
- 高校房屋修缮类修购专项项目管理探索.doc
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈



- 1
- 2
- 3
前往页