蒙珣的博客

活好当下,做好今天该做的事情。

0%

Shell+Python监控脚本

这里记录 Shell + Python 的监控脚本,以后还会陆续补充,用来拓宽监控思路和计算思路。

每个监控脚本虽然大同小异,但都有各自的特殊内容,这些内容正是该脚本的灵魂所在。

监控AL小程序和DM

Shell监控部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#----------------- Configuration -----------------
# JMeter test plan file name (looked up under jmeterCreatePath).
jmeterFile="zyzx_audio_2.jmx"

# Concurrency levels to run; the script performs one round per array element.
thread=(400)

# Mini-program endpoint as IP:PORT, used to filter netstat output.
audioIP="192.168.129.218:9800"

# Directory under which the per-run monitoring logs are stored.
logFile="/root/dengyuanjing/jmeterResult"

# Directory holding the .jmx plan and the files JMeter generates.
jmeterCreatePath="/root/dengyuanjing/jmeter"

# Directory containing jmeter.sh.
jmeterStartPath="/root/dengyuanjing/apache-jmeter-5.4.1/bin"
# NOTE: the mini-program connection count spans several processes, so for now it has to be filled in by hand.
# The concurrency figure printed by the Python script is a placeholder; the real value can be filled in.
#----------------- Configuration -----------------

# Print one column of the row whose PID column equals the given PID in a
# captured `top -b` snapshot.
#   $1 = snapshot text, $2 = PID, $3 = 1-based column number
# Matching the PID column exactly avoids the false hits a plain `grep PID`
# gives when the same digits show up in another PID or in a memory figure.
# Prints nothing when the PID is empty or not present in the snapshot.
top_field() {
    [ -n "$2" ] || return 0
    printf '%s\n' "$1" | awk -v pid="$2" -v col="$3" '$1 == pid { print $col }'
}

# Count ESTABLISHED (non-LISTEN) sockets in a captured `netstat -antp` snapshot.
#   $1 = snapshot text, $2 = owning PID, $3 = port pattern passed to grep
# Prints 0 when the PID is empty.
established_count() {
    if [ -z "$2" ]; then
        echo 0
        return 0
    fi
    printf '%s\n' "$1" | grep -F " $2/" | grep -v "LISTEN" | grep "ESTABLISHED" | grep "$3" | wc -l
}

for i in "${thread[@]}"
do

    # Replace the text between the first '>' and the next '<' on line 21 of the
    # test plan with the current concurrency (presumably the thread-count
    # property; verify against the .jmx file). Rewriting the span directly also
    # works when the previous value is empty or contains regex characters.
    sed -i "21s#>[^<]*<#>${i}<#" "$jmeterCreatePath/$jmeterFile"

    # Remove leftovers of a previous run.
    rm -rf "$jmeterCreatePath/500.jtl"
    rm -rf "$jmeterCreatePath/msgReport500"

    current_time=`date "+%Y-%m-%d_%H_%M_%S"`
    run_dir="$logFile/$current_time"
    echo "创建目录名称为:${logFile}/${current_time}"
    echo "并发数:$i"

    # -p: also create $logFile itself on the first run.
    mkdir -p "$run_dir/"

    # Start JMeter in the background; the loop below samples while it is alive.
    nohup sh "$jmeterStartPath/jmeter.sh" -n -t "$jmeterCreatePath/$jmeterFile" -l "$jmeterCreatePath/500.jtl" -e -o "$jmeterCreatePath/${current_time}_msgReport_${i}" > "$jmeterCreatePath/123.log" 2>&1 &

    # PIDs of the monitored services (first match only, so that the value is
    # always a single PID usable for exact matching).
    DMservice_PID=`ps -ef | grep DMService.jar | grep -v grep | awk '{print $2}' | head -n 1`
    NLU_PID=`ps -ef | grep NLUService | grep -v grep | awk '{print $2}' | head -n 1`
    # grep -v grep keeps the grep process itself out of the result.
    mysql_PID=`ps -ef | grep "mysqld --character-set-server=utf8mb4" | grep -v grep | awk '{print $2}' | head -n 1`

    # Mini-program PIDs; the process with the largest PID is the one monitored.
    audio_PIDs=(`ps -ef | grep /usr/local/audiolistening-37/audiolistening | grep -v grep | awk '{print $2}'`)
    audio_PID=""
    for candidate in "${audio_PIDs[@]}"
    do
        if [ -z "$audio_PID" ] || [ "$candidate" -gt "$audio_PID" ]; then
            audio_PID=$candidate
        fi
    done

    while true
    do

        jmeter_PID=`ps -ef | grep -F "$jmeterStartPath/jmeter.sh" | grep -v grep | awk '{print $2}'`

        if [ -n "$jmeter_PID" ]; then

            # Take one top and one netstat snapshot per sample so that every
            # metric of a sample describes the same instant.
            top_snapshot=`top -n 1 -b`
            net_snapshot=`netstat -antp`

            # Mini-program memory (column 10) and CPU (column 9)
            top_field "$top_snapshot" "$audio_PID" 10 >> "$run_dir/AudioListenMem.txt"
            top_field "$top_snapshot" "$audio_PID" 9 >> "$run_dir/AudioListenCpu.txt"

            # DM memory and CPU
            top_field "$top_snapshot" "$DMservice_PID" 10 >> "$run_dir/DMmem.txt"
            top_field "$top_snapshot" "$DMservice_PID" 9 >> "$run_dir/DMcpu.txt"

            # NLU CPU and memory (memory is read from the NLU process, not DM)
            top_field "$top_snapshot" "$NLU_PID" 9 >> "$run_dir/NLUcpu.txt"
            top_field "$top_snapshot" "$NLU_PID" 10 >> "$run_dir/NLUmem.txt"

            # Established connections of DM (port 9002) and NLU (port 9001)
            established_count "$net_snapshot" "$DMservice_PID" 9002 >> "$run_dir/DMthreadCount.txt"
            established_count "$net_snapshot" "$NLU_PID" 9001 >> "$run_dir/NLUthreadCount.txt"

            # Mini-program sockets in TIME_WAIT
            printf '%s\n' "$net_snapshot" | grep -v "LISTEN" | grep -F "$audioIP" | grep "TIME_WAIT" | wc -l >> "$run_dir/AudioLinstening_TIMEWAIT.txt"
            # Mini-program sockets in any non-LISTEN state (waiting + connected)
            printf '%s\n' "$net_snapshot" | grep -v "LISTEN" | grep -F "$audioIP" | wc -l >> "$run_dir/AudioLinstening.txt"

            # Host-wide CPU and memory taken from the top summary lines
            printf '%s\n' "$top_snapshot" | grep "%Cpu(s)" | awk '{print $2}' >> "$run_dir/CPU.txt"
            printf '%s\n' "$top_snapshot" | grep "KiB Mem" | awk '{print $8}' >> "$run_dir/MEM.txt"

            # MySQL CPU and memory
            top_field "$top_snapshot" "$mysql_PID" 9 >> "$run_dir/MySQLcpu.txt"
            top_field "$top_snapshot" "$mysql_PID" 10 >> "$run_dir/MySQLmem.txt"

            sleep 1

        else
            echo "监控结束,正在统计..."
            break
        fi
    done

    # The Python script builds file paths by concatenation, so keep the trailing slash.
    python3 calc_bak.py "$run_dir/"

    cat "$run_dir/result.txt"

    echo ""

done

Python 计算部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import math,sys,os

class bcolors:
    """ANSI escape sequences used to colour and style terminal output."""

    # Foreground colours.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset sequence; the literal deliberately keeps its trailing space.
    END = '\033[0m '

def createLog():
    """Append the table header to the result file.

    Opens the module-level ``resultPath`` in append mode and writes a
    separator line followed by the tab-separated column titles.
    """
    column_titles = ("type", "Max", "Min", "Avg", "PCT90", "PCT95", "PCT99")
    separator = "\n======================================================\n"
    header = "%s\t%s\t%s\t%s\t%5s\t%5s\t%5s\n" % column_titles
    with open(resultPath, 'a+', encoding='utf-8') as out:
        out.write(separator + header)

def getFiles(PATH):
    """Return the names of the files located directly inside ``PATH``.

    Only the first directory yielded by ``os.walk`` is used, so files in
    sub-directories are not included.

    Args:
        PATH: Directory to list.

    Returns:
        A list of file names (without directory part). An empty list is
        returned when ``os.walk`` yields nothing, e.g. because ``PATH`` does
        not exist, so callers can always treat the result as a list.
    """
    for _root, _dirs, files in os.walk(PATH):
        return files
    return []

def calc(proName, array):
    """Compute max, min, mean and the 90th/95th/99th percentiles of a metric.

    The statistics are rounded to two decimals, stored in the module-level
    ``resultDict`` under ``proName`` as
    ``[max, min, avg, P90, P95, P99]`` and appended as one tab-separated row
    to the module-level ``resultPath``.

    Percentiles use the nearest-rank definition: the P-th percentile of N
    sorted values is the value at rank ``ceil(N * P / 100)`` (1-based).

    Args:
        proName: Name of the metric; used as dictionary key and row label.
        array: Numeric samples, already sorted in ascending order. An empty
            list is treated as a single 0.0 sample.

    Returns:
        The module-level ``resultDict``.
    """
    values = array if array else [0.0]
    count = len(values)

    def nearest_rank(percent):
        # Integer ceiling of count * percent / 100; integer arithmetic avoids
        # float artefacts that could push ceil() one rank too far.
        rank = -(-count * percent // 100)
        return round(values[max(rank, 1) - 1], 2)

    # Maximum, minimum and mean.
    maxValue = round(values[-1], 2)
    minValue = round(values[0], 2)
    averageValue = round(sum(values) / count, 2)

    # 90th, 95th and 99th percentiles.
    percent_90 = nearest_rank(90)
    percent_95 = nearest_rank(95)
    percent_99 = nearest_rank(99)

    # Store the values in the shared dictionary.
    resultDict[proName] = [maxValue, minValue, averageValue, percent_90, percent_95, percent_99]

    with open(resultPath, 'a+', encoding='utf-8') as f:
        f.write("%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n" % (proName, maxValue, minValue, averageValue, percent_90, percent_95, percent_99))

    return resultDict

def solve():
    """Read every monitoring log in ``fileList`` and pass its values to ``calc``.

    For each file name in the module-level ``fileList`` the file
    ``PATH + name`` is read line by line. Lines of 15 or more characters
    (newline included) are reported and skipped; lines that cannot be parsed
    as a float are reported and skipped. The collected values are sorted and
    handed to ``calc`` under the file name's part before the first dot.

    A file that is empty, or that contains no usable value, is represented by
    six 0.0 samples so that ``calc`` always receives data.
    """
    for fileName in fileList:
        logPath = PATH + fileName
        proName = fileName.split(".")[0]
        array = []

        if os.path.getsize(logPath) > 0:
            # Read-only access is enough; the log is never modified here.
            with open(logPath, 'r', encoding='utf-8') as f:
                for line in f:
                    if len(line) >= 15:
                        print(bcolors.WARNING+"WARNING"+bcolors.END+logPath+"检测结果大于7位数,被抛弃")
                        continue
                    try:
                        array.append(float(line))
                    except ValueError:
                        print(bcolors.FAIL+"ERROR"+bcolors.END+logPath+"该值有问题,",line)

        if not array:
            array = [float(0)] * 6

        array.sort()
        calc(proName, array)

if __name__ == '__main__':
    # Usage: <script> <log_dir>
    if len(sys.argv) < 2:
        print("Usage: python3 %s <log_dir>" % sys.argv[0])
        sys.exit(1)

    # The helper functions build file paths with "PATH + name", so make sure
    # the directory ends with a path separator.
    PATH = os.path.join(sys.argv[1], '')
    resultPath = PATH + 'result.txt'

    # Every file in the directory is a metric log, except the result file.
    fileList = getFiles(PATH) or []
    if "result.txt" in fileList:
        fileList.remove("result.txt")

    # Dictionary keyed by file name without extension; values are filled by calc().
    resultDict = dict.fromkeys(name.split(".")[0] for name in fileList)

    # Run the calculation.
    createLog()
    solve()

    # Summary line: for each metric in `keys`, print the statistics selected
    # by the matching entry of `indexes`. Index meaning within a metric:
    #   0 max, 1 min, 2 avg, 3 P90, 4 P95, 5 P99
    # Possible improvement: generate the two lists from a single
    # "metric -> wanted statistics" description.
    print('===============================================')

    keys = ['DMthreadCount','AudioLinstening','NLUthreadCount','DMcpu','DMmem','AudioListenCpu','AudioListenMem','NLUcpu','NLUmem']
    indexes = [[0,4], [0,4], [0,4], [0,4], [0], [4], [4], [4], [0]]

    result = []
    for key, idx in zip(keys, indexes):
        stats = resultDict.get(key)
        if stats is None:
            # The metric log is missing; keep the columns aligned.
            result.extend("N/A" for _ in idx)
        else:
            result.extend(str(stats[i]) for i in idx)

    for i in result:
        print(i, end="\t")
    print()

TTS监控脚本

Shell监控部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/bin/bash

#------------------------ Settings to adjust before each run ------------------------
thread=(100 100 100 100 100 100)                # concurrency of each round, one round per element
loop=3560                                       # number of iterations, passed to the client as -thread_count
model="buff"                                    # "buff" measures response time, "resp" measures synthesis time
voice_name="xiaonan"                            # voice to request
jar_tool="client_test.3.0.1.6.7.jar"            # client jar to run
port=8080                                       # TTS port
dir="/home/testuser/dyj/tts3.0_result/xingneng" # directory where results are stored
#-------------------------------------------------------------------------------------

#PATH=$PATH:/home/testuser/dyj/python/bin/
#export PATH=/home/testuser/dyj/python/bin/:$PATH

for i in "${thread[@]}"
do

    # Pick the mode-specific client options and the log field that holds the
    # timing value. An unknown mode is rejected before anything is created
    # or started.
    case "$model" in
        buff)
            client_opts="-play 4 -log 18 -streaming 1"
            time_field=5
            ;;
        resp)
            client_opts="-play 0 -log 24 -streaming 0"
            time_field=6
            ;;
        *)
            echo "请输入buff/resp"
            break
            ;;
    esac

    # Timestamp used as the directory name of this round.
    date_time=`date "+%Y_%m_%d_%H-%M-%S"`
    echo "当前时间目录为:$date_time"

    # PID of the TTS process (first match only, so `top -p` gets a single PID).
    # The process command line must contain "apache-tomcat-tts"; adjust the
    # pattern otherwise.
    TTS_PID=`ps -ef | grep "apache-tomcat-tts" | grep -v grep | awk '{print $2}' | head -n 1`
    if [ -z "$TTS_PID" ]; then
        echo "WARNING: no apache-tomcat-tts process found, CPU/MEM will not be sampled" >&2
    fi

    # Output directory of this round.
    out_dir="$dir/$date_time/${i}_${model}"
    mkdir -p "$out_dir/"

    # Make sure the sample files exist even if no sample is ever taken.
    touch "$out_dir/mem.txt" "$out_dir/cpu.txt"

    # Start the client in the background; $client_opts is intentionally
    # unquoted so that it expands into separate arguments.
    nohup java -jar "$jar_tool" -h 127.0.0.1 -p "$port" -t socket -sample_rate 8000 -pt url -voice_name "$voice_name" -text_file file -thread "$i" -thread_count "$loop" $client_opts -format pcm -play_buflen 4 > "$out_dir/${model}_java.txt" 2>&1 &

    echo "$model模式,$i并发客户端启动完成,正在监控CPU、MEM"

    sleep 2s

    # The java -jar command exits by itself when it is done, so the client is
    # considered running for as long as its process can be found. Sample once
    # per second until it is gone.
    while true
    do
        jar_PID=`ps -ef | grep -F "$jar_tool" | grep -v grep | awk '{print $2}'`

        if [ -z "$jar_PID" ]; then
            break
        fi

        if [ -n "$TTS_PID" ]; then
            # One snapshot per sample; select the process row by its PID column.
            top_snapshot=`top -p "$TTS_PID" -n 1 -b`
            # Memory (column 10)
            printf '%s\n' "$top_snapshot" | awk -v pid="$TTS_PID" '$1 == pid { print $10 }' >> "$out_dir/mem.txt"
            # CPU (column 9)
            printf '%s\n' "$top_snapshot" | awk -v pid="$TTS_PID" '$1 == pid { print $9 }' >> "$out_dir/cpu.txt"
        fi

        sleep 1s
    done

    echo "$model模式 $i并发客户端进程已结束,正在获取日志数据..."

    client_log="$out_dir/${model}_java.txt"

    # Timing values: take the field holding "<model>Time=<value>;" and keep the value.
    grep "${model}Time=" "$client_log" | awk -v field="$time_field" '{print $field}' | awk -F "=" '{print $2}' | awk -F ";" '{print $1}' > "$out_dir/${model}_time.txt"
    # Number of stutters (lines containing "jdTime").
    grep "jdTime" "$client_log" | wc -l > "$out_dir/${model}_jdTimeNum.txt"
    jdTimeNum=`cat "$out_dir/${model}_jdTimeNum.txt"`
    # Stutter durations (8th field of the "jdTime" lines).
    grep "jdTime" "$client_log" | awk '{print $8}' > "$out_dir/${model}_jdTime.txt"

    echo "$model模式 $i并发客户端日志获取完成,正在统计结果"

    # The Python script builds file paths by concatenation, so keep the trailing slash.
    python3 calc.py "$out_dir/" "$model" "$i" "$jdTimeNum"

    echo ""
    cat "$out_dir/result.txt"
    echo ""

done

Python计算部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from contextlib import nullcontext
import math
import sys,os

class bcolors:
    """ANSI escape sequences used to colour and style terminal output."""

    # Foreground colours.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset sequence; the literal deliberately keeps its trailing space.
    END = '\033[0m '

def createLog(jdTimeNum):
    """Append the run summary and the table header to the result file.

    Writes, in append mode, to the module-level ``resultPath``: one summary
    line built from the module-level ``model`` and ``thread_num`` plus
    ``jdTimeNum``, a separator line, and the tab-separated column titles.

    Args:
        jdTimeNum: Stutter count shown in the summary line.
    """
    summary = '\t'.join([
        model,
        '并发次数为:' + str(thread_num),
        '卡顿次数为:' + str(jdTimeNum),
    ])
    separator = "\n======================================================\n"
    column_titles = ("type", "Max", "Min", "Avg", "PCT90", "PCT95", "PCT99")
    header = "%s\t%s\t%s\t%s\t%5s\t%5s\t%5s\n" % column_titles
    with open(resultPath, 'a+', encoding='utf-8') as out:
        out.write(summary + separator + header)

def calc(proName, array):
    """Append max, min, mean and the 90th/95th/99th percentiles of a metric.

    One tab-separated row ``name, max, min, avg, P90, P95, P99`` (two
    decimals each) is appended to the module-level ``resultPath``.

    Percentiles use the nearest-rank definition: the P-th percentile of N
    sorted values is the value at rank ``ceil(N * P / 100)`` (1-based).

    Args:
        proName: Name of the metric; used as the row label.
        array: Numeric samples, already sorted in ascending order. An empty
            list is treated as a single 0.0 sample.
    """
    values = array if array else [0.0]
    count = len(values)

    def nearest_rank(percent):
        # Integer ceiling of count * percent / 100; integer arithmetic avoids
        # float artefacts that could push ceil() one rank too far.
        rank = -(-count * percent // 100)
        return values[max(rank, 1) - 1]

    # Maximum, minimum and mean.
    maxValue = values[-1]
    minValue = values[0]
    averageValue = sum(values) / count

    # 90th, 95th and 99th percentiles.
    percent_90 = nearest_rank(90)
    percent_95 = nearest_rank(95)
    percent_99 = nearest_rank(99)

    with open(resultPath, 'a+', encoding='utf-8') as f:
        f.write("%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n" % (proName, maxValue, minValue, averageValue, percent_90, percent_95, percent_99))


def solve(i):
    """Read one monitoring log and pass its values to ``calc``.

    The file ``PATH + i`` is read line by line. A line whose content is
    exactly ``0.0`` is reported and skipped, as is a line of 7 or more
    characters (newline included); lines that cannot be parsed as a float are
    reported and skipped. The remaining values are sorted and handed to
    ``calc`` under the file name's part before the first dot.

    A file that is empty (e.g. no stutter occurred), or that yields no usable
    value, is represented by six 0.0 samples so that ``calc`` always receives
    data.

    Args:
        i: File name of the log, relative to the module-level ``PATH``.
    """
    logPath = PATH + i
    proName = i.split(".")[0]
    array = []

    if os.path.getsize(logPath) > 0:
        # Read-only access is enough; the log is never modified here.
        with open(logPath, 'r', encoding='utf-8') as f:
            for line in f:
                # Lines keep their trailing newline, so compare the stripped
                # text; otherwise the 0.0 filter can never match.
                if line.strip() == "0.0":
                    print(bcolors.WARNING+"warning"+bcolors.END+logPath+"0.0的值已被抛弃")  # discard 0.0 outliers in mem/cpu
                    continue
                if len(line) >= 7:
                    print(bcolors.WARNING+"warning"+bcolors.END+logPath+"检测结果大于7位数,已抛弃")  # discard oversized outliers in mem/cpu
                    continue
                try:
                    array.append(float(line))
                except ValueError:
                    print(bcolors.FAIL+"error"+bcolors.END+logPath+"该值有问题,",line)

    if not array:
        array = [float(0)] * 6

    array.sort()
    calc(proName, array)


def buff_Or_resp():
    """Process the log files that belong to the selected mode.

    Looks up the module-level ``model`` ("buff" or "resp") and calls
    ``solve`` for each of that mode's log files in order. Any other value
    prints the usage line and processes nothing.
    """
    logs_by_model = {
        "buff": ["buff_time.txt","buff_jdTime.txt","cpu.txt","mem.txt"],
        "resp": ["resp_time.txt","resp_jdTime.txt","cpu.txt","mem.txt"],
    }

    log_names = logs_by_model.get(model)
    if log_names is None:
        print("Usage:python3 path buff/resp thread_num jdTimeNum")
        return

    for log_name in log_names:
        solve(log_name)

if __name__ == '__main__':
    # Expected arguments: <log_dir> <buff|resp> <thread_num> <jdTimeNum>
    if len(sys.argv) < 5:
        print("Usage:python3 path buff/resp thread_num jdTimeNum")
        sys.exit(1)

    # The helper functions build file paths with "PATH + name", so make sure
    # the directory ends with a path separator.
    PATH = os.path.join(sys.argv[1], '')
    model = sys.argv[2]
    thread_num = sys.argv[3]
    jdTimeNum = sys.argv[4]
    resultPath = PATH + 'result.txt'

    # Write the header, then one statistics row per log file of the mode.
    createLog(jdTimeNum)
    buff_Or_resp()