Compare commits

...

10 Commits

Author SHA1 Message Date
openeuler-ci-bot
a580b7945a
!238 fix period task some bugs and fix env config
From: @tong_1001 
Reviewed-by: @hubin95 
Signed-off-by: @hubin95
2025-03-29 07:44:34 +00:00
shixuantong
675344a743 fix period task some bugs and fix env config 2025-03-29 14:39:29 +08:00
openeuler-ci-bot
d3fa09f916
!220 ai block io: exit when stage is not supported
From: @luckky7 
Reviewed-by: @znzjugod 
Signed-off-by: @znzjugod
2025-03-13 08:54:40 +00:00
luckky
cb7fc9462f ai block io: exit when stage is not supported 2025-03-13 16:25:00 +08:00
openeuler-ci-bot
89864b3d6d
!217 fix the sentryCollector service can't be stopped for a long time
From: @zhuofeng6 
Reviewed-by: @znzjugod 
Signed-off-by: @znzjugod
2025-03-13 03:20:11 +00:00
zhuofeng
a2367330fa
fix the sentryCollector service can't be stopped for a long
Signed-off-by: zhuofeng <1107893276@qq.com>
2025-03-13 02:08:40 +00:00
openeuler-ci-bot
4fd8aa3dea
!205 fix bug of ebpf and ai_block_io
From: @zhuofeng6 
Reviewed-by: @znzjugod 
Signed-off-by: @znzjugod
2025-02-14 06:34:51 +00:00
zhuofeng
95cc3d6f1c fix bug of ebpf and ai_block_io
Signed-off-by: zhuofeng <1107893276@qq.com>
2025-02-14 14:20:04 +08:00
openeuler-ci-bot
a5c71fe53f
!200 add avg_block_io and ai_block_io
From: @zhuofeng6 
Reviewed-by: @znzjugod 
Signed-off-by: @znzjugod
2025-01-26 06:26:05 +00:00
zhuofeng
68c319012a add avg_block_io and ai_block_io
Signed-off-by: zhuofeng <1107893276@qq.com>
2025-01-26 12:01:15 +08:00
8 changed files with 5336 additions and 1619 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,69 @@
From b1fdf6495d7f2a7afa313d1510cb8f65aa42c369 Mon Sep 17 00:00:00 2001
From: luckky <guodashun1@huawei.com>
Date: Thu, 13 Mar 2025 11:38:34 +0800
Subject: [PATCH] ai block io: exit when stage is not supported
---
.../ai_block_io/config_parser.py | 32 +++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/src/python/sentryPlugins/ai_block_io/config_parser.py b/src/python/sentryPlugins/ai_block_io/config_parser.py
index 1bbb609..612fe9f 100644
--- a/src/python/sentryPlugins/ai_block_io/config_parser.py
+++ b/src/python/sentryPlugins/ai_block_io/config_parser.py
@@ -32,6 +32,12 @@ ALL_STAGE_LIST = [
"rq_driver",
"bio",
]
+EBPF_STAGE_LIST = [
+ "wbt",
+ "rq_driver",
+ "bio",
+ "gettag"
+]
ALL_IOTPYE_LIST = ["read", "write"]
DISK_TYPE_MAP = {
0: "nvme_ssd",
@@ -312,15 +318,37 @@ class ConfigParser:
if len(stage_list) == 1 and stage_list[0] == "":
logging.critical("stage value not allow is empty, exiting...")
exit(1)
+
+ # check if kernel or ebpf is supported (code is from collector)
+ valid_stage_list = ALL_STAGE_LIST
+ base_path = '/sys/kernel/debug/block'
+ all_disk = []
+ for disk_name in os.listdir(base_path):
+ disk_path = os.path.join(base_path, disk_name)
+ blk_io_hierarchy_path = os.path.join(disk_path, 'blk_io_hierarchy')
+
+ if not os.path.exists(blk_io_hierarchy_path):
+ logging.warning("no blk_io_hierarchy directory found in %s, skipping.", disk_name)
+ continue
+
+ for file_name in os.listdir(blk_io_hierarchy_path):
+ if file_name == 'stats':
+ all_disk.append(disk_name)
+
+ if len(all_disk) == 0:
+ logging.debug("no blk_io_hierarchy disk, it is not lock-free collection")
+ valid_stage_list = EBPF_STAGE_LIST
+
if len(stage_list) == 1 and stage_list[0] == "default":
logging.warning(
"stage will enable default value: %s",
self.DEFAULT_CONF["common"]["stage"],
)
- self._conf["common"]["stage"] = ALL_STAGE_LIST
+ self._conf["common"]["stage"] = valid_stage_list
return
+
for stage in stage_list:
- if stage not in ALL_STAGE_LIST:
+ if stage not in valid_stage_list:
logging.critical(
"stage: %s is not valid stage, ai_block_io will exit...", stage
)
--
2.43.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,397 @@
From 480c0fc479ec882786cdb58d699cf84ce5995531 Mon Sep 17 00:00:00 2001
From: zhuofeng <zhuofeng2@huawei.com>
Date: Fri, 14 Feb 2025 09:42:27 +0800
Subject: [PATCH] fix bug of ebpf and ai_block_io
---
src/c/ebpf_collector/ebpf_collector.bpf.c | 357 ------------------
.../sentryPlugins/ai_block_io/detector.py | 3 +-
2 files changed, 2 insertions(+), 358 deletions(-)
diff --git a/src/c/ebpf_collector/ebpf_collector.bpf.c b/src/c/ebpf_collector/ebpf_collector.bpf.c
index 417618d..7a2f481 100644
--- a/src/c/ebpf_collector/ebpf_collector.bpf.c
+++ b/src/c/ebpf_collector/ebpf_collector.bpf.c
@@ -590,361 +590,4 @@ int kprobe_bio_endio(struct pt_regs *regs)
return 0;
}
-// start get_tag
-SEC("kprobe/blk_mq_get_tag")
-int kprobe_blk_mq_get_tag(struct pt_regs *regs)
-{
- u64 tagkey = bpf_get_current_task();
- u64 value = (u64)PT_REGS_PARM1(regs);
- (void)bpf_map_update_elem(&tag_args, &tagkey, &value, BPF_ANY);
-
- struct blk_mq_alloc_data *bd;
- struct request_queue *q;
- struct backing_dev_info *backing_dev_info;
- struct device *owner;
- dev_t devt;
- unsigned int cmd_flags = 0;
-
- bd = (struct blk_mq_alloc_data *)value;
- bpf_core_read(&q, sizeof(q), &bd->q);
- bpf_core_read(&backing_dev_info, sizeof(backing_dev_info), &q->backing_dev_info);
- bpf_core_read(&owner, sizeof(owner), &backing_dev_info->owner);
- bpf_core_read(&devt, sizeof(devt), &owner->devt);
- int major = MAJOR(devt);
- int first_minor = MINOR(devt);
-
- if (major == 0) {
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_get_tag(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp, zero = {};
- init_io_counter(&zero, major, first_minor);
- counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
- if (counterp) {
- return 0;
- }
- long err = bpf_map_update_elem(&tag_map, &tagkey, &zero, BPF_NOEXIST);
- if (err) {
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_UPDATE_FAIL);
- return 0;
- }
-
- u64 curr_start_range = zero.start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
- if (!curr_data) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 0,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else {
- update_curr_data_in_start(curr_data, &params);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && key >= 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
- }
- }
- return 0;
-}
-
-// finish get_tag
-SEC("kretprobe/blk_mq_get_tag")
-int kretprobe_blk_mq_get_tag(struct pt_regs *regs)
-{
- u64 tagkey = bpf_get_current_task();
- u64 *tagargs = (u64 *)bpf_map_lookup_elem(&tag_args, &tagkey);
- if (tagargs == NULL) {
- bpf_map_delete_elem(&tag_args, &tagkey);
- return 0;
- }
-
- struct blk_mq_alloc_data *bd;
- struct request_queue *q;
- struct backing_dev_info *backing_dev_info;
- struct device *owner;
- dev_t devt;
- unsigned int cmd_flags = 0;
-
- bd = (struct blk_mq_alloc_data *)*tagargs;
- bpf_core_read(&q, sizeof(q), &bd->q);
- bpf_core_read(&backing_dev_info, sizeof(backing_dev_info), &q->backing_dev_info);
- bpf_core_read(&owner, sizeof(owner), &backing_dev_info->owner);
- bpf_core_read(&devt, sizeof(devt), &owner->devt);
- int major = MAJOR(devt);
- int first_minor = MINOR(devt);
-
- if (major == 0) {
- log_event(STAGE_GET_TAG, PERIOD_END, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_get_tag(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
- if (!counterp) {
- return 0;
- }
-
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
- u64 curr_start_range = counterp->start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 1,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else if (curr_data == NULL) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else {
- curr_data->duration += duration;
- update_curr_data_in_finish(curr_data, &params, duration);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
- }
- }
-
- bpf_map_delete_elem(&tag_map, &tagkey);
- bpf_map_delete_elem(&tag_args, &tagkey);
- return 0;
-}
-
-// start wbt
-SEC("kprobe/wbt_wait")
-int kprobe_wbt_wait(struct pt_regs *regs)
-{
- u64 wbtkey = bpf_get_current_task();
- u64 value = (u64)PT_REGS_PARM2(regs);
- (void)bpf_map_update_elem(&wbt_args, &wbtkey, &value, BPF_ANY);
-
- struct bio *bio;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags;
-
- bio = (struct bio *)value;
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bio->bi_disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
-
- if (major == 0) {
- log_event(STAGE_WBT, PERIOD_START, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_wbt(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp, zero = {};
- init_io_counter(&zero, major, first_minor);
- counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
- if (counterp) {
- return 0;
- }
- long err = bpf_map_update_elem(&wbt_map, &wbtkey, &zero, BPF_NOEXIST);
- if (err) {
- log_event(STAGE_WBT, PERIOD_START, ERROR_UPDATE_FAIL);
- return 0;
- }
-
- u64 curr_start_range = zero.start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
- if (!curr_data) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 0,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else {
- update_curr_data_in_start(curr_data, &params);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && key >= 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
- }
- }
- return 0;
-}
-
-// finish wbt
-SEC("kretprobe/wbt_wait")
-int kretprobe_wbt_wait(struct pt_regs *regs)
-{
- u64 wbtkey = bpf_get_current_task();
- u64 *wbtargs = (u64 *)bpf_map_lookup_elem(&wbt_args, &wbtkey);
- if (wbtargs == NULL) {
- bpf_map_delete_elem(&wbt_args, &wbtkey);
- return 0;
- }
-
- struct bio *bio;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags;
-
- bio = (struct bio *)(*wbtargs);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bio->bi_disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
-
- if (major == 0) {
- log_event(STAGE_WBT, PERIOD_END, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_wbt(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
- if (!counterp) {
- return 0;
- }
-
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
- u64 curr_start_range = counterp->start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 1,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else if (curr_data == NULL) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 0,
- .duration = 0,
- .io_type = "",
- .major = major,
- .first_minor = first_minor,
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else {
- curr_data->duration += duration;
- update_curr_data_in_finish(curr_data, &params, duration);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
- }
- }
- bpf_map_delete_elem(&wbt_map, &wbtkey);
- bpf_map_delete_elem(&wbt_args, &wbtkey);
- return 0;
-}
-
char _license[] SEC("license") = "GPL";
diff --git a/src/python/sentryPlugins/ai_block_io/detector.py b/src/python/sentryPlugins/ai_block_io/detector.py
index 27fb7f7..2688cb1 100644
--- a/src/python/sentryPlugins/ai_block_io/detector.py
+++ b/src/python/sentryPlugins/ai_block_io/detector.py
@@ -55,11 +55,12 @@ class Detector:
detection_result = self._slidingWindow.is_slow_io_event(metric_value)
# 检测到慢周期由Detector负责打印info级别日志
if detection_result[0][1]:
+ ai_threshold = "None" if detection_result[2] is None else round(detection_result[2], 3)
logging.info(f'[abnormal_period]: disk: {self._metric_name.disk_name}, '
f'stage: {self._metric_name.stage_name}, '
f'iotype: {self._metric_name.io_access_type_name}, '
f'type: {self._metric_name.metric_name}, '
- f'ai_threshold: {round(detection_result[2], 3)}, '
+ f'ai_threshold: {ai_threshold}, '
f'curr_val: {metric_value}')
else:
logging.debug(f'Detection result: {str(detection_result)}')
--
2.33.0

View File

@ -0,0 +1,56 @@
From 71fe4393402427b3fbcd147626406cbd70186046 Mon Sep 17 00:00:00 2001
From: shixuantong <shixuantong1@huawei.com>
Date: Sat, 29 Mar 2025 11:06:47 +0800
Subject: [PATCH] fix env_file and environ_conf
---
src/python/syssentry/global_values.py | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/src/python/syssentry/global_values.py b/src/python/syssentry/global_values.py
index 9c7800b..48a9f2d 100644
--- a/src/python/syssentry/global_values.py
+++ b/src/python/syssentry/global_values.py
@@ -75,6 +75,8 @@ class InspectTask:
self.onstart = False
# ccnfig env_file
self.env_file = ""
+ # env conf to popen arg
+ self.environ_conf = None
# start mode
self.conflict = "up"
# alarm id
@@ -112,7 +114,7 @@ class InspectTask:
logging.error("task %s log_file %s open failed", self.name, self.log_file)
logfile = subprocess.PIPE
try:
- child = subprocess.Popen(cmd_list, stdout=logfile, stderr=subprocess.STDOUT, close_fds=True)
+ child = subprocess.Popen(cmd_list, stdout=logfile, stderr=subprocess.STDOUT, close_fds=True, env=self.environ_conf)
except OSError:
logging.error("task %s start Popen error, invalid cmd", cmd_list)
self.result_info["result"] = ResultLevel.FAIL.name
@@ -199,7 +201,7 @@ class InspectTask:
return
# read config
- environ_conf = {}
+ self.environ_conf = dict(os.environ)
with open(self.env_file, 'r') as file:
for line in file:
line = line.strip()
@@ -210,11 +212,6 @@ class InspectTask:
if not key or not value:
logging.error("env_file = %s format is error, use default environ", self.env_file)
return
- environ_conf[key] = value
-
- # set environ
- for key, value in environ_conf.items():
- logging.debug("environ key=%s, value=%s", key, value)
- os.environ[key] = value
+ self.environ_conf[key] = value
logging.debug("the subprocess=[%s] begin to run", self.name)
--
2.27.0

View File

@ -0,0 +1,54 @@
From f2e384ea0cf6a323a41c293f981952b48ff3052f Mon Sep 17 00:00:00 2001
From: shixuantong <shixuantong1@huawei.com>
Date: Sat, 29 Mar 2025 10:50:47 +0800
Subject: [PATCH] fix period task some bugs
---
src/python/syssentry/cron_process.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/python/syssentry/cron_process.py b/src/python/syssentry/cron_process.py
index 5543d67..52e6e1f 100644
--- a/src/python/syssentry/cron_process.py
+++ b/src/python/syssentry/cron_process.py
@@ -59,7 +59,6 @@ class PeriodTask(InspectTask):
self.result_info["details"] = {}
if not self.period_enabled:
self.period_enabled = True
- self.upgrade_period_timestamp()
if self.conflict != 'up':
ret = self.check_conflict()
@@ -87,6 +86,7 @@ class PeriodTask(InspectTask):
self.runtime_status = FAILED_STATUS
return False, "period task start popen failed, invalid command"
finally:
+ self.upgrade_period_timestamp()
if isinstance(logfile, io.TextIOWrapper) and not logfile.closed:
logfile.close()
@@ -127,7 +127,6 @@ class PeriodTask(InspectTask):
res, _ = self.start()
if res:
set_runtime_status(self.name, RUNNING_STATUS)
- self.upgrade_period_timestamp()
def period_tasks_handle():
@@ -142,7 +141,7 @@ def period_tasks_handle():
logging.debug("period not enabled")
continue
- if not task.onstart:
+ if not task.onstart and task.last_exec_timestamp == 0:
logging.debug("period onstart not enabled, task: %s", task.name)
task.runtime_status = EXITED_STATUS
continue
@@ -153,4 +152,3 @@ def period_tasks_handle():
res, _ = task.start()
if res:
set_runtime_status(task.name, RUNNING_STATUS)
- task.upgrade_period_timestamp()
--
2.27.0

View File

@ -0,0 +1,61 @@
From 411e0fe141efdf02d73aa15c2576214af1be787e Mon Sep 17 00:00:00 2001
From: zhuofeng <1107893276@qq.com>
Date: Wed, 12 Mar 2025 02:27:12 +0000
Subject: [PATCH] fix the sentryCollector service can't be stopped for a long
time
Signed-off-by: zhuofeng <1107893276@qq.com>
---
src/python/sentryCollector/collect_io.py | 13 +++++++++++--
src/python/syssentry/global_values.py | 2 +-
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/src/python/sentryCollector/collect_io.py b/src/python/sentryCollector/collect_io.py
index 4cf6534..622e0b4 100644
--- a/src/python/sentryCollector/collect_io.py
+++ b/src/python/sentryCollector/collect_io.py
@@ -322,6 +322,8 @@ class CollectIo():
if curr_io_dump > 0:
logging.info(f"ebpf io_dump info : {disk_name}, {stage}, {io_type}, {curr_io_dump}")
IO_GLOBAL_DATA[disk_name][stage][io_type].insert(0, [curr_lat, curr_io_dump, curr_io_length, curr_iops])
+ if curr_lat > 0:
+ logging.info(f"ebpf info : {disk_name}, {stage}, {io_type}, {curr_lat}, {curr_iops}")
elapsed_time = time.time() - start_time
sleep_time = self.period_time - elapsed_time
@@ -405,10 +407,17 @@ class CollectIo():
self
) -> None:
global EBPF_PROCESS
- if EBPF_PROCESS:
+ if not EBPF_PROCESS:
+ logging.debug("No eBPF process to stop")
+ return
+ try:
EBPF_PROCESS.terminate()
+ EBPF_PROCESS.wait(timeout=3)
+ except subprocess.TimeoutExpired:
+ logging.debug("eBPF process did not exit within timeout. Forcing kill.")
+ EBPF_PROCESS.kill()
EBPF_PROCESS.wait()
- logging.info("ebpf collector thread exit")
+ logging.info("ebpf collector thread exit")
def main_loop(self):
global IO_GLOBAL_DATA
diff --git a/src/python/syssentry/global_values.py b/src/python/syssentry/global_values.py
index b123b2d..9c7800b 100644
--- a/src/python/syssentry/global_values.py
+++ b/src/python/syssentry/global_values.py
@@ -114,7 +114,7 @@ class InspectTask:
try:
child = subprocess.Popen(cmd_list, stdout=logfile, stderr=subprocess.STDOUT, close_fds=True)
except OSError:
- logging.error("task %s start Popen error, invalid cmd")
+ logging.error("task %s start Popen error, invalid cmd", cmd_list)
self.result_info["result"] = ResultLevel.FAIL.name
self.result_info["error_msg"] = RESULT_LEVEL_ERR_MSG_DICT.get(ResultLevel.FAIL.name)
self.runtime_status = "FAILED"
--
2.43.0

View File

@ -4,7 +4,7 @@
Summary: System Inspection Framework
Name: sysSentry
Version: 1.0.2
Release: 29
Release: 34
License: Mulan PSL v2
Group: System Environment/Daemons
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
@ -39,12 +39,19 @@ Patch26: hbm_online_repair-add-unload-driver.patch
Patch27: add-pyxalarm-and-pySentryNotify-add-multi-users-supp.patch
Patch28: adapt_5.10_kenel_for_syssentry.patch
Patch29: collect-module-adapt-to-the-5.10-kernel.patch
Patch30: add-avg_block_io-and-ai_block_io.patch
Patch31: fix-bug-of-ebpf-and-ai_block_io.patch
Patch32: fix-the-sentryCollector-service-can-t-be-stopped-for.patch
Patch33: ai-block-io-exit-when-stage-is-not-supported.patch
Patch34: fix-period-task-some-bugs.patch
Patch35: fix-env_file-and-environ_conf.patch
BuildRequires: cmake gcc-c++
BuildRequires: python3 python3-setuptools
BuildRequires: json-c-devel
BuildRequires: chrpath
BuildRequires: elfutils-devel clang libbpf-devel bpftool
BuildRequires: python3-numpy python3-pytest
Requires: libxalarm = %{version}
Requires: libbpf
@ -68,6 +75,39 @@ Provides: libxalarm-devel = %{version}
%description -n libxalarm-devel
This package provides developer tools for the libxalarm.
%package -n avg_block_io
Summary: Supports slow I/O detection
Requires: sysSentry = %{version}-%{release}
Requires: pysentry_notify = %{version}-%{release}
Requires: pysentry_collect = %{version}-%{release}
%description -n avg_block_io
This package provides slow I/O detection based on eBPF
%package -n ai_block_io
Summary: Supports slow I/O detection
Requires: python3-numpy
Requires: sysSentry = %{version}-%{release}
Requires: pysentry_notify = %{version}-%{release}
Requires: pysentry_collect = %{version}-%{release}
%description -n ai_block_io
This package provides slow I/O detection based on AI
%package -n pyxalarm
Summary: Supports xalarm API in Python implementation
Requires: sysSentry = %{version}-%{release}
%description -n pyxalarm
This package provides xalarm API support for users
%package -n pysentry_notify
Summary: Supports xalarm report in Python implementation
Requires: sysSentry = %{version}-%{release}
%description -n pysentry_notify
This package provides xalarm report support for plugins
%package -n cpu_sentry
Summary: CPU fault inspection program
Requires: procps-ng
@ -165,6 +205,14 @@ install src/c/hbm_online_repair/hbm_online_repair.env %{buildroot}/etc/sysconfig
chrpath -d %{buildroot}%{_bindir}/cat-cli
chrpath -d %{buildroot}%{_libdir}/libcpu_patrol.so
# avg_block_io
install config/tasks/avg_block_io.mod %{buildroot}/etc/sysSentry/tasks/
install config/plugins/avg_block_io.ini %{buildroot}/etc/sysSentry/plugins/avg_block_io.ini
# ai_block_io
install config/tasks/ai_block_io.mod %{buildroot}/etc/sysSentry/tasks/
install config/plugins/ai_block_io.ini %{buildroot}/etc/sysSentry/plugins/ai_block_io.ini
# logrotate
mkdir -p %{buildroot}%{_localstatedir}/lib/logrotate-syssentry
mkdir -p %{buildroot}%{_sysconfdir}/cron.hourly
@ -173,6 +221,8 @@ install -m 0500 src/sh/logrotate-sysSentry.cron %{buildroot}%{_sysconfdir}/cron.
pushd src/python
python3 setup.py install -O1 --root=$RPM_BUILD_ROOT --record=SENTRY_FILES
cat SENTRY_FILES | grep -v register_xalarm.* | grep -v sentry_notify.* > SENTRY_FILES.tmp
mv SENTRY_FILES.tmp SENTRY_FILES
popd
%pre
@ -221,6 +271,18 @@ rm -rf %{buildroot}
%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/xalarm.conf
%attr(0600,root,root) %{_unitdir}/xalarmd.service
# avg block io
%exclude %{_sysconfdir}/sysSentry/tasks/avg_block_io.mod
%exclude %{_sysconfdir}/sysSentry/plugins/avg_block_io.ini
%exclude %{_bindir}/avg_block_io
%exclude %{python3_sitelib}/sentryPlugins/*
# ai_block_io
%exclude %{_sysconfdir}/sysSentry/tasks/ai_block_io.mod
%exclude %{_sysconfdir}/sysSentry/plugins/ai_block_io.ini
%exclude %{_bindir}/ai_block_io
%exclude %{python3_sitelib}/sentryPlugins/*
# sentryCollector
%attr(0550,root,root) %{_bindir}/sentryCollector
%attr(0600,root,root) %{_sysconfdir}/sysSentry/collector.conf
@ -248,6 +310,23 @@ rm -rf %{buildroot}
%exclude %{python3_sitelib}/syssentry/bmc_*
%exclude %{python3_sitelib}/syssentry/*/bmc_*
%files -n avg_block_io
%attr(0500,root,root) %{_bindir}/avg_block_io
%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/tasks/avg_block_io.mod
%attr(0600,root,root) %{_sysconfdir}/sysSentry/plugins/avg_block_io.ini
%attr(0550,root,root) %{python3_sitelib}/sentryPlugins/avg_block_io
%files -n ai_block_io
%attr(0500,root,root) %{_bindir}/ai_block_io
%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/tasks/ai_block_io.mod
%attr(0600,root,root) %{_sysconfdir}/sysSentry/plugins/ai_block_io.ini
%attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_block_io
# hbm repair module
%exclude %{_sysconfdir}/sysSentry/tasks/hbm_online_repair.mod
%exclude %{python3_sitelib}/syssentry/bmc_*
%exclude %{python3_sitelib}/syssentry/*/bmc_*
%files -n libxalarm
%attr(0550,root,root) %{_libdir}/libxalarm.so
@ -256,6 +335,14 @@ rm -rf %{buildroot}
%attr(0550,root,root) %{_includedir}/xalarm
%attr(0550,root,root) %{_includedir}/xalarm/register_xalarm.h
%files -n pyxalarm
%attr(0550,root,root) %{python3_sitelib}/xalarm/register_xalarm.py
%attr(0550,root,root) %{python3_sitelib}/xalarm/__pycache__/register_xalarm*
%files -n pysentry_notify
%attr(0550,root,root) %{python3_sitelib}/xalarm/sentry_notify.py
%attr(0550,root,root) %{python3_sitelib}/xalarm/__pycache__/sentry_notify*
%files -n cpu_sentry
%attr(0500,root,root) %{_bindir}/cat-cli
%attr(0500,root,root) %{_bindir}/cpu_sentry
@ -275,6 +362,37 @@ rm -rf %{buildroot}
%attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
%changelog
* Sat Mar 29 2025 shixuantong <shixuantong1@huawei.com> - 1.0.2-34
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:fix period task some bugs
fix env_file and environ_conf
* Thu Mar 13 2025 luckky <guodashun1@huawei.com> - 1.0.2-33
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:ai block io: exit when stage is not supported
* Thu Mar 13 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-32
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:fix the sentryCollector service can't be stopped for a long
* Fri Feb 14 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-31
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:fix bug of ebpf and ai_block_io
* Sun Jan 26 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-30
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:add avg_block_io and ai_block_io
* Sun Jan 26 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-29
- Type:bugfix
- CVE:NA