Compare commits
10 Commits
ced9047bda
...
a580b7945a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a580b7945a | ||
|
|
675344a743 | ||
|
|
d3fa09f916 | ||
|
|
cb7fc9462f | ||
|
|
89864b3d6d | ||
|
|
a2367330fa | ||
|
|
4fd8aa3dea | ||
|
|
95cc3d6f1c | ||
|
|
a5c71fe53f | ||
|
|
68c319012a |
2962
add-avg_block_io-and-ai_block_io.patch
Normal file
2962
add-avg_block_io-and-ai_block_io.patch
Normal file
File diff suppressed because it is too large
Load Diff
69
ai-block-io-exit-when-stage-is-not-supported.patch
Normal file
69
ai-block-io-exit-when-stage-is-not-supported.patch
Normal file
@ -0,0 +1,69 @@
|
||||
From b1fdf6495d7f2a7afa313d1510cb8f65aa42c369 Mon Sep 17 00:00:00 2001
|
||||
From: luckky <guodashun1@huawei.com>
|
||||
Date: Thu, 13 Mar 2025 11:38:34 +0800
|
||||
Subject: [PATCH] ai block io: exit when stage is not supported
|
||||
|
||||
---
|
||||
.../ai_block_io/config_parser.py | 32 +++++++++++++++++--
|
||||
1 file changed, 30 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/python/sentryPlugins/ai_block_io/config_parser.py b/src/python/sentryPlugins/ai_block_io/config_parser.py
|
||||
index 1bbb609..612fe9f 100644
|
||||
--- a/src/python/sentryPlugins/ai_block_io/config_parser.py
|
||||
+++ b/src/python/sentryPlugins/ai_block_io/config_parser.py
|
||||
@@ -32,6 +32,12 @@ ALL_STAGE_LIST = [
|
||||
"rq_driver",
|
||||
"bio",
|
||||
]
|
||||
+EBPF_STAGE_LIST = [
|
||||
+ "wbt",
|
||||
+ "rq_driver",
|
||||
+ "bio",
|
||||
+ "gettag"
|
||||
+]
|
||||
ALL_IOTPYE_LIST = ["read", "write"]
|
||||
DISK_TYPE_MAP = {
|
||||
0: "nvme_ssd",
|
||||
@@ -312,15 +318,37 @@ class ConfigParser:
|
||||
if len(stage_list) == 1 and stage_list[0] == "":
|
||||
logging.critical("stage value not allow is empty, exiting...")
|
||||
exit(1)
|
||||
+
|
||||
+ # check if kernel or ebpf is supported (code is from collector)
|
||||
+ valid_stage_list = ALL_STAGE_LIST
|
||||
+ base_path = '/sys/kernel/debug/block'
|
||||
+ all_disk = []
|
||||
+ for disk_name in os.listdir(base_path):
|
||||
+ disk_path = os.path.join(base_path, disk_name)
|
||||
+ blk_io_hierarchy_path = os.path.join(disk_path, 'blk_io_hierarchy')
|
||||
+
|
||||
+ if not os.path.exists(blk_io_hierarchy_path):
|
||||
+ logging.warning("no blk_io_hierarchy directory found in %s, skipping.", disk_name)
|
||||
+ continue
|
||||
+
|
||||
+ for file_name in os.listdir(blk_io_hierarchy_path):
|
||||
+ if file_name == 'stats':
|
||||
+ all_disk.append(disk_name)
|
||||
+
|
||||
+ if len(all_disk) == 0:
|
||||
+ logging.debug("no blk_io_hierarchy disk, it is not lock-free collection")
|
||||
+ valid_stage_list = EBPF_STAGE_LIST
|
||||
+
|
||||
if len(stage_list) == 1 and stage_list[0] == "default":
|
||||
logging.warning(
|
||||
"stage will enable default value: %s",
|
||||
self.DEFAULT_CONF["common"]["stage"],
|
||||
)
|
||||
- self._conf["common"]["stage"] = ALL_STAGE_LIST
|
||||
+ self._conf["common"]["stage"] = valid_stage_list
|
||||
return
|
||||
+
|
||||
for stage in stage_list:
|
||||
- if stage not in ALL_STAGE_LIST:
|
||||
+ if stage not in valid_stage_list:
|
||||
logging.critical(
|
||||
"stage: %s is not valid stage, ai_block_io will exit...", stage
|
||||
)
|
||||
--
|
||||
2.43.0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
397
fix-bug-of-ebpf-and-ai_block_io.patch
Normal file
397
fix-bug-of-ebpf-and-ai_block_io.patch
Normal file
@ -0,0 +1,397 @@
|
||||
From 480c0fc479ec882786cdb58d699cf84ce5995531 Mon Sep 17 00:00:00 2001
|
||||
From: zhuofeng <zhuofeng2@huawei.com>
|
||||
Date: Fri, 14 Feb 2025 09:42:27 +0800
|
||||
Subject: [PATCH] fix bug of ebpf and ai_block_io
|
||||
|
||||
---
|
||||
src/c/ebpf_collector/ebpf_collector.bpf.c | 357 ------------------
|
||||
.../sentryPlugins/ai_block_io/detector.py | 3 +-
|
||||
2 files changed, 2 insertions(+), 358 deletions(-)
|
||||
|
||||
diff --git a/src/c/ebpf_collector/ebpf_collector.bpf.c b/src/c/ebpf_collector/ebpf_collector.bpf.c
|
||||
index 417618d..7a2f481 100644
|
||||
--- a/src/c/ebpf_collector/ebpf_collector.bpf.c
|
||||
+++ b/src/c/ebpf_collector/ebpf_collector.bpf.c
|
||||
@@ -590,361 +590,4 @@ int kprobe_bio_endio(struct pt_regs *regs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-// start get_tag
|
||||
-SEC("kprobe/blk_mq_get_tag")
|
||||
-int kprobe_blk_mq_get_tag(struct pt_regs *regs)
|
||||
-{
|
||||
- u64 tagkey = bpf_get_current_task();
|
||||
- u64 value = (u64)PT_REGS_PARM1(regs);
|
||||
- (void)bpf_map_update_elem(&tag_args, &tagkey, &value, BPF_ANY);
|
||||
-
|
||||
- struct blk_mq_alloc_data *bd;
|
||||
- struct request_queue *q;
|
||||
- struct backing_dev_info *backing_dev_info;
|
||||
- struct device *owner;
|
||||
- dev_t devt;
|
||||
- unsigned int cmd_flags = 0;
|
||||
-
|
||||
- bd = (struct blk_mq_alloc_data *)value;
|
||||
- bpf_core_read(&q, sizeof(q), &bd->q);
|
||||
- bpf_core_read(&backing_dev_info, sizeof(backing_dev_info), &q->backing_dev_info);
|
||||
- bpf_core_read(&owner, sizeof(owner), &backing_dev_info->owner);
|
||||
- bpf_core_read(&devt, sizeof(devt), &owner->devt);
|
||||
- int major = MAJOR(devt);
|
||||
- int first_minor = MINOR(devt);
|
||||
-
|
||||
- if (major == 0) {
|
||||
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_MAJOR_ZERO);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- u32 key = find_matching_key_get_tag(major, first_minor);
|
||||
- if (key >= MAP_SIZE) {
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- struct io_counter *counterp, zero = {};
|
||||
- init_io_counter(&zero, major, first_minor);
|
||||
- counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
|
||||
- if (counterp) {
|
||||
- return 0;
|
||||
- }
|
||||
- long err = bpf_map_update_elem(&tag_map, &tagkey, &zero, BPF_NOEXIST);
|
||||
- if (err) {
|
||||
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_UPDATE_FAIL);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- u64 curr_start_range = zero.start_time / THRESHOLD;
|
||||
-
|
||||
- struct update_params params = {
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .cmd_flags = cmd_flags,
|
||||
- .curr_start_range = curr_start_range,
|
||||
- };
|
||||
-
|
||||
- struct stage_data *curr_data;
|
||||
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
|
||||
- if (!curr_data) {
|
||||
- struct stage_data new_data = {
|
||||
- .start_count = 1,
|
||||
- .finish_count = 0,
|
||||
- .finish_over_time = 0,
|
||||
- .duration = 0,
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .io_type = "",
|
||||
- };
|
||||
- blk_fill_rwbs(new_data.io_type, cmd_flags);
|
||||
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
|
||||
- } else {
|
||||
- update_curr_data_in_start(curr_data, &params);
|
||||
- }
|
||||
-
|
||||
- struct time_range_io_count *curr_data_time_range;
|
||||
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
|
||||
- if (curr_data_time_range == NULL) {
|
||||
- struct time_range_io_count new_data = { .count = {0} };
|
||||
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
|
||||
- } else {
|
||||
- if (key < MAP_SIZE && key >= 0) {
|
||||
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
|
||||
- }
|
||||
- }
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-// finish get_tag
|
||||
-SEC("kretprobe/blk_mq_get_tag")
|
||||
-int kretprobe_blk_mq_get_tag(struct pt_regs *regs)
|
||||
-{
|
||||
- u64 tagkey = bpf_get_current_task();
|
||||
- u64 *tagargs = (u64 *)bpf_map_lookup_elem(&tag_args, &tagkey);
|
||||
- if (tagargs == NULL) {
|
||||
- bpf_map_delete_elem(&tag_args, &tagkey);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- struct blk_mq_alloc_data *bd;
|
||||
- struct request_queue *q;
|
||||
- struct backing_dev_info *backing_dev_info;
|
||||
- struct device *owner;
|
||||
- dev_t devt;
|
||||
- unsigned int cmd_flags = 0;
|
||||
-
|
||||
- bd = (struct blk_mq_alloc_data *)*tagargs;
|
||||
- bpf_core_read(&q, sizeof(q), &bd->q);
|
||||
- bpf_core_read(&backing_dev_info, sizeof(backing_dev_info), &q->backing_dev_info);
|
||||
- bpf_core_read(&owner, sizeof(owner), &backing_dev_info->owner);
|
||||
- bpf_core_read(&devt, sizeof(devt), &owner->devt);
|
||||
- int major = MAJOR(devt);
|
||||
- int first_minor = MINOR(devt);
|
||||
-
|
||||
- if (major == 0) {
|
||||
- log_event(STAGE_GET_TAG, PERIOD_END, ERROR_MAJOR_ZERO);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- u32 key = find_matching_key_get_tag(major, first_minor);
|
||||
- if (key >= MAP_SIZE) {
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- struct io_counter *counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
|
||||
- if (!counterp) {
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
|
||||
- u64 curr_start_range = counterp->start_time / THRESHOLD;
|
||||
-
|
||||
- struct update_params params = {
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .cmd_flags = cmd_flags,
|
||||
- .curr_start_range = curr_start_range,
|
||||
- };
|
||||
-
|
||||
- struct stage_data *curr_data;
|
||||
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
|
||||
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
|
||||
- struct stage_data new_data = {
|
||||
- .start_count = 1,
|
||||
- .finish_count = 1,
|
||||
- .finish_over_time = 1,
|
||||
- .duration = 0,
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .io_type = "",
|
||||
- };
|
||||
- blk_fill_rwbs(new_data.io_type, cmd_flags);
|
||||
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
|
||||
- } else if (curr_data == NULL) {
|
||||
- struct stage_data new_data = {
|
||||
- .start_count = 1,
|
||||
- .finish_count = 1,
|
||||
- .finish_over_time = 0,
|
||||
- .duration = 0,
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .io_type = "",
|
||||
- };
|
||||
- blk_fill_rwbs(new_data.io_type, cmd_flags);
|
||||
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
|
||||
- } else {
|
||||
- curr_data->duration += duration;
|
||||
- update_curr_data_in_finish(curr_data, &params, duration);
|
||||
- }
|
||||
-
|
||||
- struct time_range_io_count *curr_data_time_range;
|
||||
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
|
||||
- if (curr_data_time_range == NULL) {
|
||||
- struct time_range_io_count new_data = { .count = {0} };
|
||||
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
|
||||
- } else {
|
||||
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
|
||||
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- bpf_map_delete_elem(&tag_map, &tagkey);
|
||||
- bpf_map_delete_elem(&tag_args, &tagkey);
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-// start wbt
|
||||
-SEC("kprobe/wbt_wait")
|
||||
-int kprobe_wbt_wait(struct pt_regs *regs)
|
||||
-{
|
||||
- u64 wbtkey = bpf_get_current_task();
|
||||
- u64 value = (u64)PT_REGS_PARM2(regs);
|
||||
- (void)bpf_map_update_elem(&wbt_args, &wbtkey, &value, BPF_ANY);
|
||||
-
|
||||
- struct bio *bio;
|
||||
- struct gendisk *curr_rq_disk;
|
||||
- int major, first_minor;
|
||||
- unsigned int cmd_flags;
|
||||
-
|
||||
- bio = (struct bio *)value;
|
||||
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bio->bi_disk);
|
||||
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
|
||||
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
|
||||
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
|
||||
-
|
||||
- if (major == 0) {
|
||||
- log_event(STAGE_WBT, PERIOD_START, ERROR_MAJOR_ZERO);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- u32 key = find_matching_key_wbt(major, first_minor);
|
||||
- if (key >= MAP_SIZE) {
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- struct io_counter *counterp, zero = {};
|
||||
- init_io_counter(&zero, major, first_minor);
|
||||
- counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
|
||||
- if (counterp) {
|
||||
- return 0;
|
||||
- }
|
||||
- long err = bpf_map_update_elem(&wbt_map, &wbtkey, &zero, BPF_NOEXIST);
|
||||
- if (err) {
|
||||
- log_event(STAGE_WBT, PERIOD_START, ERROR_UPDATE_FAIL);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- u64 curr_start_range = zero.start_time / THRESHOLD;
|
||||
-
|
||||
- struct update_params params = {
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .cmd_flags = cmd_flags,
|
||||
- .curr_start_range = curr_start_range,
|
||||
- };
|
||||
-
|
||||
- struct stage_data *curr_data;
|
||||
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
|
||||
- if (!curr_data) {
|
||||
- struct stage_data new_data = {
|
||||
- .start_count = 1,
|
||||
- .finish_count = 0,
|
||||
- .finish_over_time = 0,
|
||||
- .duration = 0,
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .io_type = "",
|
||||
- };
|
||||
- blk_fill_rwbs(new_data.io_type, cmd_flags);
|
||||
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
|
||||
- } else {
|
||||
- update_curr_data_in_start(curr_data, &params);
|
||||
- }
|
||||
-
|
||||
- struct time_range_io_count *curr_data_time_range;
|
||||
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
|
||||
- if (curr_data_time_range == NULL) {
|
||||
- struct time_range_io_count new_data = { .count = {0} };
|
||||
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
|
||||
- } else {
|
||||
- if (key < MAP_SIZE && key >= 0) {
|
||||
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
|
||||
- }
|
||||
- }
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-// finish wbt
|
||||
-SEC("kretprobe/wbt_wait")
|
||||
-int kretprobe_wbt_wait(struct pt_regs *regs)
|
||||
-{
|
||||
- u64 wbtkey = bpf_get_current_task();
|
||||
- u64 *wbtargs = (u64 *)bpf_map_lookup_elem(&wbt_args, &wbtkey);
|
||||
- if (wbtargs == NULL) {
|
||||
- bpf_map_delete_elem(&wbt_args, &wbtkey);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- struct bio *bio;
|
||||
- struct gendisk *curr_rq_disk;
|
||||
- int major, first_minor;
|
||||
- unsigned int cmd_flags;
|
||||
-
|
||||
- bio = (struct bio *)(*wbtargs);
|
||||
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bio->bi_disk);
|
||||
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
|
||||
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
|
||||
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
|
||||
-
|
||||
- if (major == 0) {
|
||||
- log_event(STAGE_WBT, PERIOD_END, ERROR_MAJOR_ZERO);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- u32 key = find_matching_key_wbt(major, first_minor);
|
||||
- if (key >= MAP_SIZE) {
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- struct io_counter *counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
|
||||
- if (!counterp) {
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
|
||||
- u64 curr_start_range = counterp->start_time / THRESHOLD;
|
||||
-
|
||||
- struct update_params params = {
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .cmd_flags = cmd_flags,
|
||||
- .curr_start_range = curr_start_range,
|
||||
- };
|
||||
-
|
||||
- struct stage_data *curr_data;
|
||||
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
|
||||
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
|
||||
- struct stage_data new_data = {
|
||||
- .start_count = 1,
|
||||
- .finish_count = 1,
|
||||
- .finish_over_time = 1,
|
||||
- .duration = 0,
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- .io_type = "",
|
||||
- };
|
||||
- blk_fill_rwbs(new_data.io_type, cmd_flags);
|
||||
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
|
||||
- } else if (curr_data == NULL) {
|
||||
- struct stage_data new_data = {
|
||||
- .start_count = 1,
|
||||
- .finish_count = 1,
|
||||
- .finish_over_time = 0,
|
||||
- .duration = 0,
|
||||
- .io_type = "",
|
||||
- .major = major,
|
||||
- .first_minor = first_minor,
|
||||
- };
|
||||
- blk_fill_rwbs(new_data.io_type, cmd_flags);
|
||||
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
|
||||
- } else {
|
||||
- curr_data->duration += duration;
|
||||
- update_curr_data_in_finish(curr_data, &params, duration);
|
||||
- }
|
||||
-
|
||||
- struct time_range_io_count *curr_data_time_range;
|
||||
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
|
||||
- if (curr_data_time_range == NULL) {
|
||||
- struct time_range_io_count new_data = { .count = {0} };
|
||||
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
|
||||
- } else {
|
||||
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
|
||||
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
|
||||
- }
|
||||
- }
|
||||
- bpf_map_delete_elem(&wbt_map, &wbtkey);
|
||||
- bpf_map_delete_elem(&wbt_args, &wbtkey);
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
char _license[] SEC("license") = "GPL";
|
||||
diff --git a/src/python/sentryPlugins/ai_block_io/detector.py b/src/python/sentryPlugins/ai_block_io/detector.py
|
||||
index 27fb7f7..2688cb1 100644
|
||||
--- a/src/python/sentryPlugins/ai_block_io/detector.py
|
||||
+++ b/src/python/sentryPlugins/ai_block_io/detector.py
|
||||
@@ -55,11 +55,12 @@ class Detector:
|
||||
detection_result = self._slidingWindow.is_slow_io_event(metric_value)
|
||||
# 检测到慢周期,由Detector负责打印info级别日志
|
||||
if detection_result[0][1]:
|
||||
+ ai_threshold = "None" if detection_result[2] is None else round(detection_result[2], 3)
|
||||
logging.info(f'[abnormal_period]: disk: {self._metric_name.disk_name}, '
|
||||
f'stage: {self._metric_name.stage_name}, '
|
||||
f'iotype: {self._metric_name.io_access_type_name}, '
|
||||
f'type: {self._metric_name.metric_name}, '
|
||||
- f'ai_threshold: {round(detection_result[2], 3)}, '
|
||||
+ f'ai_threshold: {ai_threshold}, '
|
||||
f'curr_val: {metric_value}')
|
||||
else:
|
||||
logging.debug(f'Detection result: {str(detection_result)}')
|
||||
--
|
||||
2.33.0
|
||||
|
||||
56
fix-env_file-and-environ_conf.patch
Normal file
56
fix-env_file-and-environ_conf.patch
Normal file
@ -0,0 +1,56 @@
|
||||
From 71fe4393402427b3fbcd147626406cbd70186046 Mon Sep 17 00:00:00 2001
|
||||
From: shixuantong <shixuantong1@huawei.com>
|
||||
Date: Sat, 29 Mar 2025 11:06:47 +0800
|
||||
Subject: [PATCH] fix env_file and environ_conf
|
||||
|
||||
---
|
||||
src/python/syssentry/global_values.py | 13 +++++--------
|
||||
1 file changed, 5 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/src/python/syssentry/global_values.py b/src/python/syssentry/global_values.py
|
||||
index 9c7800b..48a9f2d 100644
|
||||
--- a/src/python/syssentry/global_values.py
|
||||
+++ b/src/python/syssentry/global_values.py
|
||||
@@ -75,6 +75,8 @@ class InspectTask:
|
||||
self.onstart = False
|
||||
# ccnfig env_file
|
||||
self.env_file = ""
|
||||
+ # env conf to popen arg
|
||||
+ self.environ_conf = None
|
||||
# start mode
|
||||
self.conflict = "up"
|
||||
# alarm id
|
||||
@@ -112,7 +114,7 @@ class InspectTask:
|
||||
logging.error("task %s log_file %s open failed", self.name, self.log_file)
|
||||
logfile = subprocess.PIPE
|
||||
try:
|
||||
- child = subprocess.Popen(cmd_list, stdout=logfile, stderr=subprocess.STDOUT, close_fds=True)
|
||||
+ child = subprocess.Popen(cmd_list, stdout=logfile, stderr=subprocess.STDOUT, close_fds=True, env=self.environ_conf)
|
||||
except OSError:
|
||||
logging.error("task %s start Popen error, invalid cmd", cmd_list)
|
||||
self.result_info["result"] = ResultLevel.FAIL.name
|
||||
@@ -199,7 +201,7 @@ class InspectTask:
|
||||
return
|
||||
|
||||
# read config
|
||||
- environ_conf = {}
|
||||
+ self.environ_conf = dict(os.environ)
|
||||
with open(self.env_file, 'r') as file:
|
||||
for line in file:
|
||||
line = line.strip()
|
||||
@@ -210,11 +212,6 @@ class InspectTask:
|
||||
if not key or not value:
|
||||
logging.error("env_file = %s format is error, use default environ", self.env_file)
|
||||
return
|
||||
- environ_conf[key] = value
|
||||
-
|
||||
- # set environ
|
||||
- for key, value in environ_conf.items():
|
||||
- logging.debug("environ key=%s, value=%s", key, value)
|
||||
- os.environ[key] = value
|
||||
+ self.environ_conf[key] = value
|
||||
|
||||
logging.debug("the subprocess=[%s] begin to run", self.name)
|
||||
--
|
||||
2.27.0
|
||||
|
||||
54
fix-period-task-some-bugs.patch
Normal file
54
fix-period-task-some-bugs.patch
Normal file
@ -0,0 +1,54 @@
|
||||
From f2e384ea0cf6a323a41c293f981952b48ff3052f Mon Sep 17 00:00:00 2001
|
||||
From: shixuantong <shixuantong1@huawei.com>
|
||||
Date: Sat, 29 Mar 2025 10:50:47 +0800
|
||||
Subject: [PATCH] fix period task some bugs
|
||||
|
||||
---
|
||||
src/python/syssentry/cron_process.py | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/src/python/syssentry/cron_process.py b/src/python/syssentry/cron_process.py
|
||||
index 5543d67..52e6e1f 100644
|
||||
--- a/src/python/syssentry/cron_process.py
|
||||
+++ b/src/python/syssentry/cron_process.py
|
||||
@@ -59,7 +59,6 @@ class PeriodTask(InspectTask):
|
||||
self.result_info["details"] = {}
|
||||
if not self.period_enabled:
|
||||
self.period_enabled = True
|
||||
- self.upgrade_period_timestamp()
|
||||
|
||||
if self.conflict != 'up':
|
||||
ret = self.check_conflict()
|
||||
@@ -87,6 +86,7 @@ class PeriodTask(InspectTask):
|
||||
self.runtime_status = FAILED_STATUS
|
||||
return False, "period task start popen failed, invalid command"
|
||||
finally:
|
||||
+ self.upgrade_period_timestamp()
|
||||
if isinstance(logfile, io.TextIOWrapper) and not logfile.closed:
|
||||
logfile.close()
|
||||
|
||||
@@ -127,7 +127,6 @@ class PeriodTask(InspectTask):
|
||||
res, _ = self.start()
|
||||
if res:
|
||||
set_runtime_status(self.name, RUNNING_STATUS)
|
||||
- self.upgrade_period_timestamp()
|
||||
|
||||
|
||||
def period_tasks_handle():
|
||||
@@ -142,7 +141,7 @@ def period_tasks_handle():
|
||||
logging.debug("period not enabled")
|
||||
continue
|
||||
|
||||
- if not task.onstart:
|
||||
+ if not task.onstart and task.last_exec_timestamp == 0:
|
||||
logging.debug("period onstart not enabled, task: %s", task.name)
|
||||
task.runtime_status = EXITED_STATUS
|
||||
continue
|
||||
@@ -153,4 +152,3 @@ def period_tasks_handle():
|
||||
res, _ = task.start()
|
||||
if res:
|
||||
set_runtime_status(task.name, RUNNING_STATUS)
|
||||
- task.upgrade_period_timestamp()
|
||||
--
|
||||
2.27.0
|
||||
|
||||
61
fix-the-sentryCollector-service-can-t-be-stopped-for.patch
Normal file
61
fix-the-sentryCollector-service-can-t-be-stopped-for.patch
Normal file
@ -0,0 +1,61 @@
|
||||
From 411e0fe141efdf02d73aa15c2576214af1be787e Mon Sep 17 00:00:00 2001
|
||||
From: zhuofeng <1107893276@qq.com>
|
||||
Date: Wed, 12 Mar 2025 02:27:12 +0000
|
||||
Subject: [PATCH] fix the sentryCollector service can't be stopped for a long
|
||||
time
|
||||
|
||||
Signed-off-by: zhuofeng <1107893276@qq.com>
|
||||
---
|
||||
src/python/sentryCollector/collect_io.py | 13 +++++++++++--
|
||||
src/python/syssentry/global_values.py | 2 +-
|
||||
2 files changed, 12 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/python/sentryCollector/collect_io.py b/src/python/sentryCollector/collect_io.py
|
||||
index 4cf6534..622e0b4 100644
|
||||
--- a/src/python/sentryCollector/collect_io.py
|
||||
+++ b/src/python/sentryCollector/collect_io.py
|
||||
@@ -322,6 +322,8 @@ class CollectIo():
|
||||
if curr_io_dump > 0:
|
||||
logging.info(f"ebpf io_dump info : {disk_name}, {stage}, {io_type}, {curr_io_dump}")
|
||||
IO_GLOBAL_DATA[disk_name][stage][io_type].insert(0, [curr_lat, curr_io_dump, curr_io_length, curr_iops])
|
||||
+ if curr_lat > 0:
|
||||
+ logging.info(f"ebpf info : {disk_name}, {stage}, {io_type}, {curr_lat}, {curr_iops}")
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
sleep_time = self.period_time - elapsed_time
|
||||
@@ -405,10 +407,17 @@ class CollectIo():
|
||||
self
|
||||
) -> None:
|
||||
global EBPF_PROCESS
|
||||
- if EBPF_PROCESS:
|
||||
+ if not EBPF_PROCESS:
|
||||
+ logging.debug("No eBPF process to stop")
|
||||
+ return
|
||||
+ try:
|
||||
EBPF_PROCESS.terminate()
|
||||
+ EBPF_PROCESS.wait(timeout=3)
|
||||
+ except subprocess.TimeoutExpired:
|
||||
+ logging.debug("eBPF process did not exit within timeout. Forcing kill.")
|
||||
+ EBPF_PROCESS.kill()
|
||||
EBPF_PROCESS.wait()
|
||||
- logging.info("ebpf collector thread exit")
|
||||
+ logging.info("ebpf collector thread exit")
|
||||
|
||||
def main_loop(self):
|
||||
global IO_GLOBAL_DATA
|
||||
diff --git a/src/python/syssentry/global_values.py b/src/python/syssentry/global_values.py
|
||||
index b123b2d..9c7800b 100644
|
||||
--- a/src/python/syssentry/global_values.py
|
||||
+++ b/src/python/syssentry/global_values.py
|
||||
@@ -114,7 +114,7 @@ class InspectTask:
|
||||
try:
|
||||
child = subprocess.Popen(cmd_list, stdout=logfile, stderr=subprocess.STDOUT, close_fds=True)
|
||||
except OSError:
|
||||
- logging.error("task %s start Popen error, invalid cmd")
|
||||
+ logging.error("task %s start Popen error, invalid cmd", cmd_list)
|
||||
self.result_info["result"] = ResultLevel.FAIL.name
|
||||
self.result_info["error_msg"] = RESULT_LEVEL_ERR_MSG_DICT.get(ResultLevel.FAIL.name)
|
||||
self.runtime_status = "FAILED"
|
||||
--
|
||||
2.43.0
|
||||
|
||||
120
sysSentry.spec
120
sysSentry.spec
@ -4,7 +4,7 @@
|
||||
Summary: System Inspection Framework
|
||||
Name: sysSentry
|
||||
Version: 1.0.2
|
||||
Release: 29
|
||||
Release: 34
|
||||
License: Mulan PSL v2
|
||||
Group: System Environment/Daemons
|
||||
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
||||
@ -39,12 +39,19 @@ Patch26: hbm_online_repair-add-unload-driver.patch
|
||||
Patch27: add-pyxalarm-and-pySentryNotify-add-multi-users-supp.patch
|
||||
Patch28: adapt_5.10_kenel_for_syssentry.patch
|
||||
Patch29: collect-module-adapt-to-the-5.10-kernel.patch
|
||||
Patch30: add-avg_block_io-and-ai_block_io.patch
|
||||
Patch31: fix-bug-of-ebpf-and-ai_block_io.patch
|
||||
Patch32: fix-the-sentryCollector-service-can-t-be-stopped-for.patch
|
||||
Patch33: ai-block-io-exit-when-stage-is-not-supported.patch
|
||||
Patch34: fix-period-task-some-bugs.patch
|
||||
Patch35: fix-env_file-and-environ_conf.patch
|
||||
|
||||
BuildRequires: cmake gcc-c++
|
||||
BuildRequires: python3 python3-setuptools
|
||||
BuildRequires: json-c-devel
|
||||
BuildRequires: chrpath
|
||||
BuildRequires: elfutils-devel clang libbpf-devel bpftool
|
||||
BuildRequires: python3-numpy python3-pytest
|
||||
Requires: libxalarm = %{version}
|
||||
Requires: libbpf
|
||||
|
||||
@ -68,6 +75,39 @@ Provides: libxalarm-devel = %{version}
|
||||
%description -n libxalarm-devel
|
||||
This package provides developer tools for the libxalarm.
|
||||
|
||||
%package -n avg_block_io
|
||||
Summary: Supports slow I/O detection
|
||||
Requires: sysSentry = %{version}-%{release}
|
||||
Requires: pysentry_notify = %{version}-%{release}
|
||||
Requires: pysentry_collect = %{version}-%{release}
|
||||
|
||||
%description -n avg_block_io
|
||||
This package provides slow I/O detection based on EBPF
|
||||
|
||||
%package -n ai_block_io
|
||||
Summary: Supports slow I/O detection
|
||||
Requires: python3-numpy
|
||||
Requires: sysSentry = %{version}-%{release}
|
||||
Requires: pysentry_notify = %{version}-%{release}
|
||||
Requires: pysentry_collect = %{version}-%{release}
|
||||
|
||||
%description -n ai_block_io
|
||||
This package provides slow I/O detection based on AI
|
||||
|
||||
%package -n pyxalarm
|
||||
Summary: Supports xalarm api in python implementation
|
||||
Requires: sysSentry = %{version}-%{release}
|
||||
|
||||
%description -n pyxalarm
|
||||
This package provides xalarm api support for users
|
||||
|
||||
%package -n pysentry_notify
|
||||
Summary: Supports xalarm report in python implementation
|
||||
Requires: sysSentry = %{version}-%{release}
|
||||
|
||||
%description -n pysentry_notify
|
||||
This package provides xalarm report support for plugins
|
||||
|
||||
%package -n cpu_sentry
|
||||
Summary: CPU fault inspection program
|
||||
Requires: procps-ng
|
||||
@ -165,6 +205,14 @@ install src/c/hbm_online_repair/hbm_online_repair.env %{buildroot}/etc/sysconfig
|
||||
chrpath -d %{buildroot}%{_bindir}/cat-cli
|
||||
chrpath -d %{buildroot}%{_libdir}/libcpu_patrol.so
|
||||
|
||||
# avg_block_io
|
||||
install config/tasks/avg_block_io.mod %{buildroot}/etc/sysSentry/tasks/
|
||||
install config/plugins/avg_block_io.ini %{buildroot}/etc/sysSentry/plugins/avg_block_io.ini
|
||||
|
||||
# ai_block_io
|
||||
install config/tasks/ai_block_io.mod %{buildroot}/etc/sysSentry/tasks/
|
||||
install config/plugins/ai_block_io.ini %{buildroot}/etc/sysSentry/plugins/ai_block_io.ini
|
||||
|
||||
# logrotate
|
||||
mkdir -p %{buildroot}%{_localstatedir}/lib/logrotate-syssentry
|
||||
mkdir -p %{buildroot}%{_sysconfdir}/cron.hourly
|
||||
@ -173,6 +221,8 @@ install -m 0500 src/sh/logrotate-sysSentry.cron %{buildroot}%{_sysconfdir}/cron.
|
||||
|
||||
pushd src/python
|
||||
python3 setup.py install -O1 --root=$RPM_BUILD_ROOT --record=SENTRY_FILES
|
||||
cat SENTRY_FILES | grep -v register_xalarm.* | grep -v sentry_notify.* > SENTRY_FILES.tmp
|
||||
mv SENTRY_FILES.tmp SENTRY_FILES
|
||||
popd
|
||||
|
||||
%pre
|
||||
@ -221,6 +271,18 @@ rm -rf %{buildroot}
|
||||
%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/xalarm.conf
|
||||
%attr(0600,root,root) %{_unitdir}/xalarmd.service
|
||||
|
||||
# avg block io
|
||||
%exclude %{_sysconfdir}/sysSentry/tasks/avg_block_io.mod
|
||||
%exclude %{_sysconfdir}/sysSentry/plugins/avg_block_io.ini
|
||||
%exclude %{_bindir}/avg_block_io
|
||||
%exclude %{python3_sitelib}/sentryPlugins/*
|
||||
|
||||
# ai_block_io
|
||||
%exclude %{_sysconfdir}/sysSentry/tasks/ai_block_io.mod
|
||||
%exclude %{_sysconfdir}/sysSentry/plugins/ai_block_io.ini
|
||||
%exclude %{_bindir}/ai_block_io
|
||||
%exclude %{python3_sitelib}/sentryPlugins/*
|
||||
|
||||
# sentryCollector
|
||||
%attr(0550,root,root) %{_bindir}/sentryCollector
|
||||
%attr(0600,root,root) %{_sysconfdir}/sysSentry/collector.conf
|
||||
@ -248,6 +310,23 @@ rm -rf %{buildroot}
|
||||
%exclude %{python3_sitelib}/syssentry/bmc_*
|
||||
%exclude %{python3_sitelib}/syssentry/*/bmc_*
|
||||
|
||||
%files -n avg_block_io
|
||||
%attr(0500,root,root) %{_bindir}/avg_block_io
|
||||
%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/tasks/avg_block_io.mod
|
||||
%attr(0600,root,root) %{_sysconfdir}/sysSentry/plugins/avg_block_io.ini
|
||||
%attr(0550,root,root) %{python3_sitelib}/sentryPlugins/avg_block_io
|
||||
|
||||
%files -n ai_block_io
|
||||
%attr(0500,root,root) %{_bindir}/ai_block_io
|
||||
%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/tasks/ai_block_io.mod
|
||||
%attr(0600,root,root) %{_sysconfdir}/sysSentry/plugins/ai_block_io.ini
|
||||
%attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_block_io
|
||||
|
||||
# hbm repair module
|
||||
%exclude %{_sysconfdir}/sysSentry/tasks/hbm_online_repair.mod
|
||||
%exclude %{python3_sitelib}/syssentry/bmc_*
|
||||
%exclude %{python3_sitelib}/syssentry/*/bmc_*
|
||||
|
||||
%files -n libxalarm
|
||||
%attr(0550,root,root) %{_libdir}/libxalarm.so
|
||||
|
||||
@ -256,6 +335,14 @@ rm -rf %{buildroot}
|
||||
%attr(0550,root,root) %{_includedir}/xalarm
|
||||
%attr(0550,root,root) %{_includedir}/xalarm/register_xalarm.h
|
||||
|
||||
%files -n pyxalarm
|
||||
%attr(0550,root,root) %{python3_sitelib}/xalarm/register_xalarm.py
|
||||
%attr(0550,root,root) %{python3_sitelib}/xalarm/__pycache__/register_xalarm*
|
||||
|
||||
%files -n pysentry_notify
|
||||
%attr(0550,root,root) %{python3_sitelib}/xalarm/sentry_notify.py
|
||||
%attr(0550,root,root) %{python3_sitelib}/xalarm/__pycache__/sentry_notify*
|
||||
|
||||
%files -n cpu_sentry
|
||||
%attr(0500,root,root) %{_bindir}/cat-cli
|
||||
%attr(0500,root,root) %{_bindir}/cpu_sentry
|
||||
@ -275,6 +362,37 @@ rm -rf %{buildroot}
|
||||
%attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
|
||||
|
||||
%changelog
|
||||
* Sat Mar 29 2025 shixuantong <shixuantong1@huawei.com> - 1.0.2-34
|
||||
- Type:bugfix
|
||||
- CVE:NA
|
||||
- SUG:NA
|
||||
- DESC:fix period task some bugs
|
||||
fix env_file and environ_conf
|
||||
|
||||
* Thu Mar 13 2025 luckky <guodashun1@huawei.com> - 1.0.2-33
|
||||
- Type:bugfix
|
||||
- CVE:NA
|
||||
- SUG:NA
|
||||
- DESC: fix an issue with printing error
|
||||
|
||||
* Thu Mar 13 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-32
|
||||
- Type:bugfix
|
||||
- CVE:NA
|
||||
- SUG:NA
|
||||
- DESC:fix the sentryCollector service can't be stopped for a long time
|
||||
|
||||
* Fri Feb 14 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-31
|
||||
- Type:bugfix
|
||||
- CVE:NA
|
||||
- SUG:NA
|
||||
- DESC:fix bug of ebpf and ai_block_io
|
||||
|
||||
* Sun Jan 26 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-30
|
||||
- Type:bugfix
|
||||
- CVE:NA
|
||||
- SUG:NA
|
||||
- DESC:add avg_block_io and ai_block_io
|
||||
|
||||
* Sun Jan 26 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-29
|
||||
- Type:bugfix
|
||||
- CVE:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user