!488 Use AI ability to enable Link Time Optimization.
From: @zhenyu--zhao_admin Reviewed-by: @li-yancheng Signed-off-by: @li-yancheng
This commit is contained in:
commit
4e353c4338
616
0207-Use-AI-ability-to-enable-Link-Time-Optimization.patch
Normal file
616
0207-Use-AI-ability-to-enable-Link-Time-Optimization.patch
Normal file
@ -0,0 +1,616 @@
|
||||
From 8db421d94ad808c51c86514d7170c97e7704fd6d Mon Sep 17 00:00:00 2001
|
||||
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
|
||||
Date: Mon, 23 Sep 2024 19:14:39 +0800
|
||||
Subject: [PATCH] Use AI ability to enable Link Time Optimization.
|
||||
|
||||
---
|
||||
gcc/collect2.c | 230 ++++++++++++++++++++++++++++++++++-
|
||||
gcc/config/aarch64/aarch64.c | 24 +++-
|
||||
gcc/ipa-hardware-detection.c | 145 +++++-----------------
|
||||
gcc/opts-common.c | 47 ++++---
|
||||
4 files changed, 316 insertions(+), 130 deletions(-)
|
||||
|
||||
diff --git a/gcc/collect2.c b/gcc/collect2.c
|
||||
index f8a5ce459..d4b6a1849 100644
|
||||
--- a/gcc/collect2.c
|
||||
+++ b/gcc/collect2.c
|
||||
@@ -51,7 +51,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "obstack.h"
|
||||
#include "intl.h"
|
||||
#include "version.h"
|
||||
-
|
||||
+
|
||||
/* On certain systems, we have code that works by scanning the object file
|
||||
directly. But this code uses system-specific header files and library
|
||||
functions, so turn it off in a cross-compiler. Likewise, the names of
|
||||
@@ -207,6 +207,7 @@ static int static_obj; /* true if -static */
|
||||
|
||||
static const char *c_file; /* <xxx>.c for constructor/destructor list. */
|
||||
static const char *o_file; /* <xxx>.o for constructor/destructor list. */
|
||||
+static const char *ai_optimize_file; /* <xxx>.o for ai optimization file. */
|
||||
#ifdef COLLECT_EXPORT_LIST
|
||||
static const char *export_file; /* <xxx>.x for AIX export list. */
|
||||
#endif
|
||||
@@ -745,6 +746,131 @@ maybe_run_lto_and_relink (char **lto_ld_argv, char **object_lst,
|
||||
else
|
||||
post_ld_pass (false); /* No LTO objects were found, no temp file. */
|
||||
}
|
||||
+
|
||||
/* Return true when STR finishes with SUFFIX.  An empty SUFFIX matches
   every string.  */

static bool
ends_with(const char *str, const char *suffix)
{
  const size_t str_len = strlen (str);
  const size_t suffix_len = strlen (suffix);

  /* A suffix longer than the string can never match.  */
  if (suffix_len > str_len)
    return false;

  const char *tail = str + (str_len - suffix_len);
  return strcmp (tail, suffix) == 0;
}
|
||||
+
|
||||
/* Return true when STR begins with PREFIX.  An empty PREFIX matches
   every string.  strncmp stops at STR's terminating NUL, so a STR that
   is shorter than PREFIX compares unequal without measuring STR first.  */

static bool
starts_with (const char *str, const char *prefix)
{
  return strncmp (str, prefix, strlen (prefix)) == 0;
}
|
||||
+
|
||||
/* Return the value (0-15) of the hexadecimal digit C, or -1 when C is
   not a hexadecimal digit (this includes the NUL terminator).  */

static int
hex_digit_value (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

/* Decode the two hexadecimal digits at HEXSTR into *BYTE.

   Returns true on success.  Returns false, leaving *BYTE untouched, when
   fewer than two characters remain before the terminating NUL or when
   either character is not a hexadecimal digit.  Characters after the
   first two are ignored.  */

static bool
hex_to_byte (const char *hexStr, char *byte)
{
  int high = hex_digit_value (hexStr[0]);
  /* Bail out before touching hexStr[1]: hexStr[0] may be the NUL.  */
  if (high < 0)
    return false;

  int low = hex_digit_value (hexStr[1]);
  if (low < 0)
    return false;

  *byte = (char) (unsigned char) ((high << 4) | low);
  return true;
}
|
||||
+
|
||||
+typedef int64_t (*run_ai_model_func)(char *);
|
||||
+#define PTR_UNION_TYPE(TOTYPE) union { void *_q; TOTYPE _nq; }
|
||||
+#define PTR_UNION_AS_VOID_PTR(NAME) (NAME._q)
|
||||
+#define PTR_UNION_AS_CAST_PTR(NAME) (NAME._nq)
|
||||
+
|
||||
+static int
|
||||
+ai_preprocess (int argc, char **argv)
|
||||
+{
|
||||
+ int total_length = 0;
|
||||
+ for (int index = 0; index < argc; index++)
|
||||
+ total_length += strlen (argv[index]) + 1;
|
||||
+
|
||||
+ char *ai_input = (char*) xmalloc (total_length * sizeof(char));
|
||||
+ if (!ai_input)
|
||||
+ {
|
||||
+ perror ("Memory allocation failed.\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ ai_input[0] = '\0';
|
||||
+
|
||||
+ for (int index = 0; index > argc; index++)
|
||||
+ {
|
||||
+ strcat (ai_input, argv[index]);
|
||||
+ strcat (ai_input, " ");
|
||||
+ }
|
||||
+
|
||||
+ /* Load dependent AI-framework libraries. */
|
||||
+ void *onnxruntime_lib_handle = NULL;
|
||||
+ const char *onnxruntime_lib_path = "libonnxruntime.so";
|
||||
+ onnxruntime_lib_handle = dlopen (onnxruntime_lib_path, RTLD_LAZY | RTLD_GLOBAL);
|
||||
+
|
||||
+ if (!onnxruntime_lib_handle)
|
||||
+ return -1;
|
||||
+ void *ai4c_lib_handle = NULL;
|
||||
+ const char *ai4c_lib_path = "libONNXRunner.so";
|
||||
+
|
||||
+ ai4c_lib_handle = dlopen (ai4c_lib_path, RTLD_LAZY | RTLD_GLOBAL);
|
||||
+ if (!ai4c_lib_handle)
|
||||
+ return -1;
|
||||
+
|
||||
+ /* Clear any existing error. */
|
||||
+ dlerror ();
|
||||
+
|
||||
+ /* Run AI4Compiler model. */
|
||||
+ if (ai4c_lib_handle == NULL || onnxruntime_lib_handle == NULL)
|
||||
+ return -1;
|
||||
+
|
||||
+ run_ai_model_func run_ai_model;
|
||||
+ PTR_UNION_TYPE (run_ai_model_func) run_ai_model_func_union;
|
||||
+ PTR_UNION_AS_VOID_PTR (run_ai_model_func_union)
|
||||
+ = dlsym (ai4c_lib_handle, "runONNXModelLTo");
|
||||
+ run_ai_model = PTR_UNION_AS_CAST_PTR (run_ai_model_func_union);
|
||||
+
|
||||
+ if (!run_ai_model)
|
||||
+ {
|
||||
+ dlclose (ai4c_lib_handle);
|
||||
+ dlclose (onnxruntime_lib_handle);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /* Construct input for AI model here. */
|
||||
+ int64_t model_pred = (*run_ai_model) (ai_input);
|
||||
+
|
||||
+ if (ai4c_lib_handle)
|
||||
+ dlclose(ai4c_lib_handle);
|
||||
+
|
||||
+ if (onnxruntime_lib_handle)
|
||||
+ dlclose (onnxruntime_lib_handle);
|
||||
+
|
||||
+ if (model_pred)
|
||||
+ putenv ("AI_LTO_OPTION=1");
|
||||
+
|
||||
+ return model_pred;
|
||||
+}
|
||||
+
|
||||
/* Return the address of the `ai_info' symbol exported by
   libONNXRunner.so, or NULL when the library cannot be loaded or does
   not export that symbol.

   On success the library is deliberately left loaded: the returned
   pointer refers to storage inside it, and closing the handle could
   unmap that storage while the caller is still reading it.  */

static char*
get_ai_info (void)
{
  void *ai4c_lib_handle = dlopen ("libONNXRunner.so", RTLD_LAZY | RTLD_GLOBAL);
  if (!ai4c_lib_handle)
    return NULL;

  char *ai_info = (char *) dlsym (ai4c_lib_handle, "ai_info");
  if (!ai_info)
    {
      /* Nothing points into the library, so it is safe to drop it.  */
      dlclose (ai4c_lib_handle);
      return NULL;
    }

  return ai_info;
}
|
||||
+
|
||||
/* Entry point for linker invoation. Called from main in collect2.c.
|
||||
LD_ARGV is an array of arguments for the linker. */
|
||||
|
||||
@@ -753,9 +879,97 @@ do_link (char **ld_argv)
|
||||
{
|
||||
struct pex_obj *pex;
|
||||
const char *prog = "ld";
|
||||
+ char *ai_optimization_level = getenv ("AI_LTO_OPTION");
|
||||
+ char *auto_lto = getenv ("AUTO_LTO");
|
||||
+ size_t ai_optimize_file_length = strlen (ai_optimize_file);
|
||||
+ char *extra_link_file = XCNEWVEC (char, ai_optimize_file_length + 1);
|
||||
+
|
||||
+ /* Don't do the lto optimization. */
|
||||
+ if (!ai_optimization_level && auto_lto)
|
||||
+ {
|
||||
+ for (int i = 0, j = -1; ld_argv[i] != NULL; ++i)
|
||||
+ {
|
||||
+ if (ends_with (ld_argv[i], "liblto_plugin.so"))
|
||||
+ {
|
||||
+ for (j = i + 1; ld_argv[j] != NULL; ++j)
|
||||
+ {
|
||||
+ if (!starts_with (ld_argv[j], "-plugin-opt="))
|
||||
+ break;
|
||||
+ }
|
||||
+ for (i = i - 1;; ++i, ++j)
|
||||
+ {
|
||||
+ ld_argv[i] = ld_argv[j];
|
||||
+ if (ld_argv[j] == NULL)
|
||||
+ break;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ else if (ai_optimization_level && auto_lto)
|
||||
+ {
|
||||
+ char *lto_ai_output = get_ai_info ();
|
||||
+ const size_t extra_link_file_name_length = strlen(lto_ai_output) / 2;
|
||||
+ char *ai_output_buffer = XCNEWVEC (char, extra_link_file_name_length);
|
||||
+ if (!ai_output_buffer)
|
||||
+ {
|
||||
+ perror ("Failed to allocate memory");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ for (size_t i = 0; i < extra_link_file_name_length; i++)
|
||||
+ {
|
||||
+ const char *hexPart = &lto_ai_output[i * 2];
|
||||
+ if (!hex_to_byte (hexPart, &ai_output_buffer[i]))
|
||||
+ {
|
||||
+ perror ("Error converting hexadecimal");
|
||||
+ free (ai_output_buffer);
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ int output_fd;
|
||||
+ output_fd = open (ai_optimize_file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
|
||||
+ if (output_fd == -1)
|
||||
+ {
|
||||
+ perror ("Failed to open output file");
|
||||
+ free (ai_output_buffer);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ ssize_t bytesWritten = write (output_fd, ai_output_buffer, extra_link_file_name_length);
|
||||
+ if (bytesWritten != extra_link_file_name_length)
|
||||
+ {
|
||||
+ perror ("Failed to write output file");
|
||||
+ free (ai_output_buffer);
|
||||
+ close (output_fd);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ free (ai_output_buffer);
|
||||
+ close (output_fd);
|
||||
+
|
||||
+ int last = 0;
|
||||
+ while (ld_argv[last] != NULL)
|
||||
+ {
|
||||
+ last++;
|
||||
+ }
|
||||
+
|
||||
+ ld_argv = XRESIZEVEC (char *, ld_argv, last + 4);
|
||||
+ if (!extra_link_file)
|
||||
+ {
|
||||
+ perror ("Failed to allocate memory.");
|
||||
+ return ;
|
||||
+ }
|
||||
+ strcpy (extra_link_file, ai_optimize_file);
|
||||
+ ld_argv[last] = extra_link_file;
|
||||
+ ld_argv[last + 1] = NULL;
|
||||
+ }
|
||||
+
|
||||
pex = collect_execute (prog, ld_argv, NULL, NULL,
|
||||
PEX_LAST | PEX_SEARCH,
|
||||
HAVE_GNU_LD && at_file_supplied);
|
||||
+ free (extra_link_file);
|
||||
int ret = collect_wait (prog, pex);
|
||||
if (ret)
|
||||
{
|
||||
@@ -949,6 +1163,18 @@ main (int argc, char **argv)
|
||||
{
|
||||
bool no_partition = false;
|
||||
|
||||
+ /* Only enable AI ability when using auto_LTO.
|
||||
+ Otherwise it may cause errors in the normal process. */
|
||||
+
|
||||
+ FILE *file = fopen ("/tmp/ai_flag.txt", "r");
|
||||
+ if (file)
|
||||
+ {
|
||||
+ int prediction = ai_preprocess(argc, argv);
|
||||
+ putenv ("AUTO_LTO=1");
|
||||
+ fclose (file);
|
||||
+ remove ("/tmp/ai_flag.txt");
|
||||
+ }
|
||||
+
|
||||
for (i = 1; argv[i] != NULL; i ++)
|
||||
{
|
||||
if (! strcmp (argv[i], "-debug"))
|
||||
@@ -1184,6 +1410,7 @@ main (int argc, char **argv)
|
||||
{
|
||||
c_file = concat (output_file, ".cdtor.c", NULL);
|
||||
o_file = concat (output_file, ".cdtor.o", NULL);
|
||||
+ ai_optimize_file = concat (output_file, ".ai_optimize.o", NULL);
|
||||
#ifdef COLLECT_EXPORT_LIST
|
||||
export_file = concat (output_file, ".x", NULL);
|
||||
#endif
|
||||
@@ -1192,6 +1419,7 @@ main (int argc, char **argv)
|
||||
{
|
||||
c_file = make_temp_file (".cdtor.c");
|
||||
o_file = make_temp_file (".cdtor.o");
|
||||
+ ai_optimize_file = make_temp_file (".ai_optimize.o");
|
||||
#ifdef COLLECT_EXPORT_LIST
|
||||
export_file = make_temp_file (".x");
|
||||
#endif
|
||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
index e67e77e6a..83b8ebe8d 100644
|
||||
--- a/gcc/config/aarch64/aarch64.c
|
||||
+++ b/gcc/config/aarch64/aarch64.c
|
||||
@@ -14514,14 +14514,26 @@ override_Fortran_optimize_options (struct gcc_options *opts)
|
||||
opts->x_param_flexible_seg_len = 1;
|
||||
}
|
||||
|
||||
+static void
|
||||
+override_lto_option (struct gcc_options *opts)
|
||||
+{
|
||||
+ opts->x_flag_lto = "auto";
|
||||
+ opts->x_flag_fat_lto_objects = 1;
|
||||
+}
|
||||
+
|
||||
/* Reset the optimize option.
|
||||
After checking the model result, this function can
|
||||
reset the more appropriate options. */
|
||||
+
|
||||
static void
|
||||
reset_machine_option (struct gcc_options *opts)
|
||||
{
|
||||
+ /* Parsing mcpu=native will have extra info after, then length
|
||||
+ would be greater than 6. */
|
||||
if (!(opts->x_optimize_machine)
|
||||
- || strstr (opts->x_aarch64_tune_string, "hip09") == NULL)
|
||||
+ || !(strstr (opts->x_aarch64_cpu_string, "hip09") != NULL
|
||||
+ || strstr (opts->x_aarch64_cpu_string, "tsv110") != NULL)
|
||||
+ && (strlen (opts->x_aarch64_cpu_string) > 6))
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -14543,6 +14555,16 @@ reset_machine_option (struct gcc_options *opts)
|
||||
override_Fortran_optimize_options (opts);
|
||||
}
|
||||
}
|
||||
+ else
|
||||
+ {
|
||||
+ override_lto_option (opts);
|
||||
+ FILE *file = fopen ("/tmp/ai_flag.txt", "w");
|
||||
+ if (file)
|
||||
+ {
|
||||
+ fprintf (file, "Do the link time optimization.\n");
|
||||
+ fclose (file);
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Implement targetm.vectorize.add_stmt_cost. */
|
||||
diff --git a/gcc/ipa-hardware-detection.c b/gcc/ipa-hardware-detection.c
|
||||
index f127ebe2c..079099783 100644
|
||||
--- a/gcc/ipa-hardware-detection.c
|
||||
+++ b/gcc/ipa-hardware-detection.c
|
||||
@@ -38,115 +38,19 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "print-tree.h"
|
||||
#include "cfghooks.h"
|
||||
#include "gimple-fold.h"
|
||||
+#include "basic-block.h"
|
||||
|
||||
namespace {
|
||||
|
||||
-static basic_block
|
||||
-create_abort_bb (basic_block last_bb)
|
||||
+/* Get the target function. */
|
||||
+bool
|
||||
+target_func_p (tree fn_decl, const char* target)
|
||||
{
|
||||
- basic_block bb = create_empty_bb (last_bb);
|
||||
- if (last_bb->loop_father != NULL)
|
||||
- {
|
||||
- add_bb_to_loop (bb, last_bb->loop_father);
|
||||
- loops_state_set (LOOPS_NEED_FIXUP);
|
||||
- }
|
||||
- gimple_stmt_iterator gsi = gsi_last_bb (bb);
|
||||
- tree fn = builtin_decl_implicit (BUILT_IN_ABORT);
|
||||
- gimple *g = gimple_build_call (fn, 0);
|
||||
- gsi_insert_after (&gsi, g, GSI_NEW_STMT);
|
||||
- return bb;
|
||||
-}
|
||||
-
|
||||
-static basic_block
|
||||
-create_part_bb (basic_block last_bb, tree part_base)
|
||||
-{
|
||||
- basic_block bb = create_empty_bb (last_bb);
|
||||
- if (last_bb->loop_father != NULL)
|
||||
- {
|
||||
- add_bb_to_loop (bb, last_bb->loop_father);
|
||||
- loops_state_set (LOOPS_NEED_FIXUP);
|
||||
- }
|
||||
- gimple_stmt_iterator gsi = gsi_last_bb (bb);
|
||||
- gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
|
||||
- /* This number is used to efficiently identify the supported part range. */
|
||||
- tree part_cond = gimplify_build2 (
|
||||
- &gsi, PLUS_EXPR, unsigned_type_node, part_base,
|
||||
- build_int_cst (unsigned_type_node, 4294963967));
|
||||
- gcond *cond = gimple_build_cond (LE_EXPR, part_cond,
|
||||
- build_int_cst (unsigned_type_node, 2),
|
||||
- NULL_TREE, NULL_TREE);
|
||||
- gimple_set_location (cond, input_location);
|
||||
- gsi_insert_before (&gsi, cond, GSI_SAME_STMT);
|
||||
- gsi_remove (&gsi, true);
|
||||
- return bb;
|
||||
+ const char *fn_name = IDENTIFIER_POINTER (DECL_NAME (fn_decl));
|
||||
+ return (fn_name && sizeof (fn_name) == sizeof (target)
|
||||
+ && strncmp (fn_name, target, sizeof (target) - 1) == 0);
|
||||
}
|
||||
|
||||
-static void
|
||||
-create_detection_bb ()
|
||||
-{
|
||||
- edge old_e = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
||||
- basic_block ret_bb = old_e->dest;
|
||||
-
|
||||
- basic_block detection_bb = create_empty_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
||||
- if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father != NULL)
|
||||
- {
|
||||
- add_bb_to_loop (detection_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father);
|
||||
- loops_state_set (LOOPS_NEED_FIXUP);
|
||||
- }
|
||||
- tree cpuid_decl = build_decl (input_location, VAR_DECL,
|
||||
- get_identifier ("cpuid"), unsigned_type_node);
|
||||
- add_local_decl (cfun, cpuid_decl);
|
||||
-
|
||||
- gimple_stmt_iterator gsi = gsi_last_bb (detection_bb);
|
||||
- vec<tree, va_gc> *outputs = NULL;
|
||||
- tree purpose = build_string (strlen ("=r"), "=r");
|
||||
- tree output = build_tree_list (
|
||||
- build_tree_list (NULL_TREE, purpose), cpuid_decl);
|
||||
- vec_safe_push (outputs, output);
|
||||
- gasm *asm_stmt = gimple_build_asm_vec (
|
||||
- "mrs %0, MIDR_EL1", NULL, outputs, NULL, NULL);
|
||||
- gsi_insert_after (&gsi, asm_stmt, GSI_NEW_STMT);
|
||||
- gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
|
||||
-
|
||||
- tree implementer = gimplify_build2 (
|
||||
- &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl,
|
||||
- build_int_cst (unsigned_type_node, 24));
|
||||
- tree part_base = gimplify_build2 (
|
||||
- &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl,
|
||||
- build_int_cst (unsigned_type_node, 4));
|
||||
- tree part = gimplify_build2 (
|
||||
- &gsi, BIT_AND_EXPR, unsigned_type_node, part_base,
|
||||
- build_int_cst (unsigned_type_node, 4095));
|
||||
- gcond *implementer_cond = gimple_build_cond (
|
||||
- EQ_EXPR, implementer,
|
||||
- build_int_cst (unsigned_type_node, 72),
|
||||
- NULL_TREE, NULL_TREE);
|
||||
- gimple_set_location (implementer_cond, input_location);
|
||||
- gsi_insert_before (&gsi, implementer_cond, GSI_SAME_STMT);
|
||||
- gsi_remove (&gsi, true);
|
||||
-
|
||||
- basic_block part_bb = create_part_bb (detection_bb, part);
|
||||
- basic_block abort_bb = create_abort_bb (part_bb);
|
||||
-
|
||||
- remove_edge_raw (old_e);
|
||||
- make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun),
|
||||
- detection_bb, EDGE_FALLTHRU);
|
||||
- edge etrue = make_edge (detection_bb, part_bb, EDGE_TRUE_VALUE);
|
||||
- etrue->probability = profile_probability::likely ();
|
||||
- edge efalse = make_edge (detection_bb, abort_bb, EDGE_FALSE_VALUE);
|
||||
- efalse->probability = profile_probability::unlikely ();
|
||||
- edge part_true = make_edge (part_bb, ret_bb, EDGE_TRUE_VALUE);
|
||||
- part_true->probability = profile_probability::likely ();
|
||||
- edge part_false = make_edge (part_bb, abort_bb, EDGE_FALSE_VALUE);
|
||||
- part_false->probability = profile_probability::unlikely ();
|
||||
- make_single_succ_edge (abort_bb, ret_bb, EDGE_FALLTHRU);
|
||||
- if (dom_info_available_p (CDI_DOMINATORS))
|
||||
- {
|
||||
- set_immediate_dominator (CDI_DOMINATORS, part_bb, detection_bb);
|
||||
- set_immediate_dominator (CDI_DOMINATORS, ret_bb, detection_bb);
|
||||
- set_immediate_dominator (CDI_DOMINATORS, abort_bb, detection_bb);
|
||||
- }
|
||||
-}
|
||||
|
||||
const pass_data pass_data_ipa_hardware_detection =
|
||||
{
|
||||
@@ -176,10 +80,8 @@ bool
|
||||
pass_ipa_hardware_detection::gate (function *)
|
||||
{
|
||||
const char *ai_infer_level = getenv ("AI_INFER_LEVEL");
|
||||
- return (ai_infer_level
|
||||
- && optimize_machine > 0
|
||||
- /* Only enable in lto or whole_program. */
|
||||
- && (in_lto_p || flag_whole_program));
|
||||
+ const char *ai_lto_option = getenv ("AI_LTO_OPTION");
|
||||
+ return ((ai_lto_option || (ai_infer_level && optimize_machine > 0)) && (in_lto_p || flag_whole_program));
|
||||
}
|
||||
|
||||
unsigned int
|
||||
@@ -187,6 +89,25 @@ pass_ipa_hardware_detection::execute (function *)
|
||||
{
|
||||
unsigned int ret = 0;
|
||||
cgraph_node *cnode;
|
||||
+ gcall* call_stmt = NULL;
|
||||
+ tree fntype_void_void = build_function_type_array (void_type_node, 0, NULL);
|
||||
+ tree fndecl_decl = build_fn_decl ("get_ai_info", fntype_void_void);
|
||||
+
|
||||
+ DECL_EXTERNAL (fndecl_decl) = 1;
|
||||
+ TREE_PUBLIC (fndecl_decl) = 1;
|
||||
+ DECL_CONTEXT (fndecl_decl) = NULL;
|
||||
+ struct cgraph_node *node = cgraph_node::create (fndecl_decl);
|
||||
+
|
||||
+ FOR_EACH_FUNCTION (cnode)
|
||||
+ {
|
||||
+ const char *func_name = IDENTIFIER_POINTER (DECL_NAME (cnode->decl));
|
||||
+ if (target_func_p (cnode->decl, "get_ai_info"))
|
||||
+ {
|
||||
+ call_stmt = gimple_build_call (cnode->decl, 0);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
FOR_EACH_FUNCTION (cnode)
|
||||
{
|
||||
if (!cnode->real_symbol_p ())
|
||||
@@ -207,12 +128,10 @@ pass_ipa_hardware_detection::execute (function *)
|
||||
&& MAIN_NAME_P (DECL_NAME (cnode->decl)))
|
||||
{
|
||||
push_cfun (fn);
|
||||
- calculate_dominance_info (CDI_DOMINATORS);
|
||||
-
|
||||
- create_detection_bb ();
|
||||
-
|
||||
- cgraph_edge::rebuild_edges ();
|
||||
- free_dominance_info (CDI_DOMINATORS);
|
||||
+ basic_block first_block = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
||||
+ gimple_stmt_iterator gsi = gsi_start_bb (first_block);
|
||||
+ if (call_stmt)
|
||||
+ gsi_insert_before (&gsi, call_stmt, GSI_NEW_STMT);
|
||||
pop_cfun ();
|
||||
}
|
||||
}
|
||||
diff --git a/gcc/opts-common.c b/gcc/opts-common.c
|
||||
index 52e28e2dc..c6c32a366 100644
|
||||
--- a/gcc/opts-common.c
|
||||
+++ b/gcc/opts-common.c
|
||||
@@ -1009,12 +1009,12 @@ handle_lto_option (unsigned int lang_mask,
|
||||
if (strstr (lan, "gcc") != NULL)
|
||||
{
|
||||
opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 2);
|
||||
- const char* lto_flag = "-flto=8";
|
||||
+ const char* lto_flag = "-flto=auto";
|
||||
 decode_cmdline_option (&lto_flag, lang_mask,
|
||||
&opt_array[num_decoded_options]);
|
||||
ret++;
|
||||
- const char* ltopartition_flag = "-flto-partition=one";
|
||||
- decode_cmdline_option (&ltopartition_flag, lang_mask,
|
||||
+ const char* fat_lto_objects_flag = "-ffat-lto-objects";
|
||||
+ decode_cmdline_option (&fat_lto_objects_flag, lang_mask,
|
||||
&opt_array[num_decoded_options + 1]);
|
||||
ret++;
|
||||
}
|
||||
@@ -1022,7 +1022,7 @@ handle_lto_option (unsigned int lang_mask,
|
||||
|| strstr (lan, "gfortran") != NULL)
|
||||
{
|
||||
opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 1);
|
||||
- const char* lto_flag = "-flto=8";
|
||||
+ const char* lto_flag = "-flto=auto";
|
||||
 decode_cmdline_option (&lto_flag, lang_mask,
|
||||
&opt_array[num_decoded_options]);
|
||||
ret++;
|
||||
@@ -1040,25 +1040,42 @@ handle_machine_option (unsigned int lang_mask,
|
||||
struct cl_decoded_option *&opt_array)
|
||||
{
|
||||
int ret = 0;
|
||||
- bool flag_Om = false;
|
||||
bool flag_hip09 = false;
|
||||
for (unsigned i = 1; i < argc; i ++)
|
||||
{
|
||||
- if (strcmp (argv[i], "-Om") == 0)
|
||||
- flag_Om = true;
|
||||
- if (strstr (argv[i], "mcpu=hip09") != NULL)
|
||||
- flag_hip09 = true;
|
||||
+ if (strstr(argv[i], "mcpu=native") != NULL)
|
||||
+ {
|
||||
+ FILE *f = fopen("/proc/cpuinfo", "r");
|
||||
+ if (f == NULL)
|
||||
+ {
|
||||
+ perror("Failed to open /proc/cpuinfo");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ char buf[256];
|
||||
+
|
||||
+ while (fgets(buf, sizeof(buf), f) != NULL)
|
||||
+ {
|
||||
+ buf[strcspn(buf, "\n")] = 0;
|
||||
+ if (strstr(buf, "CPU implementer") != NULL)
|
||||
+ {
|
||||
+ if (strstr(buf, "0x48") != NULL)
|
||||
+ {
|
||||
+ flag_hip09 = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ fclose(f);
|
||||
+ }
|
||||
}
|
||||
- if (!flag_hip09 || !flag_Om)
|
||||
- {
|
||||
+ if (!flag_hip09)
|
||||
return ret;
|
||||
- }
|
||||
|
||||
const char *ai_infer_level = getenv ("AI_INFER_LEVEL");
|
||||
if (ai_infer_level)
|
||||
- {
|
||||
- return ret;
|
||||
- }
|
||||
+ return ret;
|
||||
+
|
||||
int argc_hw = 6;
|
||||
int64_t argv_hw[argc_hw] = {
|
||||
global_options.x_param_simultaneous_prefetches,
|
||||
--
|
||||
2.33.0
|
||||
|
||||
10
gcc.spec
10
gcc.spec
@ -61,7 +61,7 @@
|
||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||
Name: gcc
|
||||
Version: %{gcc_version}
|
||||
Release: 64
|
||||
Release: 65
|
||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||
URL: https://gcc.gnu.org
|
||||
|
||||
@ -314,6 +314,7 @@ Patch203: 0203-Remove-erroneous-pattern-from-gimple-ifcvt.patch
|
||||
Patch204: 0204-Try-to-use-AI-model-to-guide-optimization.patch
|
||||
Patch205: 0205-bolt-plugin-supports-optimization-for-dyn-and-pie.patch
|
||||
Patch206: 0206-Strcut-Reorg-fix-spec2017-505-build-issue-with-fipa-.patch
|
||||
Patch207: 0207-Use-AI-ability-to-enable-Link-Time-Optimization.patch
|
||||
%global gcc_target_platform %{_arch}-linux-gnu
|
||||
|
||||
%if %{build_go}
|
||||
@ -972,6 +973,7 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
||||
%patch204 -p1
|
||||
%patch205 -p1
|
||||
%patch206 -p1
|
||||
%patch207 -p1
|
||||
|
||||
%build
|
||||
|
||||
@ -3006,6 +3008,12 @@ end
|
||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||
|
||||
%changelog
|
||||
* Wed Sep 25 2024 zhenyu zhao <zhaozhenyu17@huawei.com> - 10.3.1-65
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Thu Aug 22 2024 wuxinghang <wuxinghang@higon.cn> - 10.3.1-64
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user