617 lines
20 KiB
Diff
617 lines
20 KiB
Diff
From 8db421d94ad808c51c86514d7170c97e7704fd6d Mon Sep 17 00:00:00 2001
|
||
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
|
||
Date: Mon, 23 Sep 2024 19:14:39 +0800
|
||
Subject: [PATCH] Use AI ability to enable Link Time Optimization.
|
||
|
||
---
|
||
gcc/collect2.c | 230 ++++++++++++++++++++++++++++++++++-
|
||
gcc/config/aarch64/aarch64.c | 24 +++-
|
||
gcc/ipa-hardware-detection.c | 145 +++++-----------------
|
||
gcc/opts-common.c | 47 ++++---
|
||
4 files changed, 316 insertions(+), 130 deletions(-)
|
||
|
||
diff --git a/gcc/collect2.c b/gcc/collect2.c
|
||
index f8a5ce459..d4b6a1849 100644
|
||
--- a/gcc/collect2.c
|
||
+++ b/gcc/collect2.c
|
||
@@ -51,7 +51,7 @@ along with GCC; see the file COPYING3. If not see
|
||
#include "obstack.h"
|
||
#include "intl.h"
|
||
#include "version.h"
|
||
-
|
||
+
|
||
/* On certain systems, we have code that works by scanning the object file
|
||
directly. But this code uses system-specific header files and library
|
||
functions, so turn it off in a cross-compiler. Likewise, the names of
|
||
@@ -207,6 +207,7 @@ static int static_obj; /* true if -static */
|
||
|
||
static const char *c_file; /* <xxx>.c for constructor/destructor list. */
|
||
static const char *o_file; /* <xxx>.o for constructor/destructor list. */
|
||
+static const char *ai_optimize_file; /* <xxx>.o for ai optimization file. */
|
||
#ifdef COLLECT_EXPORT_LIST
|
||
static const char *export_file; /* <xxx>.x for AIX export list. */
|
||
#endif
|
||
@@ -745,6 +746,131 @@ maybe_run_lto_and_relink (char **lto_ld_argv, char **object_lst,
|
||
else
|
||
post_ld_pass (false); /* No LTO objects were found, no temp file. */
|
||
}
|
||
+
|
||
+/* String helpers: report whether STR ends with SUFFIX / starts with PREFIX.  */
+
+static bool
+ends_with(const char *str, const char *suffix)
+{
+  const size_t n_str = strlen (str), n_suffix = strlen (suffix);
+  const char *tail = n_str < n_suffix ? NULL : str + (n_str - n_suffix);
+  return tail != NULL && strcmp (tail, suffix) == 0;
+}
|
||
+
|
||
+static bool
+starts_with(const char *str, const char *prefix)
+{
+  /* A STR shorter than PREFIX mismatches at its terminating NUL, so no
+     separate length comparison is needed.  */
+  return strncmp (str, prefix, strlen (prefix)) == 0;
+}
|
||
+
|
||
+static bool
+hex_to_byte(const char *hexStr, char *byte)
+{
+  /* NUL is not a hex digit, so this also rejects inputs shorter than two
+     characters; the short-circuit avoids reading past the terminator.  */
+  const bool two_digits = ISXDIGIT (hexStr[0]) && ISXDIGIT (hexStr[1]);
+  return two_digits && sscanf (hexStr, "%2hhx", byte) == 1;
+}
|
||
+
|
||
+typedef int64_t (*run_ai_model_func)(char *);
+#define PTR_UNION_TYPE(TOTYPE) union { void *_q; TOTYPE _nq; }
+#define PTR_UNION_AS_VOID_PTR(NAME) (NAME._q)
+#define PTR_UNION_AS_CAST_PTR(NAME) (NAME._nq)
+
+/* Join the ARGC strings of ARGV into one space-separated command line,
+   pass it to runONNXModelLTo from libONNXRunner.so and return the model's
+   prediction, or -1 if either library or the entry point cannot be
+   loaded.  A non-zero prediction also puts AI_LTO_OPTION=1 into the
+   environment.  */
+
+static int
+ai_preprocess (int argc, char **argv)
+{
+  /* Stays -1 unless the model actually ran.  */
+  int result = -1;
+  void *onnxruntime_lib_handle = NULL;
+  void *ai4c_lib_handle = NULL;
+
+  /* Every argument is followed by one space; the extra byte holds the
+     terminating NUL.  */
+  size_t total_length = 1;
+  for (int index = 0; index < argc; index++)
+    total_length += strlen (argv[index]) + 1;
+
+  /* xmalloc aborts on failure, so the result needs no NULL check.  */
+  char *ai_input = (char *) xmalloc (total_length);
+  char *cursor = ai_input;
+  for (int index = 0; index < argc; index++)
+    {
+      size_t length = strlen (argv[index]);
+      memcpy (cursor, argv[index], length);
+      cursor += length;
+      *cursor++ = ' ';
+    }
+  *cursor = '\0';
+
+  /* Load the ONNX runtime, then the AI4C runner library.  RTLD_GLOBAL
+     makes the runtime's symbols visible to the runner.  */
+  onnxruntime_lib_handle = dlopen ("libonnxruntime.so",
+                                   RTLD_LAZY | RTLD_GLOBAL);
+  if (!onnxruntime_lib_handle)
+    goto done;
+
+  ai4c_lib_handle = dlopen ("libONNXRunner.so", RTLD_LAZY | RTLD_GLOBAL);
+  if (!ai4c_lib_handle)
+    goto done;
+
+  {
+    /* Going through the union turns dlsym's object pointer into a
+       function pointer without a direct cast between the two.  */
+    PTR_UNION_TYPE (run_ai_model_func) run_ai_model_func_union;
+    PTR_UNION_AS_VOID_PTR (run_ai_model_func_union)
+      = dlsym (ai4c_lib_handle, "runONNXModelLTo");
+    run_ai_model_func run_ai_model
+      = PTR_UNION_AS_CAST_PTR (run_ai_model_func_union);
+    if (!run_ai_model)
+      goto done;
+
+    /* Run the model on the joined command line.  */
+    int64_t model_pred = (*run_ai_model) (ai_input);
+    if (model_pred)
+      putenv ("AI_LTO_OPTION=1");
+    result = (int) model_pred;
+  }
+
+done:
+  /* Single exit path: the libraries and the buffer are always released.  */
+  if (ai4c_lib_handle)
+    dlclose (ai4c_lib_handle);
+  if (onnxruntime_lib_handle)
+    dlclose (onnxruntime_lib_handle);
+  free (ai_input);
+  return result;
+}
|
||
+
|
||
+/* Return an xmalloc'ed copy of the string exported as "ai_info" by
+   libONNXRunner.so (assumed to be a NUL-terminated char array), or NULL
+   if the library or the symbol cannot be found.  The caller owns it.  */
+static char*
+get_ai_info ()
+{
+  void *ai4c_lib_handle = dlopen ("libONNXRunner.so", RTLD_LAZY | RTLD_GLOBAL);
+  if (!ai4c_lib_handle)
+    return NULL;
+  char *ai_info_copy = NULL;
+  const char *ai_info = (const char *) dlsym (ai4c_lib_handle, "ai_info");
+  if (ai_info)
+    {
+      /* Copy before dlclose: the symbol's storage dies with the library.  */
+      size_t size = strlen (ai_info) + 1;
+      ai_info_copy = (char *) memcpy (xmalloc (size), ai_info, size);
+    }
+  dlclose (ai4c_lib_handle);
+  return ai_info_copy;
+}
|
||
+
|
||
/* Entry point for linker invoation. Called from main in collect2.c.
|
||
LD_ARGV is an array of arguments for the linker. */
|
||
|
||
@@ -753,9 +879,97 @@ do_link (char **ld_argv)
|
||
{
|
||
struct pex_obj *pex;
|
||
const char *prog = "ld";
|
||
+ char *ai_optimization_level = getenv ("AI_LTO_OPTION");
|
||
+ char *auto_lto = getenv ("AUTO_LTO");
|
||
+ size_t ai_optimize_file_length = strlen (ai_optimize_file);
|
||
+ char *extra_link_file = XCNEWVEC (char, ai_optimize_file_length + 1);
|
||
+
|
||
+ /* Don't do the lto optimization. */
|
||
+ if (!ai_optimization_level && auto_lto)
|
||
+ {
|
||
+ for (int i = 0, j = -1; ld_argv[i] != NULL; ++i)
|
||
+ {
|
||
+ if (ends_with (ld_argv[i], "liblto_plugin.so"))
|
||
+ {
|
||
+ for (j = i + 1; ld_argv[j] != NULL; ++j)
|
||
+ {
|
||
+ if (!starts_with (ld_argv[j], "-plugin-opt="))
|
||
+ break;
|
||
+ }
|
||
+ for (i = i - 1;; ++i, ++j)
|
||
+ {
|
||
+ ld_argv[i] = ld_argv[j];
|
||
+ if (ld_argv[j] == NULL)
|
||
+ break;
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ else if (ai_optimization_level && auto_lto)
|
||
+ {
|
||
+ char *lto_ai_output = get_ai_info ();
|
||
+ const size_t extra_link_file_name_length = strlen(lto_ai_output) / 2;
|
||
+ char *ai_output_buffer = XCNEWVEC (char, extra_link_file_name_length);
|
||
+ if (!ai_output_buffer)
|
||
+ {
|
||
+ perror ("Failed to allocate memory");
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ for (size_t i = 0; i < extra_link_file_name_length; i++)
|
||
+ {
|
||
+ const char *hexPart = &lto_ai_output[i * 2];
|
||
+ if (!hex_to_byte (hexPart, &ai_output_buffer[i]))
|
||
+ {
|
||
+ perror ("Error converting hexadecimal");
|
||
+ free (ai_output_buffer);
|
||
+ return;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ int output_fd;
|
||
+ output_fd = open (ai_optimize_file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
|
||
+ if (output_fd == -1)
|
||
+ {
|
||
+ perror ("Failed to open output file");
|
||
+ free (ai_output_buffer);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ ssize_t bytesWritten = write (output_fd, ai_output_buffer, extra_link_file_name_length);
|
||
+ if (bytesWritten != extra_link_file_name_length)
|
||
+ {
|
||
+ perror ("Failed to write output file");
|
||
+ free (ai_output_buffer);
|
||
+ close (output_fd);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ free (ai_output_buffer);
|
||
+ close (output_fd);
|
||
+
|
||
+ int last = 0;
|
||
+ while (ld_argv[last] != NULL)
|
||
+ {
|
||
+ last++;
|
||
+ }
|
||
+
|
||
+ ld_argv = XRESIZEVEC (char *, ld_argv, last + 4);
|
||
+ if (!extra_link_file)
|
||
+ {
|
||
+ perror ("Failed to allocate memory.");
|
||
+ return ;
|
||
+ }
|
||
+ strcpy (extra_link_file, ai_optimize_file);
|
||
+ ld_argv[last] = extra_link_file;
|
||
+ ld_argv[last + 1] = NULL;
|
||
+ }
|
||
+
|
||
pex = collect_execute (prog, ld_argv, NULL, NULL,
|
||
PEX_LAST | PEX_SEARCH,
|
||
HAVE_GNU_LD && at_file_supplied);
|
||
+ free (extra_link_file);
|
||
int ret = collect_wait (prog, pex);
|
||
if (ret)
|
||
{
|
||
@@ -949,6 +1163,18 @@ main (int argc, char **argv)
|
||
{
|
||
bool no_partition = false;
|
||
|
||
+ /* Only enable AI ability when using auto_LTO.
|
||
+ Otherwise it may cause errors in the normal process. */
|
||
+
|
||
+ FILE *file = fopen ("/tmp/ai_flag.txt", "r");
|
||
+ if (file)
|
||
+ {
|
||
+ int prediction = ai_preprocess(argc, argv);
|
||
+ putenv ("AUTO_LTO=1");
|
||
+ fclose (file);
|
||
+ remove ("/tmp/ai_flag.txt");
|
||
+ }
|
||
+
|
||
for (i = 1; argv[i] != NULL; i ++)
|
||
{
|
||
if (! strcmp (argv[i], "-debug"))
|
||
@@ -1184,6 +1410,7 @@ main (int argc, char **argv)
|
||
{
|
||
c_file = concat (output_file, ".cdtor.c", NULL);
|
||
o_file = concat (output_file, ".cdtor.o", NULL);
|
||
+ ai_optimize_file = concat (output_file, ".ai_optimize.o", NULL);
|
||
#ifdef COLLECT_EXPORT_LIST
|
||
export_file = concat (output_file, ".x", NULL);
|
||
#endif
|
||
@@ -1192,6 +1419,7 @@ main (int argc, char **argv)
|
||
{
|
||
c_file = make_temp_file (".cdtor.c");
|
||
o_file = make_temp_file (".cdtor.o");
|
||
+ ai_optimize_file = make_temp_file (".ai_optimize.o");
|
||
#ifdef COLLECT_EXPORT_LIST
|
||
export_file = make_temp_file (".x");
|
||
#endif
|
||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||
index e67e77e6a..83b8ebe8d 100644
|
||
--- a/gcc/config/aarch64/aarch64.c
|
||
+++ b/gcc/config/aarch64/aarch64.c
|
||
@@ -14514,14 +14514,26 @@ override_Fortran_optimize_options (struct gcc_options *opts)
|
||
opts->x_param_flexible_seg_len = 1;
|
||
}
|
||
|
||
+static void
+override_lto_option (struct gcc_options *opts)
+{
+  opts->x_flag_fat_lto_objects = 1;  /* Presumably mirrors -ffat-lto-objects.  */
+  opts->x_flag_lto = "auto";         /* Presumably mirrors -flto=auto.  */
+}
|
||
+
|
||
/* Reset the optimize option.
|
||
After checking the model result, this function can
|
||
reset the more appropriate options. */
|
||
+
|
||
static void
|
||
reset_machine_option (struct gcc_options *opts)
|
||
{
|
||
+ /* Parsing mcpu=native appends extra info to the cpu string, so its
+ length is then greater than 6. */
|
||
if (!(opts->x_optimize_machine)
|
||
- || strstr (opts->x_aarch64_tune_string, "hip09") == NULL)
|
||
+ || !(strstr (opts->x_aarch64_cpu_string, "hip09") != NULL
|
||
+ || strstr (opts->x_aarch64_cpu_string, "tsv110") != NULL)
|
||
+ && (strlen (opts->x_aarch64_cpu_string) > 6))
|
||
{
|
||
return;
|
||
}
|
||
@@ -14543,6 +14555,16 @@ reset_machine_option (struct gcc_options *opts)
|
||
override_Fortran_optimize_options (opts);
|
||
}
|
||
}
|
||
+ else
|
||
+ {
|
||
+ override_lto_option (opts);
|
||
+ FILE *file = fopen ("/tmp/ai_flag.txt", "w");
|
||
+ if (file)
|
||
+ {
|
||
+ fprintf (file, "Do the link time optimization.\n");
|
||
+ fclose (file);
|
||
+ }
|
||
+ }
|
||
}
|
||
|
||
/* Implement targetm.vectorize.add_stmt_cost. */
|
||
diff --git a/gcc/ipa-hardware-detection.c b/gcc/ipa-hardware-detection.c
|
||
index f127ebe2c..079099783 100644
|
||
--- a/gcc/ipa-hardware-detection.c
|
||
+++ b/gcc/ipa-hardware-detection.c
|
||
@@ -38,115 +38,19 @@ along with GCC; see the file COPYING3. If not see
|
||
#include "print-tree.h"
|
||
#include "cfghooks.h"
|
||
#include "gimple-fold.h"
|
||
+#include "basic-block.h"
|
||
|
||
namespace {
|
||
|
||
-static basic_block
|
||
-create_abort_bb (basic_block last_bb)
|
||
+/* Return true if FN_DECL has a name and that name is exactly TARGET.  */
+bool
+target_func_p (tree fn_decl, const char* target)
{
- basic_block bb = create_empty_bb (last_bb);
- if (last_bb->loop_father != NULL)
- {
- add_bb_to_loop (bb, last_bb->loop_father);
- loops_state_set (LOOPS_NEED_FIXUP);
- }
- gimple_stmt_iterator gsi = gsi_last_bb (bb);
- tree fn = builtin_decl_implicit (BUILT_IN_ABORT);
- gimple *g = gimple_build_call (fn, 0);
- gsi_insert_after (&gsi, g, GSI_NEW_STMT);
- return bb;
-}
-
-static basic_block
-create_part_bb (basic_block last_bb, tree part_base)
-{
- basic_block bb = create_empty_bb (last_bb);
- if (last_bb->loop_father != NULL)
- {
- add_bb_to_loop (bb, last_bb->loop_father);
- loops_state_set (LOOPS_NEED_FIXUP);
- }
- gimple_stmt_iterator gsi = gsi_last_bb (bb);
- gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
- /* This number is used to efficiently identify the supported part range. */
- tree part_cond = gimplify_build2 (
- &gsi, PLUS_EXPR, unsigned_type_node, part_base,
- build_int_cst (unsigned_type_node, 4294963967));
- gcond *cond = gimple_build_cond (LE_EXPR, part_cond,
- build_int_cst (unsigned_type_node, 2),
- NULL_TREE, NULL_TREE);
- gimple_set_location (cond, input_location);
- gsi_insert_before (&gsi, cond, GSI_SAME_STMT);
- gsi_remove (&gsi, true);
- return bb;
+  tree name = DECL_NAME (fn_decl);
+  /* Compare whole names; sizeof on the pointers only yields their width.  */
+  return name != NULL_TREE && strcmp (IDENTIFIER_POINTER (name), target) == 0;
}
|
||
|
||
-static void
|
||
-create_detection_bb ()
|
||
-{
|
||
- edge old_e = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
||
- basic_block ret_bb = old_e->dest;
|
||
-
|
||
- basic_block detection_bb = create_empty_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
||
- if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father != NULL)
|
||
- {
|
||
- add_bb_to_loop (detection_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father);
|
||
- loops_state_set (LOOPS_NEED_FIXUP);
|
||
- }
|
||
- tree cpuid_decl = build_decl (input_location, VAR_DECL,
|
||
- get_identifier ("cpuid"), unsigned_type_node);
|
||
- add_local_decl (cfun, cpuid_decl);
|
||
-
|
||
- gimple_stmt_iterator gsi = gsi_last_bb (detection_bb);
|
||
- vec<tree, va_gc> *outputs = NULL;
|
||
- tree purpose = build_string (strlen ("=r"), "=r");
|
||
- tree output = build_tree_list (
|
||
- build_tree_list (NULL_TREE, purpose), cpuid_decl);
|
||
- vec_safe_push (outputs, output);
|
||
- gasm *asm_stmt = gimple_build_asm_vec (
|
||
- "mrs %0, MIDR_EL1", NULL, outputs, NULL, NULL);
|
||
- gsi_insert_after (&gsi, asm_stmt, GSI_NEW_STMT);
|
||
- gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
|
||
-
|
||
- tree implementer = gimplify_build2 (
|
||
- &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl,
|
||
- build_int_cst (unsigned_type_node, 24));
|
||
- tree part_base = gimplify_build2 (
|
||
- &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl,
|
||
- build_int_cst (unsigned_type_node, 4));
|
||
- tree part = gimplify_build2 (
|
||
- &gsi, BIT_AND_EXPR, unsigned_type_node, part_base,
|
||
- build_int_cst (unsigned_type_node, 4095));
|
||
- gcond *implementer_cond = gimple_build_cond (
|
||
- EQ_EXPR, implementer,
|
||
- build_int_cst (unsigned_type_node, 72),
|
||
- NULL_TREE, NULL_TREE);
|
||
- gimple_set_location (implementer_cond, input_location);
|
||
- gsi_insert_before (&gsi, implementer_cond, GSI_SAME_STMT);
|
||
- gsi_remove (&gsi, true);
|
||
-
|
||
- basic_block part_bb = create_part_bb (detection_bb, part);
|
||
- basic_block abort_bb = create_abort_bb (part_bb);
|
||
-
|
||
- remove_edge_raw (old_e);
|
||
- make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun),
|
||
- detection_bb, EDGE_FALLTHRU);
|
||
- edge etrue = make_edge (detection_bb, part_bb, EDGE_TRUE_VALUE);
|
||
- etrue->probability = profile_probability::likely ();
|
||
- edge efalse = make_edge (detection_bb, abort_bb, EDGE_FALSE_VALUE);
|
||
- efalse->probability = profile_probability::unlikely ();
|
||
- edge part_true = make_edge (part_bb, ret_bb, EDGE_TRUE_VALUE);
|
||
- part_true->probability = profile_probability::likely ();
|
||
- edge part_false = make_edge (part_bb, abort_bb, EDGE_FALSE_VALUE);
|
||
- part_false->probability = profile_probability::unlikely ();
|
||
- make_single_succ_edge (abort_bb, ret_bb, EDGE_FALLTHRU);
|
||
- if (dom_info_available_p (CDI_DOMINATORS))
|
||
- {
|
||
- set_immediate_dominator (CDI_DOMINATORS, part_bb, detection_bb);
|
||
- set_immediate_dominator (CDI_DOMINATORS, ret_bb, detection_bb);
|
||
- set_immediate_dominator (CDI_DOMINATORS, abort_bb, detection_bb);
|
||
- }
|
||
-}
|
||
|
||
const pass_data pass_data_ipa_hardware_detection =
|
||
{
|
||
@@ -176,10 +80,8 @@ bool
|
||
pass_ipa_hardware_detection::gate (function *)
|
||
{
|
||
const char *ai_infer_level = getenv ("AI_INFER_LEVEL");
|
||
- return (ai_infer_level
|
||
- && optimize_machine > 0
|
||
- /* Only enable in lto or whole_program. */
|
||
- && (in_lto_p || flag_whole_program));
|
||
+ const char *ai_lto_option = getenv ("AI_LTO_OPTION");
|
||
+ return ((ai_lto_option || (ai_infer_level && optimize_machine > 0)) && (in_lto_p || flag_whole_program));
|
||
}
|
||
|
||
unsigned int
|
||
@@ -187,6 +89,25 @@ pass_ipa_hardware_detection::execute (function *)
|
||
{
|
||
unsigned int ret = 0;
|
||
cgraph_node *cnode;
|
||
+ gcall* call_stmt = NULL;
|
||
+ tree fntype_void_void = build_function_type_array (void_type_node, 0, NULL);
|
||
+ tree fndecl_decl = build_fn_decl ("get_ai_info", fntype_void_void);
|
||
+
|
||
+ DECL_EXTERNAL (fndecl_decl) = 1;
|
||
+ TREE_PUBLIC (fndecl_decl) = 1;
|
||
+ DECL_CONTEXT (fndecl_decl) = NULL;
|
||
+ struct cgraph_node *node = cgraph_node::create (fndecl_decl);
|
||
+
|
||
+ FOR_EACH_FUNCTION (cnode)
|
||
+ {
|
||
+ const char *func_name = IDENTIFIER_POINTER (DECL_NAME (cnode->decl));
|
||
+ if (target_func_p (cnode->decl, "get_ai_info"))
|
||
+ {
|
||
+ call_stmt = gimple_build_call (cnode->decl, 0);
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
FOR_EACH_FUNCTION (cnode)
|
||
{
|
||
if (!cnode->real_symbol_p ())
|
||
@@ -207,12 +128,10 @@ pass_ipa_hardware_detection::execute (function *)
|
||
&& MAIN_NAME_P (DECL_NAME (cnode->decl)))
|
||
{
|
||
push_cfun (fn);
|
||
- calculate_dominance_info (CDI_DOMINATORS);
|
||
-
|
||
- create_detection_bb ();
|
||
-
|
||
- cgraph_edge::rebuild_edges ();
|
||
- free_dominance_info (CDI_DOMINATORS);
|
||
+ basic_block first_block = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
||
+ gimple_stmt_iterator gsi = gsi_start_bb (first_block);
|
||
+ if (call_stmt)
|
||
+ gsi_insert_before (&gsi, call_stmt, GSI_NEW_STMT);
|
||
pop_cfun ();
|
||
}
|
||
}
|
||
diff --git a/gcc/opts-common.c b/gcc/opts-common.c
|
||
index 52e28e2dc..c6c32a366 100644
|
||
--- a/gcc/opts-common.c
|
||
+++ b/gcc/opts-common.c
|
||
@@ -1009,12 +1009,12 @@ handle_lto_option (unsigned int lang_mask,
|
||
if (strstr (lan, "gcc") != NULL)
|
||
{
|
||
opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 2);
|
||
- const char* lto_flag = "-flto=8";
|
||
+ const char* lto_flag = "-flto=auto";
|
||
decode_cmdline_option (&lto_flag, lang_mask,
|
||
&opt_array[num_decoded_options]);
|
||
ret++;
|
||
- const char* ltopartition_flag = "-flto-partition=one";
|
||
- decode_cmdline_option (&ltopartition_flag, lang_mask,
|
||
+ const char* fat_lto_objects_flag = "-ffat-lto-objects";
|
||
+ decode_cmdline_option (&fat_lto_objects_flag, lang_mask,
|
||
&opt_array[num_decoded_options + 1]);
|
||
ret++;
|
||
}
|
||
@@ -1022,7 +1022,7 @@ handle_lto_option (unsigned int lang_mask,
|
||
|| strstr (lan, "gfortran") != NULL)
|
||
{
|
||
opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 1);
|
||
- const char* lto_flag = "-flto=8";
|
||
+ const char* lto_flag = "-flto=auto";
|
||
decode_cmdline_option (&lto_flag, lang_mask,
|
||
&opt_array[num_decoded_options]);
|
||
ret++;
|
||
@@ -1040,25 +1040,42 @@ handle_machine_option (unsigned int lang_mask,
|
||
struct cl_decoded_option *&opt_array)
|
||
{
|
||
int ret = 0;
|
||
- bool flag_Om = false;
|
||
bool flag_hip09 = false;
|
||
for (unsigned i = 1; i < argc; i ++)
|
||
{
|
||
- if (strcmp (argv[i], "-Om") == 0)
|
||
- flag_Om = true;
|
||
- if (strstr (argv[i], "mcpu=hip09") != NULL)
|
||
- flag_hip09 = true;
|
||
+ if (strstr(argv[i], "mcpu=native") != NULL)
|
||
+ {
|
||
+ FILE *f = fopen("/proc/cpuinfo", "r");
|
||
+ if (f == NULL)
|
||
+ {
|
||
+ perror("Failed to open /proc/cpuinfo");
|
||
+ return -1;
|
||
+ }
|
||
+
|
||
+ char buf[256];
|
||
+
|
||
+ while (fgets(buf, sizeof(buf), f) != NULL)
|
||
+ {
|
||
+ buf[strcspn(buf, "\n")] = 0;
|
||
+ if (strstr(buf, "CPU implementer") != NULL)
|
||
+ {
|
||
+ if (strstr(buf, "0x48") != NULL)
|
||
+ {
|
||
+ flag_hip09 = true;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ fclose(f);
|
||
+ }
|
||
}
|
||
- if (!flag_hip09 || !flag_Om)
|
||
- {
|
||
+ if (!flag_hip09)
|
||
return ret;
|
||
- }
|
||
|
||
const char *ai_infer_level = getenv ("AI_INFER_LEVEL");
|
||
if (ai_infer_level)
|
||
- {
|
||
- return ret;
|
||
- }
|
||
+ return ret;
|
||
+
|
||
int argc_hw = 6;
|
||
int64_t argv_hw[argc_hw] = {
|
||
global_options.x_param_simultaneous_prefetches,
|
||
--
|
||
2.33.0
|
||
|