diff --git a/pyproject.toml b/pyproject.toml index 7f29dff..34d1ae5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "redisbench-admin" -version = "0.11.20" +version = "0.11.26" description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )." authors = ["filipecosta90 ","Redis Performance Group "] readme = "README.md" diff --git a/redisbench_admin/compare/args.py b/redisbench_admin/compare/args.py index 2fd35c5..8378fce 100644 --- a/redisbench_admin/compare/args.py +++ b/redisbench_admin/compare/args.py @@ -6,7 +6,7 @@ # environment variables import datetime - +import os from redisbench_admin.run.common import get_start_time_vars, PERFORMANCE_GH_TOKEN from redisbench_admin.utils.remote import ( PERFORMANCE_RTS_HOST, @@ -29,6 +29,10 @@ LAST_MONTH_UTC = NOW_UTC - (31 * 24 * 60 * 60 * 1000) START_TIME_NOW_UTC, _, _ = get_start_time_vars() START_TIME_LAST_MONTH_UTC = START_TIME_NOW_UTC - datetime.timedelta(days=30) +ARCH_X86 = "x86_64" +ARCH_ARM = "aarch64" +VALID_ARCHS = [ARCH_X86, ARCH_ARM] +ARCH = os.getenv("ARCH", ARCH_X86) def create_compare_arguments(parser): @@ -56,6 +60,20 @@ def create_compare_arguments(parser): parser.add_argument("--metric_name", type=str, default=None) parser.add_argument("--running_platform", type=str, default=None) parser.add_argument("--extra-filter", type=str, default=None) + parser.add_argument( + "--baseline_architecture", + type=str, + required=False, + default=ARCH, + help=f"Architecture to filter baseline time-series. One of {VALID_ARCHS}.", + ) + parser.add_argument( + "--comparison_architecture", + type=str, + required=False, + default=ARCH, + help=f"Architecture to filter comparison time-series. 
One of {VALID_ARCHS}.", + ) parser.add_argument( "--last_n", type=int, diff --git a/redisbench_admin/compare/compare.py b/redisbench_admin/compare/compare.py index e1743f8..f73172d 100644 --- a/redisbench_admin/compare/compare.py +++ b/redisbench_admin/compare/compare.py @@ -22,6 +22,7 @@ generate_new_pr_comment_notification, ) from redisbench_admin.utils.remote import get_overall_dashboard_keynames +from redisbench_admin.compare.args import ARCH_X86 def get_project_compare_zsets(triggering_env, org, repo): @@ -225,6 +226,8 @@ def compare_command_logic(args, project_name, project_version): "redisjson": "UErSC0jGk", "redistimeseries": "2WMw61UGz", } + baseline_architecture = args.baseline_architecture + comparison_architecture = args.comparison_architecture uid = None if tf_github_repo.lower() in grafana_dashboards_uids: uid = grafana_dashboards_uids[tf_github_repo.lower()] @@ -274,6 +277,8 @@ def compare_command_logic(args, project_name, project_version): to_ts_ms, use_metric_context_path, running_platform, + baseline_architecture, + comparison_architecture, ) comment_body = "" if total_comparison_points > 0: @@ -498,6 +503,8 @@ def compute_regression_table( to_ts_ms=None, use_metric_context_path=None, running_platform=None, + baseline_architecture=ARCH_X86, + comparison_architecture=ARCH_X86, ): START_TIME_NOW_UTC, _, _ = get_start_time_vars() START_TIME_LAST_MONTH_UTC = START_TIME_NOW_UTC - datetime.timedelta(days=31) @@ -584,6 +591,8 @@ def compute_regression_table( tf_triggering_env, verbose, running_platform, + baseline_architecture, + comparison_architecture, ) logging.info( "Printing differential analysis between {} and {}".format( @@ -711,6 +720,8 @@ def from_rts_to_regression_table( tf_triggering_env, verbose, running_platform=None, + baseline_architecture=ARCH_X86, + comparison_architecture=ARCH_X86, ): print_all = print_regressions_only is False and print_improvements_only is False table = [] @@ -735,6 +746,8 @@ def from_rts_to_regression_table( ] if 
running_platform is not None: filters_baseline.append("running_platform={}".format(running_platform)) + if baseline_architecture != ARCH_X86: + filters_baseline.append(f"arch={baseline_architecture}") filters_comparison = [ "{}={}".format(by_str_comparison, comparison_str), "metric={}".format(metric_name), @@ -744,6 +757,8 @@ def from_rts_to_regression_table( ] if running_platform is not None: filters_comparison.append("running_platform={}".format(running_platform)) + if comparison_architecture != ARCH_X86: + filters_comparison.append(f"arch={comparison_architecture}") baseline_timeseries = rts.ts().queryindex(filters_baseline) comparison_timeseries = rts.ts().queryindex(filters_comparison) diff --git a/redisbench_admin/run/args.py b/redisbench_admin/run/args.py index 7e2316c..993591a 100644 --- a/redisbench_admin/run/args.py +++ b/redisbench_admin/run/args.py @@ -44,9 +44,20 @@ ALLOWED_TOOLS_DEFAULT = "memtier_benchmark,redis-benchmark,redisgraph-benchmark-go,ycsb,go-ycsb,tsbs_run_queries_redistimeseries,tsbs_load_redistimeseries,ftsb_redisearch,aibench_run_inference_redisai_vision,ann-benchmarks" ALLOWED_BENCH_TOOLS = os.getenv("ALLOWED_BENCH_TOOLS", ALLOWED_TOOLS_DEFAULT) SKIP_DB_SETUP = bool(int(os.getenv("SKIP_DB_SETUP", "0"))) +ARCH_X86 = "x86_64" +ARCH_ARM = "aarch64" +VALID_ARCHS = [ARCH_X86, ARCH_ARM] +ARCH = os.getenv("ARCH", ARCH_X86) def common_run_args(parser): + parser.add_argument( + "--architecture", + type=str, + required=False, + default=ARCH, + help=f"Architecture to run the benchmark on. 
One of {VALID_ARCHS}.",
+    )
     parser.add_argument(
         "--keep_env_and_topo",
         required=False,
diff --git a/redisbench_admin/run/common.py b/redisbench_admin/run/common.py
index 57dd252..7177d0a 100644
--- a/redisbench_admin/run/common.py
+++ b/redisbench_admin/run/common.py
@@ -702,45 +702,79 @@ def dso_check(dso, local_module_file):
 
 
 def dbconfig_keyspacelen_check(
-    benchmark_config, redis_conns, ignore_keyspace_errors=False
+    benchmark_config, redis_conns, ignore_keyspace_errors=False, timeout=60
 ):
-    result = True
+    """
+    Poll the shards until their total key count matches the dbconfig spec.
+
+    The count is retried with exponential backoff for up to ``timeout``
+    seconds; at least one check always runs.
+
+    :param benchmark_config: benchmark definition from which the expected
+        keyspace length is read (via check_dbconfig_keyspacelen_requirement)
+    :param redis_conns: connections whose INFO keyspace "keys" counters are summed
+    :param ignore_keyspace_errors: return False instead of raising when the
+        keyspace length still mismatches once the timeout expired
+    :param timeout: maximum number of seconds to keep retrying
+    :return: True if no check is required or the key count matches, False on
+        a mismatch with ignore_keyspace_errors set
+    :raises Exception: on a mismatch when ignore_keyspace_errors is not set
+    """
+    # monotonic clock, so wall-clock adjustments cannot move the deadline
+    deadline = time.monotonic() + timeout
     (
         requires_keyspacelen_check,
         keyspacelen,
     ) = check_dbconfig_keyspacelen_requirement(benchmark_config)
-    if requires_keyspacelen_check:
-        result = False
+
+    if not requires_keyspacelen_check:
+        return True
+
+    attempt = 0
+    # check before testing the deadline: at least one check always runs, so
+    # total_keys is bound for the error report below even when timeout <= 0
+    while True:
+        attempt += 1
         logging.info(
-            "Ensuring keyspace length requirement = {} is met.".format(keyspacelen)
+            f"Ensuring keyspace length requirement = {keyspacelen} is met. attempt #{attempt}"
         )
         total_keys = 0
         for shard_conn in redis_conns:
             keyspace_dict = shard_conn.info("keyspace")
             for _, dbdict in keyspace_dict.items():
-                shard_keys = dbdict["keys"]
-                total_keys += shard_keys
+                total_keys += dbdict.get("keys", 0)
 
         if total_keys == keyspacelen:
             logging.info(
-                "The total numbers of keys in setup matches the expected spec: {}=={}".format(
+                "The total number of keys in setup matches the expected spec: {} == {}".format(
                     keyspacelen, total_keys
                 )
             )
-            result = True
-        else:
-            logging.error(
-                "The total numbers of keys in setup does not match the expected spec: {}!={}. Aborting...".format(
-                    keyspacelen, total_keys
-                )
+            return True
+
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            break
+        # exponential backoff, capped at the time left so the final check
+        # happens at the deadline rather than being skipped after a long sleep
+        backoff = min(2 ** (attempt - 1), remaining)
+        logging.warning(
+            "Keyspace length mismatch ({} != {}). Retrying in {} seconds...".format(
+                total_keys, keyspacelen, backoff
             )
-            if ignore_keyspace_errors is False:
-                raise Exception(
-                    "The total numbers of keys in setup does not match the expected spec: {}!={}. Aborting...".format(
-                        keyspacelen, total_keys
-                    )
-                )
-    return result
+        )
+        time.sleep(backoff)
+
+    logging.error(
+        f"The total number of keys in setup does not match the expected spec: {keyspacelen} != {total_keys}. Aborting after {attempt} tries..."
+    )
+
+    if not ignore_keyspace_errors:
+        raise Exception(
+            f"The total number of keys in setup does not match the expected spec: {keyspacelen} != {total_keys}. Aborting after {attempt} tries..."
+        )
+
+    return False
 
 
 def common_properties_log(
diff --git a/redisbench_admin/run/ftsb/ftsb.py b/redisbench_admin/run/ftsb/ftsb.py
index f01bfc5..7ab2736 100644
--- a/redisbench_admin/run/ftsb/ftsb.py
+++ b/redisbench_admin/run/ftsb/ftsb.py
@@ -29,7 +29,6 @@ def prepare_ftsb_benchmark_command(
     :return: [string] containing the required command to run the benchmark given the configurations
     """
     command_arr = [executable_path]
-
     command_arr.extend(
         ["--host", "{}:{}".format(server_private_ip, server_plaintext_port)]
     )
diff --git a/redisbench_admin/run/run.py b/redisbench_admin/run/run.py
index 25640e5..d7004cc 100644
--- a/redisbench_admin/run/run.py
+++ b/redisbench_admin/run/run.py
@@ -57,9 +57,13 @@ def define_benchmark_plan(benchmark_definitions, default_specs):
             benchmark_runs_plan[benchmark_type] = {}
 
         # extract dataset-name
-        benchmark_contains_dbconfig, dataset_name, _, _, _ = (
-            extract_redis_dbconfig_parameters(benchmark_config, "dbconfig")
-        )
+        (
+            benchmark_contains_dbconfig,
+            dataset_name,
+            _,
+            _,
+            _,
+        ) = extract_redis_dbconfig_parameters(benchmark_config, "dbconfig")
         logging.info(
             f"Benchmark contains specific dbconfig on test {test_name}: {benchmark_contains_dbconfig}"
         )
diff --git a/redisbench_admin/run_local/local_db.py b/redisbench_admin/run_local/local_db.py
index 3a36c16..80b1571 100644
--- a/redisbench_admin/run_local/local_db.py
+++ b/redisbench_admin/run_local/local_db.py
@@ -158,7 +158,7 @@ def local_db_spin(
         logging.info("Skipping DB spin step...")
if setup_type == "oss-standalone": - r = redis.Redis(port=args.port, host=args.host) + r = redis.Redis(port=args.port, host=args.host, password=args.password) r.ping() r.client_setname("redisbench-admin-standalone") redis_conns.append(r) @@ -192,6 +192,11 @@ def local_db_spin( benchmark_tool_workdir, cluster_api_enabled, "dbconfig", + None, + None, + None, + None, + args.password, ) # run the benchmark @@ -206,7 +211,6 @@ def local_db_spin( load_via_benchmark_duration_seconds ) ) - dbconfig_keyspacelen_check(benchmark_config, redis_conns, ignore_keyspace_errors) artifact_version = run_redis_pre_steps( diff --git a/redisbench_admin/run_remote/remote_db.py b/redisbench_admin/run_remote/remote_db.py index 00741ee..0c9c79e 100644 --- a/redisbench_admin/run_remote/remote_db.py +++ b/redisbench_admin/run_remote/remote_db.py @@ -103,6 +103,7 @@ def remote_db_spin( flushall_on_every_test_start=False, ignore_keyspace_errors=False, continue_on_module_check_error=False, + keyspace_check_timeout=60, ): ( _, @@ -372,6 +373,7 @@ def remote_db_spin( benchmark_config, redis_conns, ignore_keyspace_errors, + keyspace_check_timeout, ) artifact_version = run_redis_pre_steps( benchmark_config, redis_conns[0], required_modules diff --git a/redisbench_admin/run_remote/remote_env.py b/redisbench_admin/run_remote/remote_env.py index 65e232a..6c062a3 100644 --- a/redisbench_admin/run_remote/remote_env.py +++ b/redisbench_admin/run_remote/remote_env.py @@ -7,6 +7,7 @@ from python_terraform import TerraformCommandError +from redisbench_admin.run.args import ARCH_X86 from redisbench_admin.run_remote.terraform import ( retrieve_inventory_info, terraform_spin_or_reuse_env, @@ -33,12 +34,18 @@ def remote_env_setup( spot_instance_error=False, spot_price_counter=0, full_price_counter=0, + architecture=ARCH_X86, ): server_plaintext_port = args.db_port db_ssh_port = args.db_ssh_port client_ssh_port = args.client_ssh_port username = args.user + + logging.info(f"specified arch for deployment 
{architecture}") if args.inventory is not None: + logging.info( + f"inventory info passed. avoiding to deploy using terraform {args.inventory}" + ) ( status, client_public_ip, @@ -85,6 +92,7 @@ def remote_env_setup( tf_timeout_secs, tf_override_name, tf_folder_spot_path, + architecture, ) spot_available_and_used = True spot_price_counter = spot_price_counter + 1 @@ -123,6 +131,7 @@ def remote_env_setup( tf_timeout_secs, tf_override_name, tf_folder_path, + architecture, ) full_price_counter = full_price_counter + 1 logging.info("Using the following connection addresses.") diff --git a/redisbench_admin/run_remote/run_remote.py b/redisbench_admin/run_remote/run_remote.py index fd57348..bf16d48 100644 --- a/redisbench_admin/run_remote/run_remote.py +++ b/redisbench_admin/run_remote/run_remote.py @@ -21,7 +21,7 @@ PerfDaemonRemoteCaller, PERF_DAEMON_LOGNAME, ) -from redisbench_admin.run.args import PROFILE_FREQ +from redisbench_admin.run.args import PROFILE_FREQ, VALID_ARCHS from redisbench_admin.run.common import ( get_start_time_vars, BENCHMARK_REPETITIONS, @@ -300,6 +300,14 @@ def run_remote_command_logic(args, project_name, project_version): benchmark_artifacts_table_name = "Benchmark client artifacts" benchmark_artifacts_table_headers = ["Setup", "Test-case", "Artifact", "link"] benchmark_artifacts_links = [] + architecture = args.architecture + if architecture not in VALID_ARCHS: + logging.critical( + f"The specified architecture {architecture} is not valid. 
Specify one of {VALID_ARCHS}" + ) + exit(1) + else: + logging.info(f"Running benchmark for architecture {architecture}") # contains the overall target-tables ( if any target is defined ) overall_tables = {} @@ -441,6 +449,7 @@ def run_remote_command_logic(args, project_name, project_version): spot_instance_error, 0, 0, + architecture, ) # after we've created the env, even on error we should always teardown diff --git a/redisbench_admin/run_remote/terraform.py b/redisbench_admin/run_remote/terraform.py index 5aab403..3497b23 100644 --- a/redisbench_admin/run_remote/terraform.py +++ b/redisbench_admin/run_remote/terraform.py @@ -30,6 +30,7 @@ def terraform_spin_or_reuse_env( tf_timeout_secs=7200, tf_override_name=None, tf_folder_path=None, + architecture="x86_64", ): ( remote_setup, @@ -40,11 +41,10 @@ def terraform_spin_or_reuse_env( "https://github.com/redis-performance/testing-infrastructure.git", "master", tf_folder_path, + architecture, ) logging.info( - "Repetition {} of {}. Deploying test {} on AWS using {}".format( - repetition, BENCHMARK_REPETITIONS, test_name, remote_setup - ) + f"Repetition {repetition} of {BENCHMARK_REPETITIONS}. 
Deploying test {test_name} on AWS using (architecture={architecture}) {remote_setup}" ) if tf_override_name is None: tf_setup_name = "{}{}".format(remote_setup, tf_setup_name_sufix) diff --git a/redisbench_admin/utils/remote.py b/redisbench_admin/utils/remote.py index 68846ab..e60c8bf 100644 --- a/redisbench_admin/utils/remote.py +++ b/redisbench_admin/utils/remote.py @@ -29,6 +29,12 @@ EC2_ACCESS_KEY, ) +ARCH_X86 = "x86_64" +ARCH_ARM = "aarch64" +VALID_ARCHS = [ARCH_X86, ARCH_ARM] +ARCH = os.getenv("ARCH", ARCH_X86) + + # environment variables PERFORMANCE_RTS_PUSH = bool(int(os.getenv("PUSH_RTS", "0"))) PERFORMANCE_RTS_AUTH = os.getenv("PERFORMANCE_RTS_AUTH", None) @@ -270,7 +276,9 @@ def setup_remote_environment( _, _, _ = tf.init( capture_output=True, backend_config={ - "key": "benchmarks/infrastructure/{}.tfstate".format(tf_setup_name) + "key": "benchmarks/infrastructure/{}.tfstate".format( + tf_setup_name.replace("/", "-") + ) }, ) _, _, _ = tf.refresh() @@ -573,8 +581,10 @@ def fetch_remote_setup_from_config( repo="https://github.com/redis-performance/testing-infrastructure.git", branch="master", path=None, + architecture=ARCH_X86, ): setup_type = "oss-standalone" + logging.info(f"fetch_remote_setup_from_config, architecture={architecture}") setup = None if path is None: for remote_setup_property in remote_setup_config: @@ -584,7 +594,17 @@ def fetch_remote_setup_from_config( setup = remote_setup_property["setup"] # fetch terraform folder path = "/terraform/{}-{}".format(setup_type, setup) + if architecture != ARCH_X86: + logging.info( + f"Checking if the architecture info is specified on the terraform path {path}" + ) + if architecture == ARCH_ARM and ARCH_ARM not in path: + logging.info(f"adding suffix '-{ARCH_ARM}' to {path}") + path = f"{path}-{ARCH_ARM}" + else: + logging.info(f"'-{ARCH_ARM}' suffix already in {path}") terraform_working_dir = common_tf(branch, path, repo) + return terraform_working_dir, setup_type, setup @@ -929,6 +949,7 @@ def 
get_ts_tags_and_name( tf_github_repo, tf_triggering_env, use_metric_context_path, + arch=ARCH_X86, ): # prepare tags timeserie_tags = get_project_ts_tags( @@ -956,6 +977,7 @@ def get_ts_tags_and_name( ) timeserie_tags["metric"] = str(metric_name) timeserie_tags["metric_name"] = metric_name + timeserie_tags["arch"] = arch timeserie_tags["metric_context_path"] = metric_context_path if metric_context_path is not None: timeserie_tags["test_name:metric_context_path"] = "{}:{}".format( @@ -978,6 +1000,7 @@ def get_ts_tags_and_name( use_metric_context_path, build_variant_name, running_platform, + arch, ) return timeserie_tags, ts_name diff --git a/redisbench_admin/utils/utils.py b/redisbench_admin/utils/utils.py index fe2b1c4..b86fcb7 100644 --- a/redisbench_admin/utils/utils.py +++ b/redisbench_admin/utils/utils.py @@ -331,6 +331,7 @@ def get_ts_metric_name( use_metric_context_path=False, build_variant_name=None, running_platform=None, + arch="x86_64", ): if use_metric_context_path: metric_name = "{}/{}".format(metric_name, metric_context_path) @@ -361,6 +362,11 @@ def get_ts_metric_name( metric=metric_name, ) ) + if arch != "x86_64": + logging.info( + f"Extending timeseries name with architecture given it's not x86. 
arch={arch}"
+        )
+        ts_name = ts_name + f"arch={arch}"
     return ts_name
 
 
diff --git a/tests/test_remote.py b/tests/test_remote.py
index 680cba2..96afe7d 100644
--- a/tests/test_remote.py
+++ b/tests/test_remote.py
@@ -106,6 +106,20 @@ def test_fetch_remote_setup_from_config():
     assert type == "oss-standalone"
 
 
+def test_fetch_remote_setup_from_config_aarch64():
+    # an aarch64 request must resolve to the "-aarch64" terraform folder
+    remote_setup_config = [{"type": "oss-standalone"}, {"setup": "redisearch-m5"}]
+    terraform_working_dir, setup_type, _ = fetch_remote_setup_from_config(
+        remote_setup_config,
+        "https://github.com/redis-performance/testing-infrastructure.git",
+        "master",
+        None,
+        "aarch64",
+    )
+    assert setup_type == "oss-standalone"
+    assert terraform_working_dir.endswith("/oss-standalone-redisearch-m5-aarch64")
+
+
 def test_push_data_to_redistimeseries():
     time_series_dict = {}
     try: