from __future__ import annotations

import datetime
import inspect
import os
import time
import uuid
from decimal import Decimal
from typing import Any
from warnings import warn


# boto3 is an optional dependency. If it's not installed,
# we'll just not emit the metrics.
# Keeping this logic here so that callers don't have to
# worry about it.
# EMIT_METRICS stays False unless the import below succeeds.
EMIT_METRICS = False
try:
    import boto3  # type: ignore[import]

    EMIT_METRICS = True
except ImportError as e:
    print(f"Unable to import boto3. Will not be emitting metrics.... Reason: {e}")

# Sometimes our runner machines are located in one AWS account while the metrics table may be in
# another, so we need to specify the table's ARN explicitly.
27*da0073e9SAndroid Build Coastguard WorkerTORCHCI_METRICS_TABLE_ARN = ( 28*da0073e9SAndroid Build Coastguard Worker "arn:aws:dynamodb:us-east-1:308535385114:table/torchci-metrics" 29*da0073e9SAndroid Build Coastguard Worker) 30*da0073e9SAndroid Build Coastguard Worker 31*da0073e9SAndroid Build Coastguard Worker 32*da0073e9SAndroid Build Coastguard Workerclass EnvVarMetric: 33*da0073e9SAndroid Build Coastguard Worker name: str 34*da0073e9SAndroid Build Coastguard Worker env_var: str 35*da0073e9SAndroid Build Coastguard Worker required: bool = True 36*da0073e9SAndroid Build Coastguard Worker # Used to cast the value of the env_var to the correct type (defaults to str) 37*da0073e9SAndroid Build Coastguard Worker type_conversion_fn: Any = None 38*da0073e9SAndroid Build Coastguard Worker 39*da0073e9SAndroid Build Coastguard Worker def __init__( 40*da0073e9SAndroid Build Coastguard Worker self, 41*da0073e9SAndroid Build Coastguard Worker name: str, 42*da0073e9SAndroid Build Coastguard Worker env_var: str, 43*da0073e9SAndroid Build Coastguard Worker required: bool = True, 44*da0073e9SAndroid Build Coastguard Worker type_conversion_fn: Any = None, 45*da0073e9SAndroid Build Coastguard Worker ) -> None: 46*da0073e9SAndroid Build Coastguard Worker self.name = name 47*da0073e9SAndroid Build Coastguard Worker self.env_var = env_var 48*da0073e9SAndroid Build Coastguard Worker self.required = required 49*da0073e9SAndroid Build Coastguard Worker self.type_conversion_fn = type_conversion_fn 50*da0073e9SAndroid Build Coastguard Worker 51*da0073e9SAndroid Build Coastguard Worker def value(self) -> Any: 52*da0073e9SAndroid Build Coastguard Worker value = os.environ.get(self.env_var) 53*da0073e9SAndroid Build Coastguard Worker 54*da0073e9SAndroid Build Coastguard Worker # Github CI will set some env vars to an empty string 55*da0073e9SAndroid Build Coastguard Worker DEFAULT_ENVVAR_VALUES = [None, ""] 56*da0073e9SAndroid Build Coastguard Worker if value in DEFAULT_ENVVAR_VALUES: 
57*da0073e9SAndroid Build Coastguard Worker if not self.required: 58*da0073e9SAndroid Build Coastguard Worker return None 59*da0073e9SAndroid Build Coastguard Worker 60*da0073e9SAndroid Build Coastguard Worker raise ValueError( 61*da0073e9SAndroid Build Coastguard Worker f"Missing {self.name}. Please set the {self.env_var} " 62*da0073e9SAndroid Build Coastguard Worker "environment variable to pass in this value." 63*da0073e9SAndroid Build Coastguard Worker ) 64*da0073e9SAndroid Build Coastguard Worker 65*da0073e9SAndroid Build Coastguard Worker if self.type_conversion_fn: 66*da0073e9SAndroid Build Coastguard Worker return self.type_conversion_fn(value) 67*da0073e9SAndroid Build Coastguard Worker return value 68*da0073e9SAndroid Build Coastguard Worker 69*da0073e9SAndroid Build Coastguard Worker 70*da0073e9SAndroid Build Coastguard Workerglobal_metrics: dict[str, Any] = {} 71*da0073e9SAndroid Build Coastguard Worker 72*da0073e9SAndroid Build Coastguard Worker 73*da0073e9SAndroid Build Coastguard Workerdef add_global_metric(metric_name: str, metric_value: Any) -> None: 74*da0073e9SAndroid Build Coastguard Worker """ 75*da0073e9SAndroid Build Coastguard Worker Adds stats that should be emitted with every metric by the current process. 76*da0073e9SAndroid Build Coastguard Worker If the emit_metrics method specifies a metric with the same name, it will 77*da0073e9SAndroid Build Coastguard Worker overwrite this value. 
78*da0073e9SAndroid Build Coastguard Worker """ 79*da0073e9SAndroid Build Coastguard Worker global_metrics[metric_name] = metric_value 80*da0073e9SAndroid Build Coastguard Worker 81*da0073e9SAndroid Build Coastguard Worker 82*da0073e9SAndroid Build Coastguard Workerdef emit_metric( 83*da0073e9SAndroid Build Coastguard Worker metric_name: str, 84*da0073e9SAndroid Build Coastguard Worker metrics: dict[str, Any], 85*da0073e9SAndroid Build Coastguard Worker) -> None: 86*da0073e9SAndroid Build Coastguard Worker """ 87*da0073e9SAndroid Build Coastguard Worker Upload a metric to DynamoDB (and from there, Rockset). 88*da0073e9SAndroid Build Coastguard Worker 89*da0073e9SAndroid Build Coastguard Worker Even if EMIT_METRICS is set to False, this function will still run the code to 90*da0073e9SAndroid Build Coastguard Worker validate and shape the metrics, skipping just the upload. 91*da0073e9SAndroid Build Coastguard Worker 92*da0073e9SAndroid Build Coastguard Worker Parameters: 93*da0073e9SAndroid Build Coastguard Worker metric_name: 94*da0073e9SAndroid Build Coastguard Worker Name of the metric. Every unique metric should have a different name 95*da0073e9SAndroid Build Coastguard Worker and be emitted just once per run attempt. 96*da0073e9SAndroid Build Coastguard Worker Metrics are namespaced by their module and the function that emitted them. 97*da0073e9SAndroid Build Coastguard Worker metrics: The actual data to record. 98*da0073e9SAndroid Build Coastguard Worker 99*da0073e9SAndroid Build Coastguard Worker Some default values are populated from environment variables, which must be set 100*da0073e9SAndroid Build Coastguard Worker for metrics to be emitted. 
(If they're not set, this function becomes a noop): 101*da0073e9SAndroid Build Coastguard Worker """ 102*da0073e9SAndroid Build Coastguard Worker 103*da0073e9SAndroid Build Coastguard Worker if metrics is None: 104*da0073e9SAndroid Build Coastguard Worker raise ValueError("You didn't ask to upload any metrics!") 105*da0073e9SAndroid Build Coastguard Worker 106*da0073e9SAndroid Build Coastguard Worker # Merge the given metrics with the global metrics, overwriting any duplicates 107*da0073e9SAndroid Build Coastguard Worker # with the given metrics. 108*da0073e9SAndroid Build Coastguard Worker metrics = {**global_metrics, **metrics} 109*da0073e9SAndroid Build Coastguard Worker 110*da0073e9SAndroid Build Coastguard Worker # We use these env vars that to determine basic info about the workflow run. 111*da0073e9SAndroid Build Coastguard Worker # By using env vars, we don't have to pass this info around to every function. 112*da0073e9SAndroid Build Coastguard Worker # It also helps ensure that we only emit metrics during CI 113*da0073e9SAndroid Build Coastguard Worker env_var_metrics = [ 114*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("repo", "GITHUB_REPOSITORY"), 115*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("workflow", "GITHUB_WORKFLOW"), 116*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("build_environment", "BUILD_ENVIRONMENT", required=False), 117*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("job", "GITHUB_JOB"), 118*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("test_config", "TEST_CONFIG", required=False), 119*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("pr_number", "PR_NUMBER", required=False, type_conversion_fn=int), 120*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("run_id", "GITHUB_RUN_ID", type_conversion_fn=int), 121*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("run_number", "GITHUB_RUN_NUMBER", type_conversion_fn=int), 122*da0073e9SAndroid Build Coastguard Worker 
EnvVarMetric("run_attempt", "GITHUB_RUN_ATTEMPT", type_conversion_fn=int), 123*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("job_id", "JOB_ID", type_conversion_fn=int), 124*da0073e9SAndroid Build Coastguard Worker EnvVarMetric("job_name", "JOB_NAME"), 125*da0073e9SAndroid Build Coastguard Worker ] 126*da0073e9SAndroid Build Coastguard Worker 127*da0073e9SAndroid Build Coastguard Worker # Use info about the function that invoked this one as a namespace and a way to filter metrics. 128*da0073e9SAndroid Build Coastguard Worker calling_frame = inspect.currentframe().f_back # type: ignore[union-attr] 129*da0073e9SAndroid Build Coastguard Worker calling_frame_info = inspect.getframeinfo(calling_frame) # type: ignore[arg-type] 130*da0073e9SAndroid Build Coastguard Worker calling_file = os.path.basename(calling_frame_info.filename) 131*da0073e9SAndroid Build Coastguard Worker calling_module = inspect.getmodule(calling_frame).__name__ # type: ignore[union-attr] 132*da0073e9SAndroid Build Coastguard Worker calling_function = calling_frame_info.function 133*da0073e9SAndroid Build Coastguard Worker 134*da0073e9SAndroid Build Coastguard Worker try: 135*da0073e9SAndroid Build Coastguard Worker reserved_metrics = { 136*da0073e9SAndroid Build Coastguard Worker "metric_name": metric_name, 137*da0073e9SAndroid Build Coastguard Worker "calling_file": calling_file, 138*da0073e9SAndroid Build Coastguard Worker "calling_module": calling_module, 139*da0073e9SAndroid Build Coastguard Worker "calling_function": calling_function, 140*da0073e9SAndroid Build Coastguard Worker "timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f"), 141*da0073e9SAndroid Build Coastguard Worker **{m.name: m.value() for m in env_var_metrics if m.value()}, 142*da0073e9SAndroid Build Coastguard Worker } 143*da0073e9SAndroid Build Coastguard Worker except ValueError as e: 144*da0073e9SAndroid Build Coastguard Worker warn(f"Not emitting metrics for {metric_name}. 
{e}") 145*da0073e9SAndroid Build Coastguard Worker return 146*da0073e9SAndroid Build Coastguard Worker 147*da0073e9SAndroid Build Coastguard Worker # Prefix key with metric name and timestamp to derisk chance of a uuid1 name collision 148*da0073e9SAndroid Build Coastguard Worker reserved_metrics[ 149*da0073e9SAndroid Build Coastguard Worker "dynamo_key" 150*da0073e9SAndroid Build Coastguard Worker ] = f"{metric_name}_{int(time.time())}_{uuid.uuid1().hex}" 151*da0073e9SAndroid Build Coastguard Worker 152*da0073e9SAndroid Build Coastguard Worker # Ensure the metrics dict doesn't contain any reserved keys 153*da0073e9SAndroid Build Coastguard Worker for key in reserved_metrics.keys(): 154*da0073e9SAndroid Build Coastguard Worker used_reserved_keys = [k for k in metrics.keys() if k == key] 155*da0073e9SAndroid Build Coastguard Worker if used_reserved_keys: 156*da0073e9SAndroid Build Coastguard Worker raise ValueError(f"Metrics dict contains reserved keys: [{', '.join(key)}]") 157*da0073e9SAndroid Build Coastguard Worker 158*da0073e9SAndroid Build Coastguard Worker # boto3 doesn't support uploading float values to DynamoDB, so convert them all to decimals. 
159*da0073e9SAndroid Build Coastguard Worker metrics = _convert_float_values_to_decimals(metrics) 160*da0073e9SAndroid Build Coastguard Worker 161*da0073e9SAndroid Build Coastguard Worker if EMIT_METRICS: 162*da0073e9SAndroid Build Coastguard Worker try: 163*da0073e9SAndroid Build Coastguard Worker session = boto3.Session(region_name="us-east-1") 164*da0073e9SAndroid Build Coastguard Worker session.resource("dynamodb").Table(TORCHCI_METRICS_TABLE_ARN).put_item( 165*da0073e9SAndroid Build Coastguard Worker Item={ 166*da0073e9SAndroid Build Coastguard Worker **reserved_metrics, 167*da0073e9SAndroid Build Coastguard Worker **metrics, 168*da0073e9SAndroid Build Coastguard Worker } 169*da0073e9SAndroid Build Coastguard Worker ) 170*da0073e9SAndroid Build Coastguard Worker except Exception as e: 171*da0073e9SAndroid Build Coastguard Worker # We don't want to fail the job if we can't upload the metric. 172*da0073e9SAndroid Build Coastguard Worker # We still raise the ValueErrors outside this try block since those indicate improperly configured metrics 173*da0073e9SAndroid Build Coastguard Worker warn(f"Error uploading metric {metric_name} to DynamoDB: {e}") 174*da0073e9SAndroid Build Coastguard Worker return 175*da0073e9SAndroid Build Coastguard Worker else: 176*da0073e9SAndroid Build Coastguard Worker print(f"Not emitting metrics for {metric_name}. 
Boto wasn't imported.") 177*da0073e9SAndroid Build Coastguard Worker 178*da0073e9SAndroid Build Coastguard Worker 179*da0073e9SAndroid Build Coastguard Workerdef _convert_float_values_to_decimals(data: dict[str, Any]) -> dict[str, Any]: 180*da0073e9SAndroid Build Coastguard Worker # Attempt to recurse 181*da0073e9SAndroid Build Coastguard Worker def _helper(o: Any) -> Any: 182*da0073e9SAndroid Build Coastguard Worker if isinstance(o, float): 183*da0073e9SAndroid Build Coastguard Worker return Decimal(str(o)) 184*da0073e9SAndroid Build Coastguard Worker if isinstance(o, list): 185*da0073e9SAndroid Build Coastguard Worker return [_helper(v) for v in o] 186*da0073e9SAndroid Build Coastguard Worker if isinstance(o, dict): 187*da0073e9SAndroid Build Coastguard Worker return {_helper(k): _helper(v) for k, v in o.items()} 188*da0073e9SAndroid Build Coastguard Worker if isinstance(o, tuple): 189*da0073e9SAndroid Build Coastguard Worker return tuple(_helper(v) for v in o) 190*da0073e9SAndroid Build Coastguard Worker return o 191*da0073e9SAndroid Build Coastguard Worker 192*da0073e9SAndroid Build Coastguard Worker return {k: _helper(v) for k, v in data.items()} 193