xref: /aosp_15_r20/external/cronet/build/android/resource_sizes.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/usr/bin/env vpython3
2# Copyright 2011 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Reports binary size metrics for an APK.
7
8More information at //docs/speed/binary_size/metrics.md.
9"""
10
11
12import argparse
13import collections
14from contextlib import contextmanager
15import json
16import logging
17import os
18import posixpath
19import re
20import struct
21import sys
22import tempfile
23import zipfile
24import zlib
25
26import devil_chromium
27from devil.android.sdk import build_tools
28from devil.utils import cmd_helper
29from devil.utils import lazy
30import method_count
31from pylib import constants
32from pylib.constants import host_paths
33
# Path to the 'aapt' build tool, wrapped in lazy.WeakConstant; users in this
# file obtain the actual path with _AAPT_PATH.read().
_AAPT_PATH = lazy.WeakConstant(lambda: build_tools.GetPath('aapt'))
# <src>/build/android/gyp: put on sys.path below to import util.build_utils.
_ANDROID_UTILS_PATH = os.path.join(host_paths.DIR_SOURCE_ROOT, 'build',
                                   'android', 'gyp')
# <src>/build/util: put on sys.path below to import lib.results.
_BUILD_UTILS_PATH = os.path.join(host_paths.DIR_SOURCE_ROOT, 'build', 'util')
# llvm-readobj binary that _RunReadobj() invokes to dump ELF section headers.
_READOBJ_PATH = os.path.join(host_paths.DIR_SOURCE_ROOT, 'third_party',
                             'llvm-build', 'Release+Asserts', 'bin',
                             'llvm-readobj')
41
42with host_paths.SysPath(host_paths.BUILD_COMMON_PATH):
43  import perf_tests_results_helper  # pylint: disable=import-error
44
45with host_paths.SysPath(host_paths.TRACING_PATH):
46  from tracing.value import convert_chart_json  # pylint: disable=import-error
47
48with host_paths.SysPath(_ANDROID_UTILS_PATH, 0):
49  from util import build_utils  # pylint: disable=import-error
50
51with host_paths.SysPath(_BUILD_UTILS_PATH, 0):
52  from lib.results import result_sink  # pylint: disable=import-error
53  from lib.results import result_types  # pylint: disable=import-error
54
# Captures an entire config from aapt output. The '%s' placeholder is filled
# in with a config name (e.g. 'fr') by _CreateResourceIdValueMap(). Note that
# a match requires a following 'config <name>:' header.
_AAPT_CONFIG_PATTERN = r'config %s:(.*?)config [a-zA-Z-]+:'
# Matches string resource entries from aapt output. Group 'id' is the
# 10-character resource id and group 'val' is the quoted string value.
_AAPT_ENTRY_RE = re.compile(
    r'resource (?P<id>\w{10}) [\w\.]+:string/.*?"(?P<val>.+?)"', re.DOTALL)
# Skeleton of the chartjson output; 'charts' starts out empty.
_BASE_CHART = {
    'format_version': '0.1',
    'benchmark_name': 'resource_sizes',
    'benchmark_description': 'APK resource size information.',
    'trace_rerun_options': [],
    'charts': {}
}
# Macro definitions look like (something, 123) when
# enable_resource_allowlist_generation=true.
_RC_HEADER_RE = re.compile(r'^#define (?P<name>\w+).* (?P<id>\d+)\)?$')
# Matches .pak files under assets/ whose name ends in 'resources.pak' or
# 'percent.pak'; _AnalyzeInternal() buckets these as non-translated resources.
_RE_NON_LANGUAGE_PAK = re.compile(r'^assets/.*(resources|percent)\.pak$')
# Maps a reported metric group to the ELF section names that are summed into
# it. Sections not listed here end up in 'other' (with a warning), and NOBITS
# sections are reported separately as 'bss'; see
# _ExtractLibSectionSizesFromApk().
_READELF_SIZES_METRICS = {
    'text': ['.text'],
    'data': ['.data', '.rodata', '.data.rel.ro', '.data.rel.ro.local'],
    'relocations':
    ['.rel.dyn', '.rel.plt', '.rela.dyn', '.rela.plt', '.relr.dyn'],
    'unwind': [
        '.ARM.extab', '.ARM.exidx', '.eh_frame', '.eh_frame_hdr',
        '.ARM.exidxsentinel_section_after_text'
    ],
    'symbols': [
        '.dynsym', '.dynstr', '.dynamic', '.shstrtab', '.got', '.plt', '.iplt',
        '.got.plt', '.hash', '.gnu.hash'
    ],
    'other': [
        '.init_array', '.preinit_array', '.ctors', '.fini_array', '.comment',
        '.note.gnu.gold-version', '.note.crashpad.info', '.note.android.ident',
        '.ARM.attributes', '.note.gnu.build-id', '.gnu.version',
        '.gnu.version_d', '.gnu.version_r', '.interp', '.gcc_except_table',
        '.note.gnu.property'
    ]
}
92
93
94class _AccumulatingReporter:
95  def __init__(self):
96    self._combined_metrics = collections.defaultdict(int)
97
98  def __call__(self, graph_title, trace_title, value, units):
99    self._combined_metrics[(graph_title, trace_title, units)] += value
100
101  def DumpReports(self, report_func):
102    for (graph_title, trace_title,
103         units), value in sorted(self._combined_metrics.items()):
104      report_func(graph_title, trace_title, value, units)
105
106
class _ChartJsonReporter(_AccumulatingReporter):
  """Accumulating reporter that also records each result into a chartjson."""

  def __init__(self, chartjson):
    super().__init__()
    self._chartjson = chartjson
    # Prepended to the trace title of every result passed to __call__().
    self.trace_title_prefix = ''

  def __call__(self, graph_title, trace_title, value, units):
    """Accumulates the value, then reports it under the prefixed title."""
    super().__call__(graph_title, trace_title, value, units)

    prefixed_title = self.trace_title_prefix + trace_title
    perf_tests_results_helper.ReportPerfResult(self._chartjson, graph_title,
                                               prefixed_title, value, units)

  def SynthesizeTotals(self, unique_method_count):
    """Reports each accumulated total as a 'Combined_<trace>' result.

    The 'unique methods' trace is reported as |unique_method_count| rather
    than as the accumulated sum.
    """
    for metric_key, total in sorted(self._combined_metrics.items()):
      graph_title, trace_title, units = metric_key
      reported = total
      if trace_title == 'unique methods':
        reported = unique_method_count
      perf_tests_results_helper.ReportPerfResult(self._chartjson, graph_title,
                                                 'Combined_' + trace_title,
                                                 reported, units)
128
129
130def _PercentageDifference(a, b):
131  if a == 0:
132    return 0
133  return float(b - a) / a
134
135
136def _ReadZipInfoExtraFieldLength(zip_file, zip_info):
137  """Reads the value of |extraLength| from |zip_info|'s local file header.
138
139  |zip_info| has an |extra| field, but it's read from the central directory.
140  Android's zipalign tool sets the extra field only in local file headers.
141  """
142  # Refer to https://en.wikipedia.org/wiki/Zip_(file_format)#File_headers
143  zip_file.fp.seek(zip_info.header_offset + 28)
144  return struct.unpack('<H', zip_file.fp.read(2))[0]
145
146
147def _MeasureApkSignatureBlock(zip_file):
148  """Measures the size of the v2 / v3 signing block.
149
150  Refer to: https://source.android.com/security/apksigning/v2
151  """
152  # Seek to "end of central directory" struct.
153  eocd_offset_from_end = -22 - len(zip_file.comment)
154  zip_file.fp.seek(eocd_offset_from_end, os.SEEK_END)
155  assert zip_file.fp.read(4) == b'PK\005\006', (
156      'failed to find end-of-central-directory')
157
158  # Read out the "start of central directory" offset.
159  zip_file.fp.seek(eocd_offset_from_end + 16, os.SEEK_END)
160  start_of_central_directory = struct.unpack('<I', zip_file.fp.read(4))[0]
161
162  # Compute the offset after the last zip entry.
163  last_info = max(zip_file.infolist(), key=lambda i: i.header_offset)
164  last_header_size = (30 + len(last_info.filename) +
165                      _ReadZipInfoExtraFieldLength(zip_file, last_info))
166  end_of_last_file = (last_info.header_offset + last_header_size +
167                      last_info.compress_size)
168  return start_of_central_directory - end_of_last_file
169
170
def _RunReadobj(so_path, options):
  """Runs llvm-readobj (GNU output style) on |so_path|; returns its output."""
  cmd = [_READOBJ_PATH, '--elf-output-style=GNU']
  cmd += options
  cmd.append(so_path)
  return cmd_helper.GetCmdOutput(cmd)
174
175
def _ExtractLibSectionSizesFromApk(apk_path, lib_path):
  """Returns ELF section sizes of |lib_path| within |apk_path|, by group.

  Groups are the keys of _READELF_SIZES_METRICS plus 'bss' (all NOBITS
  sections). Sections that belong to no known group are added to 'other'
  and logged to stderr.
  """
  with Unzip(apk_path, filename=lib_path) as extracted_lib_path:
    nobits_sizes, remaining_sizes = _CreateSectionNameSizeMap(
        extracted_lib_path)
    sizes_by_group = collections.defaultdict(int)

    # Move every recognized section out of |remaining_sizes| and into its
    # group's total.
    for group_name, known_sections in _READELF_SIZES_METRICS.items():
      for known_section in known_sections:
        known_size = remaining_sizes.pop(known_section, None)
        if known_size is not None:
          sizes_by_group[group_name] += known_size

    # Consider all NOBITS sections as .bss.
    sizes_by_group['bss'] = sum(nobits_sizes.values())

    # Whatever is left was not recognized: count it as "other".
    for leftover_name, leftover_size in remaining_sizes.items():
      sys.stderr.write('Unknown elf section header: %s\n' % leftover_name)
      sizes_by_group['other'] += leftover_size

    return sizes_by_group
195
196
def _CreateSectionNameSizeMap(so_path):
  """Parses readobj section headers of |so_path| into name -> size dicts.

  Returns:
    A tuple (no_bits_section_sizes, section_sizes): the first holds sections
    of type NOBITS, the second holds all other sections.
  """
  readobj_output = _RunReadobj(so_path, ['-S', '--wide'])
  regular_sizes = {}
  nobits_sizes = {}
  # Matches  [ 2] .hash HASH 00000000006681f0 0001f0 003154 04   A  3   0  8
  row_re = re.compile(r'\[[\s\d]+\] (\..*)$', re.MULTILINE)
  for row in row_re.finditer(readobj_output):
    columns = row.group(1).split()
    # Columns: name, type, address, offset, size (hex), ...
    if columns[1] == 'NOBITS':
      nobits_sizes[columns[0]] = int(columns[4], 16)
    else:
      regular_sizes[columns[0]] = int(columns[4], 16)

  return nobits_sizes, regular_sizes
208
209
def _ParseManifestAttributes(apk_path):
  """Reads size-relevant attributes from the APK's AndroidManifest.xml.

  The manifest is dumped with "aapt d xmltree" and scanned with regular
  expressions.

  Args:
    apk_path: Path to the .apk file to inspect.

  Returns:
    A tuple (sdk_version, skip_extract_lib, on_demand):
      sdk_version: Value of minSdkVersion as an int, or None if not found.
      skip_extract_lib: True if the manifest explicitly sets
        android:extractNativeLibs="false", i.e. native libraries are not
        extracted at install time.
      on_demand: True if this is a feature split that is marked on-demand.
  """
  # Check if the manifest specifies whether or not to extract native libs.
  output = cmd_helper.GetCmdOutput([
      _AAPT_PATH.read(), 'd', 'xmltree', apk_path, 'AndroidManifest.xml'])

  def parse_attr(namespace, name):
    """Returns the attribute's value as an int, or None if it is absent."""
    # android:extractNativeLibs(0x010104ea)=(type 0x12)0x0
    # android:extractNativeLibs(0x010104ea)=(type 0x12)0xffffffff
    # dist:onDemand=(type 0x12)0xffffffff
    m = re.search(
        f'(?:{namespace}:)?{name}' + r'(?:\(.*?\))?=\(type .*?\)(\w+)', output)
    return m and int(m.group(1), 16)

  # Extraction is skipped only when extractNativeLibs is explicitly false
  # (0x0). A true value (0xffffffff) or a missing attribute (None) means the
  # libraries are extracted, so neither may set skip_extract_lib.
  skip_extract_lib = parse_attr('android', 'extractNativeLibs') == 0
  sdk_version = parse_attr('android', 'minSdkVersion')
  is_feature_split = parse_attr('android', 'isFeatureSplit')
  # Can use <dist:on-demand>, or <module dist:onDemand="true">.
  on_demand = parse_attr('dist', 'onDemand') or 'on-demand' in output
  on_demand = bool(on_demand and is_feature_split)

  return sdk_version, skip_extract_lib, on_demand
231
232
233def _NormalizeLanguagePaks(translations, factor):
234  english_pak = translations.FindByPattern(r'.*/en[-_][Uu][Ss]\.l?pak')
235  num_translations = translations.GetNumEntries()
236  ret = 0
237  if english_pak:
238    ret -= translations.ComputeZippedSize()
239    ret += int(english_pak.compress_size * num_translations * factor)
240  return ret
241
242
243def _NormalizeResourcesArsc(apk_path, num_arsc_files, num_translations,
244                            out_dir):
245  """Estimates the expected overhead of untranslated strings in resources.arsc.
246
247  See http://crbug.com/677966 for why this is necessary.
248  """
249  # If there are multiple .arsc files, use the resource packaged APK instead.
250  if num_arsc_files > 1:
251    if not out_dir:
252      return -float('inf')
253    ap_name = os.path.basename(apk_path).replace('.apk', '.ap_')
254    ap_path = os.path.join(out_dir, 'arsc/apks', ap_name)
255    if not os.path.exists(ap_path):
256      raise Exception('Missing expected file: %s, try rebuilding.' % ap_path)
257    apk_path = ap_path
258
259  aapt_output = _RunAaptDumpResources(apk_path)
260  # en-rUS is in the default config and may be cluttered with non-translatable
261  # strings, so en-rGB is a better baseline for finding missing translations.
262  en_strings = _CreateResourceIdValueMap(aapt_output, 'en-rGB')
263  fr_strings = _CreateResourceIdValueMap(aapt_output, 'fr')
264
265  # en-US and en-GB will never be translated.
266  config_count = num_translations - 2
267
268  size = 0
269  for res_id, string_val in en_strings.items():
270    if string_val == fr_strings[res_id]:
271      string_size = len(string_val)
272      # 7 bytes is the per-entry overhead (not specific to any string). See
273      # https://android.googlesource.com/platform/frameworks/base.git/+/android-4.2.2_r1/tools/aapt/StringPool.cpp#414.
274      # The 1.5 factor was determined experimentally and is meant to account for
275      # other languages generally having longer strings than english.
276      size += config_count * (7 + string_size * 1.5)
277
278  return int(size)
279
280
def _CreateResourceIdValueMap(aapt_output, lang):
  """Return a map of resource ids to string values for the given |lang|."""
  config_re = _AAPT_CONFIG_PATTERN % lang
  id_to_value = {}
  for config_section in re.finditer(config_re, aapt_output, re.DOTALL):
    for entry in _AAPT_ENTRY_RE.finditer(config_section.group(0)):
      # An id seen more than once keeps the value of its last occurrence.
      id_to_value[entry.group('id')] = entry.group('val')
  return id_to_value
287
288
def _RunAaptDumpResources(apk_path):
  """Returns the output of "aapt dump --values resources" for |apk_path|.

  Raises:
    Exception: If aapt exits with a non-zero status.
  """
  cmd = [_AAPT_PATH.read(), 'dump', '--values', 'resources', apk_path]
  status, output = cmd_helper.GetCmdStatusAndOutput(cmd)
  if status == 0:
    return output
  raise Exception('Failed running aapt command: "%s" with output "%s".' %
                  (' '.join(cmd), output))
296
297
298class _FileGroup:
299  """Represents a category that apk files can fall into."""
300
301  def __init__(self, name):
302    self.name = name
303    self._zip_infos = []
304    self._extracted_multipliers = []
305
306  def AddZipInfo(self, zip_info, extracted_multiplier=0):
307    self._zip_infos.append(zip_info)
308    self._extracted_multipliers.append(extracted_multiplier)
309
310  def AllEntries(self):
311    return iter(self._zip_infos)
312
313  def GetNumEntries(self):
314    return len(self._zip_infos)
315
316  def FindByPattern(self, pattern):
317    return next((i for i in self._zip_infos if re.match(pattern, i.filename)),
318                None)
319
320  def FindLargest(self):
321    if not self._zip_infos:
322      return None
323    return max(self._zip_infos, key=lambda i: i.file_size)
324
325  def ComputeZippedSize(self):
326    return sum(i.compress_size for i in self._zip_infos)
327
328  def ComputeUncompressedSize(self):
329    return sum(i.file_size for i in self._zip_infos)
330
331  def ComputeExtractedSize(self):
332    ret = 0
333    for zi, multiplier in zip(self._zip_infos, self._extracted_multipliers):
334      ret += zi.file_size * multiplier
335    return ret
336
337  def ComputeInstallSize(self):
338    return self.ComputeExtractedSize() + self.ComputeZippedSize()
339
340
def _AnalyzeInternal(apk_path,
                     sdk_version,
                     report_func,
                     dex_stats_collector,
                     out_dir,
                     apks_path=None,
                     split_name=None):
  """Analyse APK to determine size contributions of different file classes.

  Every zip entry is sorted into exactly one _FileGroup, and per-group as well
  as whole-APK metrics are emitted through |report_func|.

  Args:
    apk_path: Path to the .apk file to measure.
    sdk_version: SDK version (int) used to pick dex multipliers; compared
      against API levels 21, 24 and 28.
    report_func: Callable taking (graph_title, trace_title, value, units).
    dex_stats_collector: Object whose CollectFromZip() is called with the
      split name and |apk_path|.
    out_dir: Output directory forwarded to _NormalizeResourcesArsc(); may be
      falsy.
    apks_path: Path of the .apks file that |apk_path| was extracted from, if
      any. When set, the size of the matching Hindi locale split is added to
      the APK size and locale normalization is skipped.
    split_name: Name of the split being measured (used with |apks_path|).

  Returns: Normalized APK size. This is -inf when the APK has multiple .arsc
    files and no |out_dir| was given.
  """
  dex_stats_collector.CollectFromZip(split_name or '', apk_path)
  file_groups = []

  def make_group(name):
    # Creates a _FileGroup and registers it so the reporting loop sees it.
    group = _FileGroup(name)
    file_groups.append(group)
    return group

  def has_no_extension(filename):
    return os.path.splitext(filename)[1] == ''

  native_code = make_group('Native code')
  java_code = make_group('Java code')
  native_resources_no_translations = make_group('Native resources (no l10n)')
  translations = make_group('Native resources (l10n)')
  stored_translations = make_group('Native resources stored (l10n)')
  icu_data = make_group('ICU (i18n library) data')
  v8_snapshots = make_group('V8 Snapshots')
  png_drawables = make_group('PNG drawables')
  res_directory = make_group('Non-compiled Android resources')
  arsc = make_group('Compiled Android resources')
  metadata = make_group('Package metadata')
  unknown = make_group('Unknown files')
  notices = make_group('licenses.notice file')
  unwind_cfi = make_group('unwind_cfi (dev and canary only)')

  with zipfile.ZipFile(apk_path, 'r') as apk:
    apk_contents = apk.infolist()
    # Account for zipalign overhead that exists in local file header.
    zipalign_overhead = sum(
        _ReadZipInfoExtraFieldLength(apk, i) for i in apk_contents)
    # Account for zipalign overhead that exists in central directory header.
    # Happens when python aligns entries in apkbuilder.py, but does not
    # exist when using Android's zipalign. E.g. for bundle .apks files.
    zipalign_overhead += sum(len(i.extra) for i in apk_contents)
    signing_block_size = _MeasureApkSignatureBlock(apk)

  _, skip_extract_lib, _ = _ParseManifestAttributes(apk_path)

  # Pre-L: Dalvik - .odex file is simply decompressed/optimized dex file (~1x).
  # L, M: ART - .odex file is compiled version of the dex file (~4x).
  # N: ART - Uses Dalvik-like JIT for normal apps (~1x), full compilation for
  #    shared apps (~4x).
  # Actual multipliers calculated using "apk_operations.py disk-usage".
  # Will need to update multipliers once apk obfuscation is enabled.
  # E.g. with obfuscation, the 4.04 changes to 4.46.
  speed_profile_dex_multiplier = 1.17
  # The kind of APK is inferred from substrings of the file name.
  orig_filename = apks_path or apk_path
  is_webview = 'WebView' in orig_filename
  is_monochrome = 'Monochrome' in orig_filename
  is_library = 'Library' in orig_filename
  is_trichrome = 'TrichromeChrome' in orig_filename
  # WebView is always a shared APK since other apps load it.
  # Library is always shared since it's used by chrome and webview
  # Chrome is always shared since renderers can't access dex otherwise
  # (see DexFixer).
  is_shared_apk = sdk_version >= 24 and (is_monochrome or is_webview
                                         or is_library or is_trichrome)
  # Dex decompression overhead varies by Android version.
  if sdk_version < 21:
    # JellyBean & KitKat
    dex_multiplier = 1.16
  elif sdk_version < 24:
    # Lollipop & Marshmallow
    dex_multiplier = 4.04
  elif is_shared_apk:
    # Nougat (API 24) and above, compilation_filter=speed
    dex_multiplier = 4.04
  else:
    # Nougat (API 24) and above, compilation_filter=speed-profile
    dex_multiplier = speed_profile_dex_multiplier

  total_apk_size = os.path.getsize(apk_path)
  # Sort every zip entry into exactly one group. The order of the checks
  # matters: an entry lands in the first group whose condition it satisfies.
  for member in apk_contents:
    filename = member.filename
    # Directory entries carry no data.
    if filename.endswith('/'):
      continue
    if filename.endswith('.so'):
      basename = posixpath.basename(filename)
      # Only files named lib*.so count as extracted, and only when the
      # manifest does not disable extraction.
      should_extract_lib = not skip_extract_lib and basename.startswith('lib')
      native_code.AddZipInfo(
          member, extracted_multiplier=int(should_extract_lib))
    elif filename.startswith('classes') and filename.endswith('.dex'):
      # Android P+, uncompressed dex does not need to be extracted.
      compressed = member.compress_type != zipfile.ZIP_STORED
      multiplier = dex_multiplier
      if not compressed and sdk_version >= 28:
        multiplier -= 1

      java_code.AddZipInfo(member, extracted_multiplier=multiplier)
    elif re.search(_RE_NON_LANGUAGE_PAK, filename):
      native_resources_no_translations.AddZipInfo(member)
    elif filename.endswith('.pak') or filename.endswith('.lpak'):
      compressed = member.compress_type != zipfile.ZIP_STORED
      bucket = translations if compressed else stored_translations
      extracted_multiplier = 0
      # Of the compressed locale paks, only English ones (name contains 'en_'
      # or 'en-') are counted as extracted.
      if compressed:
        extracted_multiplier = int('en_' in filename or 'en-' in filename)
      bucket.AddZipInfo(member, extracted_multiplier=extracted_multiplier)
    elif 'icu' in filename and filename.endswith('.dat'):
      icu_data.AddZipInfo(member)
    elif filename.endswith('.bin'):
      v8_snapshots.AddZipInfo(member)
    elif filename.startswith('res/'):
      if (filename.endswith('.png') or filename.endswith('.webp')
          or has_no_extension(filename)):
        png_drawables.AddZipInfo(member)
      else:
        res_directory.AddZipInfo(member)
    elif filename.endswith('.arsc'):
      arsc.AddZipInfo(member)
    elif filename.startswith('META-INF') or filename in (
        'AndroidManifest.xml', 'assets/webapk_dex_version.txt',
        'stamp-cert-sha256'):
      metadata.AddZipInfo(member)
    elif filename.endswith('.notice'):
      notices.AddZipInfo(member)
    elif filename.startswith('assets/unwind_cfi'):
      unwind_cfi.AddZipInfo(member)
    else:
      unknown.AddZipInfo(member)

  if apks_path:
    # We're mostly focused on size of Chrome for non-English locales, so assume
    # Hindi (arbitrarily chosen) locale split is installed.
    with zipfile.ZipFile(apks_path) as z:
      subpath = 'splits/{}-hi.apk'.format(split_name)
      if subpath in z.namelist():
        hindi_apk_info = z.getinfo(subpath)
        total_apk_size += hindi_apk_info.file_size
      else:
        assert split_name != 'base', 'splits/base-hi.apk should always exist'

  total_install_size = total_apk_size
  total_install_size_android_go = total_apk_size
  # Starts at the full APK size; each group's zipped size is subtracted below,
  # leaving the bytes that belong to no entry's compressed data.
  zip_overhead = total_apk_size

  for group in file_groups:
    actual_size = group.ComputeZippedSize()
    install_size = group.ComputeInstallSize()
    uncompressed_size = group.ComputeUncompressedSize()
    extracted_size = group.ComputeExtractedSize()
    total_install_size += extracted_size
    zip_overhead -= actual_size

    report_func('Breakdown', group.name + ' size', actual_size, 'bytes')
    report_func('InstallBreakdown', group.name + ' size', int(install_size),
                'bytes')
    # Only a few metrics are compressed in the first place.
    # To avoid over-reporting, track uncompressed size only for compressed
    # entries.
    if uncompressed_size != actual_size:
      report_func('Uncompressed', group.name + ' size', uncompressed_size,
                  'bytes')

    if group is java_code:
      # Updates are compiled using quicken, but system image uses speed-profile.
      multiplier = speed_profile_dex_multiplier

      # Android P+, uncompressed dex does not need to be extracted.
      compressed = uncompressed_size != actual_size
      if not compressed and sdk_version >= 28:
        multiplier -= 1
      extracted_size = int(uncompressed_size * multiplier)
      total_install_size_android_go += extracted_size
      report_func('InstallBreakdownGo', group.name + ' size',
                  actual_size + extracted_size, 'bytes')
    elif group is translations and apks_path:
      # Assume Hindi rather than English (accounted for above in total_apk_size)
      total_install_size_android_go += actual_size
    else:
      total_install_size_android_go += extracted_size

  # Per-file zip overhead is caused by:
  # * 30 byte entry header + len(file name)
  # * 46 byte central directory entry + len(file name)
  # * 0-3 bytes for zipalign.
  report_func('Breakdown', 'Zip Overhead', zip_overhead, 'bytes')
  report_func('InstallSize', 'APK size', total_apk_size, 'bytes')
  report_func('InstallSize', 'Estimated installed size',
              int(total_install_size), 'bytes')
  report_func('InstallSize', 'Estimated installed size (Android Go)',
              int(total_install_size_android_go), 'bytes')
  transfer_size = _CalculateCompressedSize(apk_path)
  report_func('TransferSize', 'Transfer size (deflate)', transfer_size, 'bytes')

  # Size of main dex vs remaining.
  main_dex_info = java_code.FindByPattern('classes.dex')
  if main_dex_info:
    main_dex_size = main_dex_info.file_size
    report_func('Specifics', 'main dex size', main_dex_size, 'bytes')
    secondary_size = java_code.ComputeUncompressedSize() - main_dex_size
    report_func('Specifics', 'secondary dex size', secondary_size, 'bytes')

  # The largest native library is treated as the "main" one.
  main_lib_info = native_code.FindLargest()
  native_code_unaligned_size = 0
  for lib_info in native_code.AllEntries():
    # Skip placeholders.
    if lib_info.file_size == 0:
      continue
    section_sizes = _ExtractLibSectionSizesFromApk(apk_path, lib_info.filename)
    # 'bss' is left out of the unaligned (on-disk) size.
    native_code_unaligned_size += sum(v for k, v in section_sizes.items()
                                      if k != 'bss')
    # Size of main .so vs remaining.
    if lib_info == main_lib_info:
      main_lib_size = lib_info.file_size
      report_func('Specifics', 'main lib size', main_lib_size, 'bytes')
      secondary_size = native_code.ComputeUncompressedSize() - main_lib_size
      report_func('Specifics', 'other lib size', secondary_size, 'bytes')

      for metric_name, size in section_sizes.items():
        report_func('MainLibInfo', metric_name, size, 'bytes')

  # Main metric that we want to monitor for jumps.
  normalized_apk_size = total_apk_size
  # unwind_cfi exists only in dev, canary, and non-channel builds.
  normalized_apk_size -= unwind_cfi.ComputeZippedSize()
  # Sections within .so files get 4kb aligned, so use section sizes rather than
  # file size. Also gets rid of compression.
  normalized_apk_size -= native_code.ComputeZippedSize()
  normalized_apk_size += native_code_unaligned_size
  # Normalized dex size: Size within the zip + size on disk for Android Go
  # devices running Android O (which ~= uncompressed dex size).
  # Use a constant compression factor to account for fluctuations.
  normalized_apk_size -= java_code.ComputeZippedSize()
  normalized_apk_size += java_code.ComputeUncompressedSize()
  # Don't include zipalign overhead in normalized size, since it effectively
  # causes size changes in files that precede aligned files to be rounded.
  # For APKs where classes.dex directly precedes libchrome.so (the normal case),
  # this causes small dex size changes to disappear into libchrome.so alignment.
  normalized_apk_size -= zipalign_overhead
  # Don't include the size of the apk's signing block because it can fluctuate
  # by up to 4kb (from my non-scientific observations), presumably based on hash
  # sizes.
  normalized_apk_size -= signing_block_size

  # Unaligned size should be ~= uncompressed size or something is wrong.
  # As of now, padding_fraction ~= .007
  padding_fraction = -_PercentageDifference(
      native_code.ComputeUncompressedSize(), native_code_unaligned_size)
  # Ignore this check for small / no native code
  if native_code.ComputeUncompressedSize() > 1000000:
    assert 0 <= padding_fraction < .02, (
        'Padding was: {} (file_size={}, sections_sum={})'.format(
            padding_fraction, native_code.ComputeUncompressedSize(),
            native_code_unaligned_size))

  if apks_path:
    # Locale normalization not needed when measuring only one locale.
    # E.g. a change that adds 300 chars of untranslated strings would cause the
    # metric to be off by only 390 bytes (assuming a multiplier of 2.3 for
    # Hindi).
    pass
  else:
    # Avoid noise caused when strings change and translations haven't yet been
    # updated.
    num_translations = translations.GetNumEntries()
    num_stored_translations = stored_translations.GetNumEntries()

    if num_translations > 1:
      # Multipliers found by looking at MonochromePublic.apk and seeing how much
      # smaller en-US.pak is relative to the average locale.pak.
      normalized_apk_size += _NormalizeLanguagePaks(translations, 1.17)
    if num_stored_translations > 1:
      normalized_apk_size += _NormalizeLanguagePaks(stored_translations, 1.43)
    if num_translations + num_stored_translations > 1:
      if num_translations == 0:
        # WebView stores all locale paks uncompressed.
        num_arsc_translations = num_stored_translations
      else:
        # Monochrome has more configurations than Chrome since it includes
        # WebView (which supports more locales), but these should mostly be
        # empty so ignore them here.
        num_arsc_translations = num_translations
      normalized_apk_size += _NormalizeResourcesArsc(apk_path,
                                                     arsc.GetNumEntries(),
                                                     num_arsc_translations,
                                                     out_dir)

  # It will be -Inf for .apk files with multiple .arsc files and no out_dir set.
  if normalized_apk_size < 0:
    sys.stderr.write('Skipping normalized_apk_size (no output directory set)\n')
  else:
    report_func('Specifics', 'normalized apk size', normalized_apk_size,
                'bytes')
  # The "file count" metric cannot be grouped with any other metrics when the
  # end result is going to be uploaded to the perf dashboard in the HistogramSet
  # format due to mixed units (bytes vs. zip entries) causing malformed
  # summaries to be generated.
  # TODO(https://crbug.com/903970): Remove this workaround if unit mixing is
  # ever supported.
  report_func('FileCount', 'file count', len(apk_contents), 'zip entries')

  # Log files that matched none of the known categories.
  for info in unknown.AllEntries():
    sys.stderr.write(
        'Unknown entry: %s %d\n' % (info.filename, info.compress_size))
  return normalized_apk_size
649
650
651def _CalculateCompressedSize(file_path):
652  CHUNK_SIZE = 256 * 1024
653  compressor = zlib.compressobj()
654  total_size = 0
655  with open(file_path, 'rb') as f:
656    for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
657      total_size += len(compressor.compress(chunk))
658  total_size += len(compressor.flush())
659  return total_size
660
661
@contextmanager
def Unzip(zip_file, filename=None):
  """Utility for temporary use of a single file in a zip archive.

  Yields the path of the first file extracted for |filename|; the temporary
  directory holding it is managed by build_utils.TempDir().

  Raises:
    Exception: If nothing was extracted from |zip_file|.
  """
  with build_utils.TempDir() as scratch_dir:
    extracted_paths = build_utils.ExtractAll(zip_file,
                                             scratch_dir,
                                             True,
                                             pattern=filename)
    if len(extracted_paths) == 0:
      raise Exception('%s not found in %s' % (filename, zip_file))
    first_path = extracted_paths[0]
    yield first_path
672
673
def _ConfigOutDir(out_dir):
  """Registers |out_dir| as the output directory, or tries to detect one.

  Returns:
    |out_dir| if it was given, otherwise the auto-detected output directory.
    If detection fails, the (falsy) |out_dir| is returned unchanged.
  """
  if out_dir:
    constants.SetOutputDirectory(out_dir)
    return out_dir

  try:
    # Triggers auto-detection when CWD == output directory.
    constants.CheckOutputDirectory()
    return constants.GetOutDirectory()
  except Exception:  # pylint: disable=broad-except
    # Best effort: running without an output directory is allowed.
    return out_dir
685
686
687def _IterSplits(namelist):
688  for subpath in namelist:
689    # Looks for paths like splits/vr-master.apk, splits/vr-hi.apk.
690    name_parts = subpath.split('/')
691    if name_parts[0] == 'splits' and len(name_parts) == 2:
692      name_parts = name_parts[1].split('-')
693      if len(name_parts) == 2:
694        split_name, config_name = name_parts
695        if config_name == 'master.apk':
696          yield subpath, split_name
697
698
699def _ExtractToTempFile(zip_obj, subpath, temp_file):
700  temp_file.seek(0)
701  temp_file.truncate()
702  temp_file.write(zip_obj.read(subpath))
703  temp_file.flush()
704
705
def _AnalyzeApkOrApks(report_func, apk_path, out_dir):
  """Measures an .apk or .apks file and reports its metrics.

  For an .apk, the file is analyzed directly. For an .apks bundle, the base
  split and every other "-master" split are extracted one at a time into a
  temporary file and analyzed; their metrics are summed before being passed
  on to |report_func|.

  Args:
    report_func: Callable taking (graph_title, trace_title, value, units).
    apk_path: Path to a file ending in '.apk' or '.apks'.
    out_dir: Output directory forwarded to _AnalyzeInternal(); may be falsy.

  Returns:
    The method_count.DexStatsCollector that gathered the dex statistics.

  Raises:
    Exception: If |apk_path| ends in neither '.apk' nor '.apks'.
  """
  # Create DexStatsCollector here to track unique methods across base & chrome
  # modules.
  dex_stats_collector = method_count.DexStatsCollector()

  if apk_path.endswith('.apk'):
    sdk_version, _, _ = _ParseManifestAttributes(apk_path)
    _AnalyzeInternal(apk_path, sdk_version, report_func, dex_stats_collector,
                     out_dir)
  elif apk_path.endswith('.apks'):
    # A single temporary file is reused for every split that gets extracted.
    with tempfile.NamedTemporaryFile(suffix='.apk') as f:
      with zipfile.ZipFile(apk_path) as z:
        # Currently bundletool is creating two apks when .apks is created
        # without specifying an sdkVersion. Always measure the one with an
        # uncompressed shared library.
        try:
          info = z.getinfo('splits/base-master_2.apk')
        except KeyError:
          info = z.getinfo('splits/base-master.apk')
        _ExtractToTempFile(z, info.filename, f)
        sdk_version, _, _ = _ParseManifestAttributes(f.name)

        # Swap in an accumulating reporter so that metrics of all splits are
        # summed; the totals are forwarded to the original reporter below.
        orig_report_func = report_func
        report_func = _AccumulatingReporter()

        def do_measure(split_name, on_demand):
          # Analyzes the split currently stored in |f|.
          logging.info('Measuring %s on_demand=%s', split_name, on_demand)
          # Use no-op reporting functions to get normalized size for DFMs.
          inner_report_func = report_func
          inner_dex_stats_collector = dex_stats_collector
          if on_demand:
            inner_report_func = lambda *_: None
            inner_dex_stats_collector = method_count.DexStatsCollector()

          size = _AnalyzeInternal(f.name,
                                  sdk_version,
                                  inner_report_func,
                                  inner_dex_stats_collector,
                                  out_dir,
                                  apks_path=apk_path,
                                  split_name=split_name)
          # The per-split size is reported even for on-demand splits.
          report_func('DFM_' + split_name, 'Size with hindi', size, 'bytes')

        # Measure base outside of the loop since we've already extracted it.
        do_measure('base', on_demand=False)

        for subpath, split_name in _IterSplits(z.namelist()):
          if split_name != 'base':
            _ExtractToTempFile(z, subpath, f)
            _, _, on_demand = _ParseManifestAttributes(f.name)
            do_measure(split_name, on_demand=on_demand)

        # Forward the summed metrics, then restore the caller's reporter for
        # the dex stats below.
        report_func.DumpReports(orig_report_func)
        report_func = orig_report_func
  else:
    raise Exception('Unknown file type: ' + apk_path)

  # Report dex stats outside of _AnalyzeInternal() so that the "unique methods"
  # metric is not just the sum of the base and chrome modules.
  for metric, count in dex_stats_collector.GetTotalCounts().items():
    report_func('Dex', metric, count, 'entries')
  report_func('Dex', 'unique methods',
              dex_stats_collector.GetUniqueMethodCount(), 'entries')
  report_func('DexCache', 'DexCache',
              dex_stats_collector.GetDexCacheSize(pre_oreo=sdk_version < 26),
              'bytes')

  return dex_stats_collector
774
775
def _ResourceSizes(args):
  """Runs the size analysis selected by |args| and emits chartjson if asked.

  When any of the trichrome inputs is given, each one is analyzed under its
  own trace-title prefix and totals are synthesized afterwards; otherwise
  args.input is analyzed on its own.

  Args:
    args: Parsed command-line arguments.
  """
  chartjson = None
  if args.output_format:
    chartjson = _BASE_CHART.copy()
  reporter = _ChartJsonReporter(chartjson)
  # Merged collector used to track unique methods across trichrome APKs.
  merged_dex_stats = method_count.DexStatsCollector()

  candidates = (
      ('Chrome_', args.trichrome_chrome),
      ('WebView_', args.trichrome_webview),
      ('Library_', args.trichrome_library),
  )
  trichrome_inputs = [(prefix, path) for prefix, path in candidates if path]

  for prefix, path in trichrome_inputs:
    reporter.trace_title_prefix = prefix
    child_dex_stats = _AnalyzeApkOrApks(reporter, path, args.out_dir)
    merged_dex_stats.MergeFrom(prefix, child_dex_stats)

  if trichrome_inputs:
    reporter.SynthesizeTotals(merged_dex_stats.GetUniqueMethodCount())
  else:
    _AnalyzeApkOrApks(reporter, args.input, args.out_dir)

  if chartjson:
    _DumpChartJson(args, chartjson)
801
802
def _DumpChartJson(args, chartjson):
  """Writes |chartjson| as JSON and optionally converts it to histograms.

  The destination is chosen from |args|:
    * args.output_file == '-': written to stdout.
    * args.output_file set: written to that path.
    * otherwise: written to <args.output_dir>/results-chart.json.

  When args.output_format is 'histograms', the results-chart.json written to
  args.output_dir is additionally converted and the result is saved as
  <args.output_dir>/perf_results.json.

  Args:
    args: Parsed command-line arguments; reads output_file, output_dir and
        output_format.
    chartjson: JSON-serializable chart data.

  Raises:
    Exception: If histograms are requested together with --output-file (no
        results-chart.json exists to convert), or if the conversion fails.
  """
  # Only set when the chartjson lands in |args.output_dir|; the histogram
  # conversion below needs that on-disk file as its input.
  results_path = None

  if args.output_file == '-':
    json.dump(chartjson, sys.stdout, indent=2)
  else:
    if args.output_file:
      dump_path = args.output_file
    else:
      results_path = os.path.join(args.output_dir, 'results-chart.json')
      logging.critical('Dumping chartjson to %s', results_path)
      dump_path = results_path
    # Use a context manager so the file is closed even if serialization fails.
    with open(dump_path, 'w') as json_file:
      json.dump(chartjson, json_file, indent=2)

  # We would ideally generate a histogram set directly instead of generating
  # chartjson then converting. However, perf_tests_results_helper is in
  # //build, which doesn't seem to have any precedent for depending on
  # anything in Catapult. This can probably be fixed, but since this doesn't
  # need to be super fast or anything, converting is a good enough solution
  # for the time being.
  if args.output_format == 'histograms':
    if results_path is None:
      # Previously this combination crashed with an UnboundLocalError.
      raise Exception('--output-format=histograms cannot be combined with '
                      '--output-file; use --output-dir instead.')

    histogram_result = convert_chart_json.ConvertChartJson(results_path)
    if histogram_result.returncode != 0:
      failure_output = histogram_result.stdout
      # stdout is written in binary mode below, so it may be bytes; decode it
      # to avoid a TypeError while building the error message.
      if isinstance(failure_output, bytes):
        failure_output = failure_output.decode('utf-8', 'replace')
      raise Exception('chartjson conversion failed with error: ' +
                      failure_output)

    histogram_path = os.path.join(args.output_dir, 'perf_results.json')
    logging.critical('Dumping histograms to %s', histogram_path)
    with open(histogram_path, 'wb') as json_file:
      json_file.write(histogram_result.stdout)
834
835
def main():
  """Parses the command line, runs the analysis and records its outcome.

  The outcome is written to the isolated-script output files and posted to
  the result sink client when those are available, whether or not the
  analysis raised.
  """
  build_utils.InitLogging('RESOURCE_SIZES_DEBUG')
  parser = argparse.ArgumentParser(description='Print APK size metrics.')
  parser.add_argument('--min-pak-resource-size',
                      type=int,
                      default=20 * 1024,
                      help='Minimum byte size of displayed pak resources.')
  parser.add_argument('--chromium-output-directory',
                      dest='out_dir',
                      type=os.path.realpath,
                      help='Location of the build artifacts.')
  parser.add_argument(
      '--chartjson',
      action='store_true',
      help='DEPRECATED. Use --output-format=chartjson instead.')
  parser.add_argument('--output-format',
                      choices=['chartjson', 'histograms'],
                      help='Output the results to a file in the given format '
                      'instead of printing the results.')
  parser.add_argument('--loadable_module', help='Obsolete (ignored).')

  # Accepted to conform to the isolated script interface, but ignored.
  parser.add_argument('--isolated-script-test-filter', help=argparse.SUPPRESS)
  parser.add_argument('--isolated-script-test-perf-output',
                      type=os.path.realpath,
                      help=argparse.SUPPRESS)

  # At most one of the output destinations below may be given.
  destinations = parser.add_mutually_exclusive_group()
  destinations.add_argument('--output-dir',
                            default='.',
                            help='Directory to save chartjson to.')
  destinations.add_argument(
      '--output-file',
      help='Path to output .json (replaces --output-dir). Works only for '
      '--output-format=chartjson')
  destinations.add_argument(
      '--isolated-script-test-output',
      type=os.path.realpath,
      help='File to which results will be written in the simplified JSON '
      'output format.')

  parser.add_argument('input', help='Path to .apk or .apks file to measure.')
  trichrome = parser.add_argument_group(
      'Trichrome inputs',
      description='When specified, |input| is used only as Test suite name.')
  trichrome.add_argument('--trichrome-chrome',
                         help='Path to Trichrome Chrome .apks')
  trichrome.add_argument('--trichrome-webview',
                         help='Path to Trichrome WebView .apk(s)')
  trichrome.add_argument('--trichrome-library',
                         help='Path to Trichrome Library .apk')
  args = parser.parse_args()

  args.out_dir = _ConfigOutDir(args.out_dir)
  devil_chromium.Initialize(output_directory=args.out_dir)

  # TODO(bsheedy): Remove this once uses of --chartjson have been removed.
  if args.chartjson:
    args.output_format = 'chartjson'

  result_sink_client = result_sink.TryInitClient()
  # Stays invalid unless the analysis below runs to completion.
  isolated_script_output = {'valid': False, 'failures': []}

  test_name = 'resource_sizes (%s)' % os.path.basename(args.input)

  if args.isolated_script_test_output:
    # Results go in a per-test directory next to the requested output file.
    isolated_dir = os.path.dirname(args.isolated_script_test_output)
    args.output_dir = os.path.join(isolated_dir, test_name)
    if not os.path.exists(args.output_dir):
      os.makedirs(args.output_dir)

  try:
    _ResourceSizes(args)
    isolated_script_output = {
        'valid': True,
        'failures': [],
    }
  finally:
    if args.isolated_script_test_output:
      # The same summary is written both into the output directory and to
      # the path requested on the command line.
      summary_paths = (
          os.path.join(args.output_dir, 'test_results.json'),
          args.isolated_script_test_output,
      )
      for summary_path in summary_paths:
        with open(summary_path, 'w') as output_file:
          json.dump(isolated_script_output, output_file)
    if result_sink_client:
      if not isolated_script_output['valid']:
        status = result_types.UNKNOWN
      elif isolated_script_output['failures']:
        status = result_types.FAIL
      else:
        status = result_types.PASS
      result_sink_client.Post(test_name, status, None, None, None)
933
934
# Run the size analysis only when executed as a script, not when imported.
if __name__ == '__main__':
  main()
937