testSharding.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. #!/usr/bin/env python3
  2. # Copyright Catch2 Authors
  3. # Distributed under the Boost Software License, Version 1.0.
  4. # (See accompanying file LICENSE_1_0.txt or copy at
  5. # https://www.boost.org/LICENSE_1_0.txt)
  6. # SPDX-License-Identifier: BSL-1.0
  7. """
  8. This test script verifies that sharding changes which tests run in each shard without changing the overall set or order of tests.
  9. This is done by running the binary multiple times, once to list all the tests,
  10. once per shard to list the tests for that shard, and once again per shard to
  11. execute the tests. The sharded lists are compared to the full list to ensure
  12. none are skipped or duplicated, and that the order remains the same.
  13. """
  14. import random
  15. import subprocess
  16. import sys
  17. import xml.etree.ElementTree as ET
  18. from collections import namedtuple
  19. from typing import List, Dict
  20. seed = random.randint(0, 2 ** 32 - 1)
  21. number_of_shards = 5
  22. def make_base_commandline(self_test_exe):
  23. return [
  24. self_test_exe,
  25. '--reporter', 'xml',
  26. '--order', 'rand',
  27. '--rng-seed', str(seed),
  28. "[generators]~[benchmarks]~[.]"
  29. ]
  30. def list_tests(self_test_exe: str, extra_args: List[str] = None):
  31. cmd = make_base_commandline(self_test_exe) + ['--list-tests']
  32. if extra_args:
  33. cmd.extend(extra_args)
  34. try:
  35. ret = subprocess.run(cmd,
  36. stdout = subprocess.PIPE,
  37. stderr = subprocess.PIPE,
  38. timeout = 10,
  39. check = True,
  40. universal_newlines = True)
  41. except subprocess.CalledProcessError as ex:
  42. print('Could not list tests:\n{}'.format(ex.stderr))
  43. if ret.stderr:
  44. raise RuntimeError("Unexpected error output:\n" + ret.stderr)
  45. root = ET.fromstring(ret.stdout)
  46. result = [elem.text for elem in root.findall('./TestCase/Name')]
  47. if len(result) < 2:
  48. raise RuntimeError("Unexpectedly few tests listed (got {})".format(
  49. len(result)))
  50. return result
  51. def execute_tests(self_test_exe: str, extra_args: List[str] = None):
  52. cmd = make_base_commandline(self_test_exe)
  53. if extra_args:
  54. cmd.extend(extra_args)
  55. try:
  56. ret = subprocess.run(cmd,
  57. stdout = subprocess.PIPE,
  58. stderr = subprocess.PIPE,
  59. timeout = 10,
  60. check = True,
  61. universal_newlines = True)
  62. except subprocess.CalledProcessError as ex:
  63. print('Could not list tests:\n{}'.format(ex.stderr))
  64. if ret.stderr:
  65. raise RuntimeError("Unexpected error output:\n" + process.stderr)
  66. root = ET.fromstring(ret.stdout)
  67. result = [elem.attrib["name"] for elem in root.findall('./TestCase')]
  68. if len(result) < 2:
  69. raise RuntimeError("Unexpectedly few tests listed (got {})".format(
  70. len(result)))
  71. return result
  72. def test_sharded_listing(self_test_exe: str) -> Dict[int, List[str]]:
  73. """
  74. Asks the test binary for list of all tests, and also for lists of
  75. tests from shards.
  76. The combination of shards is then checked whether it corresponds to
  77. the full list of all tests.
  78. Returns the dictionary of shard-index => listed tests for later use.
  79. """
  80. all_tests = list_tests(self_test_exe)
  81. big_shard_tests = list_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
  82. assert all_tests == big_shard_tests, (
  83. "No-sharding test list does not match the listing of big shard:\nNo shard:\n{}\n\nWith shard:\n{}\n".format(
  84. '\n'.join(all_tests),
  85. '\n'.join(big_shard_tests)
  86. )
  87. )
  88. shard_listings = dict()
  89. for shard_idx in range(number_of_shards):
  90. shard_listings[shard_idx] = list_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
  91. shard_sizes = [len(v) for v in shard_listings.values()]
  92. assert len(all_tests) == sum(shard_sizes)
  93. # Check that the shards have roughly the right sizes (e.g. we don't
  94. # have all tests in single shard and the others are empty)
  95. differences = [abs(x1 - x2) for x1, x2 in zip(shard_sizes, shard_sizes[1:])]
  96. assert all(diff <= 1 for diff in differences), "A shard has weird size: {}".format(shard_sizes)
  97. combined_shards = [inner for outer in shard_listings.values() for inner in outer]
  98. assert all_tests == combined_shards, (
  99. "All tests and combined shards disagree.\nNo shard:\n{}\n\nCombined:\n{}\n\n".format(
  100. '\n'.join(all_tests),
  101. '\n'.join(combined_shards)
  102. )
  103. )
  104. shard_listings[-1] = all_tests
  105. return shard_listings
  106. def test_sharded_execution(self_test_exe: str, listings: Dict[int, List[str]]):
  107. """
  108. Runs the test binary and checks that the executed tests match the
  109. previously listed tests.
  110. Also does this for various shard indices, and that the combination
  111. of all shards matches the full run/listing.
  112. """
  113. all_tests = execute_tests(self_test_exe)
  114. big_shard_tests = execute_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
  115. assert all_tests == big_shard_tests
  116. assert listings[-1] == all_tests
  117. for shard_idx in range(number_of_shards):
  118. assert listings[shard_idx] == execute_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
  119. def main():
  120. self_test_exe, = sys.argv[1:]
  121. listings = test_sharded_listing(self_test_exe)
  122. test_sharded_execution(self_test_exe, listings)
  123. if __name__ == '__main__':
  124. sys.exit(main())