Reapply "Update Benchmark (#83488)" (#83916)
This reverts commit aec6a04b8e99b42eca431fc0b56947937d3a14c2.
(google/benchmark is still at hash 1576991177ba97a4b2ff6c45950f1fa6e9aa678c, as it was in #83488; the same extra local diffs were reapplied.)
Verified locally.
diff --git a/third-party/benchmark/.pre-commit-config.yaml b/third-party/benchmark/.pre-commit-config.yaml
new file mode 100644
index 0000000..0247d1b
--- /dev/null
+++ b/third-party/benchmark/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+repos:
+ - repo: https://github.com/keith/pre-commit-buildifier
+ rev: 6.4.0
+ hooks:
+ - id: buildifier
+ - id: buildifier-lint
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v1.8.0
+ hooks:
+ - id: mypy
+ types_or: [ python, pyi ]
+ args: [ "--ignore-missing-imports", "--scripts-are-modules" ]
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.1.13
+ hooks:
+ - id: ruff
+ args: [ --fix, --exit-non-zero-on-fix ]
+ - id: ruff-format
\ No newline at end of file
diff --git a/third-party/benchmark/.ycm_extra_conf.py b/third-party/benchmark/.ycm_extra_conf.py
index 1482c7b..caf257f 100644
--- a/third-party/benchmark/.ycm_extra_conf.py
+++ b/third-party/benchmark/.ycm_extra_conf.py
@@ -1,4 +1,5 @@
import os
+
import ycm_core
# These are the compilation flags that will be used in case there's no
@@ -91,7 +92,9 @@
for extension in SOURCE_EXTENSIONS:
replacement_file = basename + extension
if os.path.exists(replacement_file):
- compilation_info = database.GetCompilationInfoForFile(replacement_file)
+ compilation_info = database.GetCompilationInfoForFile(
+ replacement_file
+ )
if compilation_info.compiler_flags_:
return compilation_info
return None
@@ -107,7 +110,8 @@
return None
final_flags = MakeRelativePathsInFlagsAbsolute(
- compilation_info.compiler_flags_, compilation_info.compiler_working_dir_
+ compilation_info.compiler_flags_,
+ compilation_info.compiler_working_dir_,
)
else:
relative_to = DirectoryOfThisScript()
diff --git a/third-party/benchmark/AUTHORS b/third-party/benchmark/AUTHORS
index 54770f3..2170e46 100644
--- a/third-party/benchmark/AUTHORS
+++ b/third-party/benchmark/AUTHORS
@@ -13,6 +13,7 @@
Andriy Berestovskyy <[email protected]>
Arne Beer <[email protected]>
Carto
+Cezary Skrzyński <[email protected]>
Christian Wassermann <[email protected]>
Christopher Seymour <[email protected]>
Colin Braley <[email protected]>
@@ -27,10 +28,13 @@
Eric Fiselier <[email protected]>
Eugene Zhuk <[email protected]>
Evgeny Safronov <[email protected]>
+Fabien Pichot <[email protected]>
Federico Ficarelli <[email protected]>
Felix Homann <[email protected]>
+Gergely Meszaros <[email protected]>
Gergő Szitár <[email protected]>
Google Inc.
+Henrique Bucher <[email protected]>
International Business Machines Corporation
Ismael Jimenez Martinez <[email protected]>
Jern-Kuan Leong <[email protected]>
@@ -41,8 +45,11 @@
Kaito Udagawa <[email protected]>
Kishan Kumar <[email protected]>
Lei Xu <[email protected]>
+Marcel Jacobse <[email protected]>
Matt Clarkson <[email protected]>
Maxim Vafin <[email protected]>
+Mike Apodaca <[email protected]>
+Min-Yih Hsu <[email protected]>
MongoDB Inc.
Nick Hutchinson <[email protected]>
Norman Heino <[email protected]>
@@ -50,13 +57,16 @@
Ori Livneh <[email protected]>
Paul Redmond <[email protected]>
Radoslav Yovchev <[email protected]>
+Raghu Raja <[email protected]>
+Rainer Orth <[email protected]>
Roman Lebedev <[email protected]>
Sayan Bhattacharjee <[email protected]>
+Shapr3D <[email protected]>
Shuo Chen <[email protected]>
+Staffan Tjernstrom <[email protected]>
Steinar H. Gunderson <[email protected]>
Stripe, Inc.
Tobias Schmidt <[email protected]>
Yixuan Qiu <[email protected]>
Yusuke Suzuki <[email protected]>
Zbigniew Skowron <[email protected]>
-Min-Yih Hsu <[email protected]>
diff --git a/third-party/benchmark/CMakeLists.txt b/third-party/benchmark/CMakeLists.txt
index 8af4940..d9bcc6a 100644
--- a/third-party/benchmark/CMakeLists.txt
+++ b/third-party/benchmark/CMakeLists.txt
@@ -1,19 +1,7 @@
-cmake_minimum_required (VERSION 3.5.1)
+# Require CMake 3.10. If available, use the policies up to CMake 3.22.
+cmake_minimum_required (VERSION 3.10...3.22)
-foreach(p
- CMP0048 # OK to clear PROJECT_VERSION on project()
- CMP0054 # CMake 3.1
- CMP0056 # export EXE_LINKER_FLAGS to try_run
- CMP0057 # Support no if() IN_LIST operator
- CMP0063 # Honor visibility properties for all targets
- CMP0077 # Allow option() overrides in importing projects
- )
- if(POLICY ${p})
- cmake_policy(SET ${p} NEW)
- endif()
-endforeach()
-
-project (benchmark VERSION 1.6.0 LANGUAGES CXX)
+project (benchmark VERSION 1.8.3 LANGUAGES CXX)
option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON)
@@ -26,11 +14,14 @@
# PGC++ maybe reporting false positives.
set(BENCHMARK_ENABLE_WERROR OFF)
endif()
+if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
+ set(BENCHMARK_ENABLE_WERROR OFF)
+endif()
if(BENCHMARK_FORCE_WERROR)
set(BENCHMARK_ENABLE_WERROR ON)
endif(BENCHMARK_FORCE_WERROR)
-if(NOT MSVC)
+if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF)
else()
set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE)
@@ -50,8 +41,11 @@
option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF)
-set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-if(MSVC)
+# Export only public symbols
+set(CMAKE_CXX_VISIBILITY_PRESET hidden)
+set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and
# cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the
# undocumented, but working variable.
@@ -72,7 +66,7 @@
return()
endif()
endif()
- if (MSVC)
+ if (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
return()
elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
return()
@@ -111,22 +105,32 @@
# If no git version can be determined, use the version
# from the project() command
if ("${GIT_VERSION}" STREQUAL "0.0.0")
- set(VERSION "${benchmark_VERSION}")
+ set(VERSION "v${benchmark_VERSION}")
else()
set(VERSION "${GIT_VERSION}")
endif()
+
+# Normalize version: drop "v" prefix, replace first "-" with ".",
+# drop everything after second "-" (including said "-").
+string(STRIP ${VERSION} VERSION)
+if(VERSION MATCHES v[^-]*-)
+ string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" NORMALIZED_VERSION ${VERSION})
+else()
+ string(REGEX REPLACE "v(.*)" "\\1" NORMALIZED_VERSION ${VERSION})
+endif()
+
# Tell the user what versions we are using
-message(STATUS "Version: ${VERSION}")
+message(STATUS "Google Benchmark version: ${VERSION}, normalized to ${NORMALIZED_VERSION}")
# The version of the libraries
-set(GENERIC_LIB_VERSION ${VERSION})
-string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION)
+set(GENERIC_LIB_VERSION ${NORMALIZED_VERSION})
+string(SUBSTRING ${NORMALIZED_VERSION} 0 1 GENERIC_LIB_SOVERSION)
# Import our CMake modules
-include(CheckCXXCompilerFlag)
include(AddCXXCompilerFlag)
-include(CXXFeatureCheck)
+include(CheckCXXCompilerFlag)
include(CheckLibraryExists)
+include(CXXFeatureCheck)
check_library_exists(rt shm_open "" HAVE_LIB_RT)
@@ -134,6 +138,16 @@
add_required_cxx_compiler_flag(-m32)
endif()
+if (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
+ set(BENCHMARK_CXX_STANDARD 14)
+else()
+ set(BENCHMARK_CXX_STANDARD 11)
+endif()
+
+set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD})
+set(CMAKE_CXX_STANDARD_REQUIRED YES)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
if (MSVC)
# Turn compiler warnings up to 11
string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
@@ -166,21 +180,18 @@
set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG")
endif()
else()
- # Try and enable C++11. Don't use C++14 because it doesn't work in some
- # configurations.
- add_cxx_compiler_flag(-std=c++11)
- if (NOT HAVE_CXX_FLAG_STD_CXX11)
- add_cxx_compiler_flag(-std=c++0x)
- endif()
-
+ # Turn on Large-file Support
+ add_definitions(-D_FILE_OFFSET_BITS=64)
+ add_definitions(-D_LARGEFILE64_SOURCE)
+ add_definitions(-D_LARGEFILE_SOURCE)
# Turn compiler warnings up to 11
add_cxx_compiler_flag(-Wall)
add_cxx_compiler_flag(-Wextra)
add_cxx_compiler_flag(-Wshadow)
+ add_cxx_compiler_flag(-Wfloat-equal)
+ add_cxx_compiler_flag(-Wold-style-cast)
if(BENCHMARK_ENABLE_WERROR)
- add_cxx_compiler_flag(-Werror RELEASE)
- add_cxx_compiler_flag(-Werror RELWITHDEBINFO)
- add_cxx_compiler_flag(-Werror MINSIZEREL)
+ add_cxx_compiler_flag(-Werror)
endif()
if (NOT BENCHMARK_ENABLE_TESTING)
# Disable warning when compiling tests as gtest does not use 'override'.
@@ -193,24 +204,23 @@
# Disable warnings regarding deprecated parts of the library while building
# and testing those parts of the library.
add_cxx_compiler_flag(-Wno-deprecated-declarations)
- if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
# Intel silently ignores '-Wno-deprecated-declarations',
# warning no. 1786 must be explicitly disabled.
# See #631 for rationale.
add_cxx_compiler_flag(-wd1786)
+ add_cxx_compiler_flag(-fno-finite-math-only)
endif()
# Disable deprecation warnings for release builds (when -Werror is enabled).
if(BENCHMARK_ENABLE_WERROR)
- add_cxx_compiler_flag(-Wno-deprecated RELEASE)
- add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO)
- add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL)
+ add_cxx_compiler_flag(-Wno-deprecated)
endif()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-fno-exceptions)
endif()
if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
- if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing
+ if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") #ICC17u2: Many false positives for Wstrict-aliasing
add_cxx_compiler_flag(-Wstrict-aliasing)
endif()
endif()
@@ -219,12 +229,12 @@
add_cxx_compiler_flag(-wd654)
add_cxx_compiler_flag(-Wthread-safety)
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
- cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
+ cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include")
endif()
# On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a
# predefined macro, which turns on all of the wonderful libc extensions.
- # However g++ doesn't do this in Cygwin so we have to define it ourselfs
+ # However g++ doesn't do this in Cygwin so we have to define it ourselves
# since we depend on GNU/POSIX/BSD extensions.
if (CYGWIN)
add_definitions(-D_GNU_SOURCE=1)
@@ -275,7 +285,8 @@
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
add_cxx_compiler_flag(-stdlib=libc++)
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
- "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
+ "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" OR
+ "${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM")
add_cxx_compiler_flag(-nostdinc++)
message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS")
# Adding -nodefaultlibs directly to CMAKE_<TYPE>_LINKER_FLAGS will break
@@ -312,9 +323,10 @@
# Ensure we have pthreads
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
+cxx_feature_check(PTHREAD_AFFINITY)
if (BENCHMARK_ENABLE_LIBPFM)
- find_package(PFM)
+ find_package(PFM REQUIRED)
endif()
# Set up directories
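For reference, the version normalization added to CMakeLists.txt above maps `git describe` output such as `v1.8.3-4-g0123abcd` (hash made up for illustration) to `1.8.3.4`, and a plain tag such as `v1.8.3` to `1.8.3`. A minimal Python sketch of the same regex logic, for illustration only:

```python
import re

def normalize(version: str) -> str:
    """Mirror the CMake rules: drop the leading "v", turn the first "-"
    into ".", and drop everything from the second "-" onward."""
    version = version.strip()
    if re.search(r"v[^-]*-", version):
        return re.sub(r"v([^-]*)-([0-9]+)-.*", r"\1.\2", version)
    return re.sub(r"v(.*)", r"\1", version)

assert normalize("v1.8.3-4-g0123abcd") == "1.8.3.4"
assert normalize("v1.8.3") == "1.8.3"
```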
diff --git a/third-party/benchmark/CONTRIBUTORS b/third-party/benchmark/CONTRIBUTORS
index 651fbea..9ca2caa 100644
--- a/third-party/benchmark/CONTRIBUTORS
+++ b/third-party/benchmark/CONTRIBUTORS
@@ -27,7 +27,9 @@
Alex Steele <[email protected]>
Andriy Berestovskyy <[email protected]>
Arne Beer <[email protected]>
+Bátor Tallér <[email protected]>
Billy Robert O'Neal III <[email protected]> <[email protected]>
+Cezary Skrzyński <[email protected]>
Chris Kennelly <[email protected]> <[email protected]>
Christian Wassermann <[email protected]>
Christopher Seymour <[email protected]>
@@ -44,25 +46,32 @@
Eric Fiselier <[email protected]>
Eugene Zhuk <[email protected]>
Evgeny Safronov <[email protected]>
+Fabien Pichot <[email protected]>
Fanbo Meng <[email protected]>
Federico Ficarelli <[email protected]>
Felix Homann <[email protected]>
Geoffrey Martin-Noble <[email protected]> <[email protected]>
+Gergely Meszaros <[email protected]>
Gergő Szitár <[email protected]>
Hannes Hauswedell <[email protected]>
+Henrique Bucher <[email protected]>
Ismael Jimenez Martinez <[email protected]>
+Iakov Sergeev <[email protected]>
Jern-Kuan Leong <[email protected]>
JianXiong Zhou <[email protected]>
Joao Paulo Magalhaes <[email protected]>
John Millikin <[email protected]>
Jordan Williams <[email protected]>
Jussi Knuuttila <[email protected]>
-Kai Wolf <[email protected]>
Kaito Udagawa <[email protected]>
+Kai Wolf <[email protected]>
Kishan Kumar <[email protected]>
Lei Xu <[email protected]>
+Marcel Jacobse <[email protected]>
Matt Clarkson <[email protected]>
Maxim Vafin <[email protected]>
+Mike Apodaca <[email protected]>
+Min-Yih Hsu <[email protected]>
Nick Hutchinson <[email protected]>
Norman Heino <[email protected]>
Oleksandr Sochka <[email protected]>
@@ -71,6 +80,8 @@
Paul Redmond <[email protected]>
Pierre Phaneuf <[email protected]>
Radoslav Yovchev <[email protected]>
+Raghu Raja <[email protected]>
+Rainer Orth <[email protected]>
Raul Marin <[email protected]>
Ray Glover <[email protected]>
Robert Guo <[email protected]>
@@ -84,4 +95,3 @@
Yixuan Qiu <[email protected]>
Yusuke Suzuki <[email protected]>
Zbigniew Skowron <[email protected]>
-Min-Yih Hsu <[email protected]>
diff --git a/third-party/benchmark/MODULE.bazel b/third-party/benchmark/MODULE.bazel
new file mode 100644
index 0000000..7e0e016
--- /dev/null
+++ b/third-party/benchmark/MODULE.bazel
@@ -0,0 +1,32 @@
+module(
+ name = "google_benchmark",
+ version = "1.8.3",
+)
+
+bazel_dep(name = "bazel_skylib", version = "1.5.0")
+bazel_dep(name = "platforms", version = "0.0.7")
+bazel_dep(name = "rules_foreign_cc", version = "0.10.1")
+bazel_dep(name = "rules_cc", version = "0.0.9")
+
+bazel_dep(name = "rules_python", version = "0.27.1", dev_dependency = True)
+bazel_dep(name = "googletest", version = "1.12.1", dev_dependency = True, repo_name = "com_google_googletest")
+
+bazel_dep(name = "libpfm", version = "4.11.0")
+
+# Register a toolchain for Python 3.9 to be able to build numpy. Python
+# versions >=3.10 are problematic.
+# A second reason for this is to be able to build Python hermetically instead
+# of relying on the changing default version from rules_python.
+
+python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True)
+python.toolchain(python_version = "3.9")
+
+pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True)
+pip.parse(
+ hub_name = "tools_pip_deps",
+ python_version = "3.9",
+ requirements_lock = "//tools:requirements.txt",
+)
+use_repo(pip, "tools_pip_deps")
+
+# -- bazel_dep definitions -- #
diff --git a/third-party/benchmark/README.md b/third-party/benchmark/README.md
index 7b81d96..a5e5d39 100644
--- a/third-party/benchmark/README.md
+++ b/third-party/benchmark/README.md
@@ -4,10 +4,9 @@
[](https://github.com/google/benchmark/actions/workflows/bazel.yml)
[](https://github.com/google/benchmark/actions?query=workflow%3Apylint)
[](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings)
-
-[](https://travis-ci.org/google/benchmark)
[](https://coveralls.io/r/google/benchmark)
+[](https://discord.gg/cz7UX7wKC2)
A library to benchmark code snippets, similar to unit tests. Example:
@@ -33,7 +32,7 @@
[Installation](#installation). See [Usage](#usage) for a full example and the
[User Guide](docs/user_guide.md) for a more comprehensive feature overview.
-It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md)
+It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/main/docs/primer.md)
as some of the structural aspects of the APIs are similar.
## Resources
@@ -47,6 +46,8 @@
[Assembly Testing Documentation](docs/AssemblyTests.md)
+[Building and installing Python bindings](docs/python_bindings.md)
+
## Requirements
The library can be used with C++03. However, it requires C++11 to build,
@@ -137,6 +138,12 @@
If you are using clang, you may need to set `LLVMAR_EXECUTABLE`,
`LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables.
+To enable sanitizer checks (eg., `asan` and `tsan`), add:
+```
+ -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all"
+ -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all "
+```
+
### Stable and Experimental Library Versions
The main branch contains the latest stable version of the benchmarking library;
diff --git a/third-party/benchmark/WORKSPACE b/third-party/benchmark/WORKSPACE
index 949eb98..2562070 100644
--- a/third-party/benchmark/WORKSPACE
+++ b/third-party/benchmark/WORKSPACE
@@ -1,44 +1,30 @@
workspace(name = "com_github_google_benchmark")
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
-load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
+load("//:bazel/benchmark_deps.bzl", "benchmark_deps")
-http_archive(
- name = "com_google_absl",
- sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111",
- strip_prefix = "abseil-cpp-20200225.2",
- urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"],
+benchmark_deps()
+
+load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies")
+
+rules_foreign_cc_dependencies()
+
+load("@rules_python//python:repositories.bzl", "py_repositories")
+
+py_repositories()
+
+load("@rules_python//python:pip.bzl", "pip_parse")
+
+pip_parse(
+ name = "tools_pip_deps",
+ requirements_lock = "//tools:requirements.txt",
)
-git_repository(
- name = "com_google_googletest",
- remote = "https://github.com/google/googletest.git",
- tag = "release-1.11.0",
-)
+load("@tools_pip_deps//:requirements.bzl", "install_deps")
-http_archive(
- name = "pybind11",
- build_file = "@//bindings/python:pybind11.BUILD",
- sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d",
- strip_prefix = "pybind11-2.4.3",
- urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"],
-)
+install_deps()
new_local_repository(
name = "python_headers",
build_file = "@//bindings/python:python_headers.BUILD",
- path = "/usr/include/python3.6", # May be overwritten by setup.py.
-)
-
-http_archive(
- name = "rules_python",
- url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz",
- sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0",
-)
-
-load("@rules_python//python:pip.bzl", pip3_install="pip_install")
-
-pip3_install(
- name = "py_deps",
- requirements = "//:requirements.txt",
+ path = "<PYTHON_INCLUDE_PATH>", # May be overwritten by setup.py.
)
diff --git a/third-party/benchmark/WORKSPACE.bzlmod b/third-party/benchmark/WORKSPACE.bzlmod
new file mode 100644
index 0000000..9526376
--- /dev/null
+++ b/third-party/benchmark/WORKSPACE.bzlmod
@@ -0,0 +1,2 @@
+# This file marks the root of the Bazel workspace.
+# See MODULE.bazel for dependencies and setup.
diff --git a/third-party/benchmark/bazel/benchmark_deps.bzl b/third-party/benchmark/bazel/benchmark_deps.bzl
new file mode 100644
index 0000000..4fb45a5
--- /dev/null
+++ b/third-party/benchmark/bazel/benchmark_deps.bzl
@@ -0,0 +1,62 @@
+"""
+This file contains the Bazel build dependencies for Google Benchmark (both C++ source and Python bindings).
+"""
+
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+def benchmark_deps():
+ """Loads dependencies required to build Google Benchmark."""
+
+ if "bazel_skylib" not in native.existing_rules():
+ http_archive(
+ name = "bazel_skylib",
+ sha256 = "cd55a062e763b9349921f0f5db8c3933288dc8ba4f76dd9416aac68acee3cb94",
+ urls = [
+ "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz",
+ "https://github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz",
+ ],
+ )
+
+ if "rules_foreign_cc" not in native.existing_rules():
+ http_archive(
+ name = "rules_foreign_cc",
+ sha256 = "476303bd0f1b04cc311fc258f1708a5f6ef82d3091e53fd1977fa20383425a6a",
+ strip_prefix = "rules_foreign_cc-0.10.1",
+ url = "https://github.com/bazelbuild/rules_foreign_cc/releases/download/0.10.1/rules_foreign_cc-0.10.1.tar.gz",
+ )
+
+ if "rules_python" not in native.existing_rules():
+ http_archive(
+ name = "rules_python",
+ sha256 = "e85ae30de33625a63eca7fc40a94fea845e641888e52f32b6beea91e8b1b2793",
+ strip_prefix = "rules_python-0.27.1",
+ url = "https://github.com/bazelbuild/rules_python/releases/download/0.27.1/rules_python-0.27.1.tar.gz",
+ )
+
+ if "com_google_googletest" not in native.existing_rules():
+ new_git_repository(
+ name = "com_google_googletest",
+ remote = "https://github.com/google/googletest.git",
+ tag = "release-1.12.1",
+ )
+
+ if "nanobind" not in native.existing_rules():
+ new_git_repository(
+ name = "nanobind",
+ remote = "https://github.com/wjakob/nanobind.git",
+ tag = "v1.8.0",
+ build_file = "@//bindings/python:nanobind.BUILD",
+ recursive_init_submodules = True,
+ )
+
+ if "libpfm" not in native.existing_rules():
+ # Downloaded from v4.9.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/
+ http_archive(
+ name = "libpfm",
+ build_file = str(Label("//tools:libpfm.BUILD.bazel")),
+ sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc",
+ type = "tar.gz",
+ strip_prefix = "libpfm-4.11.0",
+ urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"],
+ )
diff --git a/third-party/benchmark/bindings/python/build_defs.bzl b/third-party/benchmark/bindings/python/build_defs.bzl
index 45907aa..b0c1b0f 100644
--- a/third-party/benchmark/bindings/python/build_defs.bzl
+++ b/third-party/benchmark/bindings/python/build_defs.bzl
@@ -1,3 +1,7 @@
+"""
+This file contains some build definitions for C++ extensions used in the Google Benchmark Python bindings.
+"""
+
_SHARED_LIB_SUFFIX = {
"//conditions:default": ".so",
"//:windows": ".dll",
@@ -8,8 +12,8 @@
shared_lib_name = name + shared_lib_suffix
native.cc_binary(
name = shared_lib_name,
- linkshared = 1,
- linkstatic = 1,
+ linkshared = True,
+ linkstatic = True,
srcs = srcs + hdrs,
copts = copts,
features = features,
diff --git a/third-party/benchmark/bindings/python/google_benchmark/__init__.py b/third-party/benchmark/bindings/python/google_benchmark/__init__.py
index 1055bf2..e14769f 100644
--- a/third-party/benchmark/bindings/python/google_benchmark/__init__.py
+++ b/third-party/benchmark/bindings/python/google_benchmark/__init__.py
@@ -26,47 +26,29 @@
if __name__ == '__main__':
benchmark.main()
"""
+import atexit
from absl import app
+
from google_benchmark import _benchmark
from google_benchmark._benchmark import (
- Counter,
- kNanosecond,
- kMicrosecond,
- kMillisecond,
- kSecond,
- oNone,
- o1,
- oN,
- oNSquared,
- oNCubed,
- oLogN,
- oNLogN,
- oAuto,
- oLambda,
+ Counter as Counter,
+ State as State,
+ kMicrosecond as kMicrosecond,
+ kMillisecond as kMillisecond,
+ kNanosecond as kNanosecond,
+ kSecond as kSecond,
+ o1 as o1,
+ oAuto as oAuto,
+ oLambda as oLambda,
+ oLogN as oLogN,
+ oN as oN,
+ oNCubed as oNCubed,
+ oNLogN as oNLogN,
+ oNone as oNone,
+ oNSquared as oNSquared,
)
-
-
-__all__ = [
- "register",
- "main",
- "Counter",
- "kNanosecond",
- "kMicrosecond",
- "kMillisecond",
- "kSecond",
- "oNone",
- "o1",
- "oN",
- "oNSquared",
- "oNCubed",
- "oLogN",
- "oNLogN",
- "oAuto",
- "oLambda",
-]
-
-__version__ = "0.2.0"
+from google_benchmark.version import __version__ as __version__
class __OptionMaker:
@@ -94,14 +76,13 @@
# The function that get returned on @option.range(start=0, limit=1<<5).
def __builder_method(*args, **kwargs):
-
# The decorator that get called, either with the benchmared function
# or the previous Options
def __decorator(func_or_options):
options = self.make(func_or_options)
options.builder_calls.append((builder_name, args, kwargs))
# The decorator returns Options so it is not technically a decorator
- # and needs a final call to @regiser
+ # and needs a final call to @register
return options
return __decorator
@@ -156,3 +137,4 @@
# Methods for use with custom main function.
initialize = _benchmark.Initialize
run_benchmarks = _benchmark.RunSpecifiedBenchmarks
+atexit.register(_benchmark.ClearRegisteredBenchmarks)
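Following the module docstring above (register functions, then call `benchmark.main()`), a minimal usage sketch of the Python bindings; the benchmark name and body are illustrative:

```python
import google_benchmark as benchmark


@benchmark.register
def sum_million(state):
    # State.__bool__ (bound to State.KeepRunning in the C++ bindings)
    # drives the timing loop.
    while state:
        sum(range(1_000_000))


if __name__ == "__main__":
    benchmark.main()
```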
diff --git a/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc b/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc
index 02b6ed7..f444769 100644
--- a/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc
+++ b/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc
@@ -1,20 +1,17 @@
// Benchmark for Python.
-#include <map>
-#include <string>
-#include <vector>
-
-#include "pybind11/operators.h"
-#include "pybind11/pybind11.h"
-#include "pybind11/stl.h"
-#include "pybind11/stl_bind.h"
-
#include "benchmark/benchmark.h"
-PYBIND11_MAKE_OPAQUE(benchmark::UserCounters);
+#include "nanobind/nanobind.h"
+#include "nanobind/operators.h"
+#include "nanobind/stl/bind_map.h"
+#include "nanobind/stl/string.h"
+#include "nanobind/stl/vector.h"
+
+NB_MAKE_OPAQUE(benchmark::UserCounters);
namespace {
-namespace py = ::pybind11;
+namespace nb = nanobind;
std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
// The `argv` pointers here become invalid when this function returns, but
@@ -37,15 +34,16 @@
return remaining_argv;
}
-benchmark::internal::Benchmark* RegisterBenchmark(const char* name,
- py::function f) {
+benchmark::internal::Benchmark* RegisterBenchmark(const std::string& name,
+ nb::callable f) {
return benchmark::RegisterBenchmark(
name, [f](benchmark::State& state) { f(&state); });
}
-PYBIND11_MODULE(_benchmark, m) {
+NB_MODULE(_benchmark, m) {
+
using benchmark::TimeUnit;
- py::enum_<TimeUnit>(m, "TimeUnit")
+ nb::enum_<TimeUnit>(m, "TimeUnit")
.value("kNanosecond", TimeUnit::kNanosecond)
.value("kMicrosecond", TimeUnit::kMicrosecond)
.value("kMillisecond", TimeUnit::kMillisecond)
@@ -53,72 +51,74 @@
.export_values();
using benchmark::BigO;
- py::enum_<BigO>(m, "BigO")
+ nb::enum_<BigO>(m, "BigO")
.value("oNone", BigO::oNone)
.value("o1", BigO::o1)
.value("oN", BigO::oN)
.value("oNSquared", BigO::oNSquared)
.value("oNCubed", BigO::oNCubed)
.value("oLogN", BigO::oLogN)
- .value("oNLogN", BigO::oLogN)
+ .value("oNLogN", BigO::oNLogN)
.value("oAuto", BigO::oAuto)
.value("oLambda", BigO::oLambda)
.export_values();
using benchmark::internal::Benchmark;
- py::class_<Benchmark>(m, "Benchmark")
- // For methods returning a pointer tor the current object, reference
- // return policy is used to ask pybind not to take ownership oof the
+ nb::class_<Benchmark>(m, "Benchmark")
+ // For methods returning a pointer to the current object, reference
+ // return policy is used to ask nanobind not to take ownership of the
// returned object and avoid calling delete on it.
// https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies
//
// For methods taking a const std::vector<...>&, a copy is created
// because a it is bound to a Python list.
// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html
- .def("unit", &Benchmark::Unit, py::return_value_policy::reference)
- .def("arg", &Benchmark::Arg, py::return_value_policy::reference)
- .def("args", &Benchmark::Args, py::return_value_policy::reference)
- .def("range", &Benchmark::Range, py::return_value_policy::reference,
- py::arg("start"), py::arg("limit"))
+ .def("unit", &Benchmark::Unit, nb::rv_policy::reference)
+ .def("arg", &Benchmark::Arg, nb::rv_policy::reference)
+ .def("args", &Benchmark::Args, nb::rv_policy::reference)
+ .def("range", &Benchmark::Range, nb::rv_policy::reference,
+ nb::arg("start"), nb::arg("limit"))
.def("dense_range", &Benchmark::DenseRange,
- py::return_value_policy::reference, py::arg("start"),
- py::arg("limit"), py::arg("step") = 1)
- .def("ranges", &Benchmark::Ranges, py::return_value_policy::reference)
+ nb::rv_policy::reference, nb::arg("start"),
+ nb::arg("limit"), nb::arg("step") = 1)
+ .def("ranges", &Benchmark::Ranges, nb::rv_policy::reference)
.def("args_product", &Benchmark::ArgsProduct,
- py::return_value_policy::reference)
- .def("arg_name", &Benchmark::ArgName, py::return_value_policy::reference)
+ nb::rv_policy::reference)
+ .def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference)
.def("arg_names", &Benchmark::ArgNames,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("range_pair", &Benchmark::RangePair,
- py::return_value_policy::reference, py::arg("lo1"), py::arg("hi1"),
- py::arg("lo2"), py::arg("hi2"))
+ nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"),
+ nb::arg("lo2"), nb::arg("hi2"))
.def("range_multiplier", &Benchmark::RangeMultiplier,
- py::return_value_policy::reference)
- .def("min_time", &Benchmark::MinTime, py::return_value_policy::reference)
+ nb::rv_policy::reference)
+ .def("min_time", &Benchmark::MinTime, nb::rv_policy::reference)
+ .def("min_warmup_time", &Benchmark::MinWarmUpTime,
+ nb::rv_policy::reference)
.def("iterations", &Benchmark::Iterations,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("repetitions", &Benchmark::Repetitions,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("report_aggregates_only", &Benchmark::ReportAggregatesOnly,
- py::return_value_policy::reference, py::arg("value") = true)
+ nb::rv_policy::reference, nb::arg("value") = true)
.def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly,
- py::return_value_policy::reference, py::arg("value") = true)
+ nb::rv_policy::reference, nb::arg("value") = true)
.def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("use_real_time", &Benchmark::UseRealTime,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("use_manual_time", &Benchmark::UseManualTime,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def(
"complexity",
(Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity,
- py::return_value_policy::reference,
- py::arg("complexity") = benchmark::oAuto);
+ nb::rv_policy::reference,
+ nb::arg("complexity") = benchmark::oAuto);
using benchmark::Counter;
- py::class_<Counter> py_counter(m, "Counter");
+ nb::class_<Counter> py_counter(m, "Counter");
- py::enum_<Counter::Flags>(py_counter, "Flags")
+ nb::enum_<Counter::Flags>(py_counter, "Flags")
.value("kDefaults", Counter::Flags::kDefaults)
.value("kIsRate", Counter::Flags::kIsRate)
.value("kAvgThreads", Counter::Flags::kAvgThreads)
@@ -130,52 +130,55 @@
.value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate)
.value("kInvert", Counter::Flags::kInvert)
.export_values()
- .def(py::self | py::self);
+ .def(nb::self | nb::self);
- py::enum_<Counter::OneK>(py_counter, "OneK")
+ nb::enum_<Counter::OneK>(py_counter, "OneK")
.value("kIs1000", Counter::OneK::kIs1000)
.value("kIs1024", Counter::OneK::kIs1024)
.export_values();
py_counter
- .def(py::init<double, Counter::Flags, Counter::OneK>(),
- py::arg("value") = 0., py::arg("flags") = Counter::kDefaults,
- py::arg("k") = Counter::kIs1000)
- .def(py::init([](double value) { return Counter(value); }))
- .def_readwrite("value", &Counter::value)
- .def_readwrite("flags", &Counter::flags)
- .def_readwrite("oneK", &Counter::oneK);
- py::implicitly_convertible<py::float_, Counter>();
- py::implicitly_convertible<py::int_, Counter>();
+ .def(nb::init<double, Counter::Flags, Counter::OneK>(),
+ nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults,
+ nb::arg("k") = Counter::kIs1000)
+ .def("__init__", ([](Counter *c, double value) { new (c) Counter(value); }))
+ .def_rw("value", &Counter::value)
+ .def_rw("flags", &Counter::flags)
+ .def_rw("oneK", &Counter::oneK)
+ .def(nb::init_implicit<double>());
- py::bind_map<benchmark::UserCounters>(m, "UserCounters");
+ nb::implicitly_convertible<nb::int_, Counter>();
+
+ nb::bind_map<benchmark::UserCounters>(m, "UserCounters");
using benchmark::State;
- py::class_<State>(m, "State")
+ nb::class_<State>(m, "State")
.def("__bool__", &State::KeepRunning)
- .def_property_readonly("keep_running", &State::KeepRunning)
+ .def_prop_ro("keep_running", &State::KeepRunning)
.def("pause_timing", &State::PauseTiming)
.def("resume_timing", &State::ResumeTiming)
.def("skip_with_error", &State::SkipWithError)
- .def_property_readonly("error_occurred", &State::error_occurred)
+ .def_prop_ro("error_occurred", &State::error_occurred)
.def("set_iteration_time", &State::SetIterationTime)
- .def_property("bytes_processed", &State::bytes_processed,
+ .def_prop_rw("bytes_processed", &State::bytes_processed,
&State::SetBytesProcessed)
- .def_property("complexity_n", &State::complexity_length_n,
+ .def_prop_rw("complexity_n", &State::complexity_length_n,
&State::SetComplexityN)
- .def_property("items_processed", &State::items_processed,
- &State::SetItemsProcessed)
- .def("set_label", (void(State::*)(const char*)) & State::SetLabel)
- .def("range", &State::range, py::arg("pos") = 0)
- .def_property_readonly("iterations", &State::iterations)
- .def_readwrite("counters", &State::counters)
- .def_property_readonly("thread_index", &State::thread_index)
- .def_property_readonly("threads", &State::threads);
+ .def_prop_rw("items_processed", &State::items_processed,
+ &State::SetItemsProcessed)
+ .def("set_label", &State::SetLabel)
+ .def("range", &State::range, nb::arg("pos") = 0)
+ .def_prop_ro("iterations", &State::iterations)
+ .def_prop_ro("name", &State::name)
+ .def_rw("counters", &State::counters)
+ .def_prop_ro("thread_index", &State::thread_index)
+ .def_prop_ro("threads", &State::threads);
m.def("Initialize", Initialize);
m.def("RegisterBenchmark", RegisterBenchmark,
- py::return_value_policy::reference);
+ nb::rv_policy::reference);
m.def("RunSpecifiedBenchmarks",
[]() { benchmark::RunSpecifiedBenchmarks(); });
+ m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks);
};
} // namespace
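The `nb::init_implicit<double>()` and `nb::implicitly_convertible<nb::int_, Counter>()` registrations above are what let plain numbers be assigned into `state.counters`; a short illustrative sketch (benchmark body assumed, as in example.py below):

```python
from google_benchmark import Counter

def bench(state):
    items = 0
    while state:
        items += 1
    state.counters["items"] = items  # implicitly converted to Counter
    state.counters["items_rate"] = Counter(items, Counter.kIsRate)  # explicit flags
```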
diff --git a/third-party/benchmark/bindings/python/google_benchmark/example.py b/third-party/benchmark/bindings/python/google_benchmark/example.py
index fb0234b..b5b2f88 100644
--- a/third-party/benchmark/bindings/python/google_benchmark/example.py
+++ b/third-party/benchmark/bindings/python/google_benchmark/example.py
@@ -73,7 +73,7 @@
@benchmark.register
def custom_counters(state):
- """Collect cutom metric using benchmark.Counter."""
+ """Collect custom metric using benchmark.Counter."""
num_foo = 0.0
while state:
# Benchmark some code here
@@ -86,7 +86,9 @@
# Set a counter as a rate.
state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate)
# Set a counter as an inverse of rate.
- state.counters["foo_inv_rate"] = Counter(num_foo, Counter.kIsRate | Counter.kInvert)
+ state.counters["foo_inv_rate"] = Counter(
+ num_foo, Counter.kIsRate | Counter.kInvert
+ )
# Set a counter as a thread-average quantity.
state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads)
# There's also a combined flag:
diff --git a/third-party/benchmark/bindings/python/google_benchmark/version.py b/third-party/benchmark/bindings/python/google_benchmark/version.py
new file mode 100644
index 0000000..a324693
--- /dev/null
+++ b/third-party/benchmark/bindings/python/google_benchmark/version.py
@@ -0,0 +1,7 @@
+from importlib.metadata import PackageNotFoundError, version
+
+try:
+ __version__ = version("google-benchmark")
+except PackageNotFoundError:
+ # package is not installed
+ pass
diff --git a/third-party/benchmark/bindings/python/nanobind.BUILD b/third-party/benchmark/bindings/python/nanobind.BUILD
new file mode 100644
index 0000000..9874b80
--- /dev/null
+++ b/third-party/benchmark/bindings/python/nanobind.BUILD
@@ -0,0 +1,59 @@
+load("@bazel_skylib//lib:selects.bzl", "selects")
+
+licenses(["notice"])
+
+package(default_visibility = ["//visibility:public"])
+
+config_setting(
+ name = "msvc_compiler",
+ flag_values = {"@bazel_tools//tools/cpp:compiler": "msvc-cl"},
+)
+
+selects.config_setting_group(
+ name = "winplusmsvc",
+ match_all = [
+ "@platforms//os:windows",
+ ":msvc_compiler",
+ ],
+)
+
+cc_library(
+ name = "nanobind",
+ srcs = glob([
+ "src/*.cpp",
+ ]),
+ additional_linker_inputs = select({
+ "@platforms//os:macos": [":cmake/darwin-ld-cpython.sym"],
+ "//conditions:default": [],
+ }),
+ copts = select({
+ ":msvc_compiler": [
+ "/EHsc", # exceptions
+ "/Os", # size optimizations
+ "/GL", # LTO / whole program optimization
+ ],
+ # these should work on both clang and gcc.
+ "//conditions:default": [
+ "-fexceptions",
+ "-flto",
+ "-Os",
+ ],
+ }),
+ includes = [
+ "ext/robin_map/include",
+ "include",
+ ],
+ linkopts = select({
+ ":winplusmsvc": ["/LTGC"], # Windows + MSVC.
+ "@platforms//os:macos": ["-Wl,@$(location :cmake/darwin-ld-cpython.sym)"], # Apple.
+ "//conditions:default": [],
+ }),
+ textual_hdrs = glob(
+ [
+ "include/**/*.h",
+ "src/*.h",
+ "ext/robin_map/include/tsl/*.h",
+ ],
+ ),
+ deps = ["@python_headers"],
+)
diff --git a/third-party/benchmark/bindings/python/pybind11.BUILD b/third-party/benchmark/bindings/python/pybind11.BUILD
deleted file mode 100644
index bc833500..0000000
--- a/third-party/benchmark/bindings/python/pybind11.BUILD
+++ /dev/null
@@ -1,20 +0,0 @@
-cc_library(
- name = "pybind11",
- hdrs = glob(
- include = [
- "include/pybind11/*.h",
- "include/pybind11/detail/*.h",
- ],
- exclude = [
- "include/pybind11/common.h",
- "include/pybind11/eigen.h",
- ],
- ),
- copts = [
- "-fexceptions",
- "-Wno-undefined-inline",
- "-Wno-pragma-once-outside-header",
- ],
- includes = ["include"],
- visibility = ["//visibility:public"],
-)
diff --git a/third-party/benchmark/bindings/python/python_headers.BUILD b/third-party/benchmark/bindings/python/python_headers.BUILD
index 9c34cf6..8f139f8 100644
--- a/third-party/benchmark/bindings/python/python_headers.BUILD
+++ b/third-party/benchmark/bindings/python/python_headers.BUILD
@@ -1,3 +1,7 @@
+licenses(["notice"])
+
+package(default_visibility = ["//visibility:public"])
+
cc_library(
name = "python_headers",
hdrs = glob(["**/*.h"]),
diff --git a/third-party/benchmark/bindings/python/requirements.txt b/third-party/benchmark/bindings/python/requirements.txt
deleted file mode 100644
index f5bbe7ec..0000000
--- a/third-party/benchmark/bindings/python/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-absl-py>=0.7.1
-
diff --git a/third-party/benchmark/cmake/CXXFeatureCheck.cmake b/third-party/benchmark/cmake/CXXFeatureCheck.cmake
index 62e6741..e514826 100644
--- a/third-party/benchmark/cmake/CXXFeatureCheck.cmake
+++ b/third-party/benchmark/cmake/CXXFeatureCheck.cmake
@@ -17,6 +17,8 @@
endif()
set(__cxx_feature_check INCLUDED)
+option(CXXFEATURECHECK_DEBUG OFF)
+
function(cxx_feature_check FILE)
string(TOLOWER ${FILE} FILE)
string(TOUPPER ${FILE} VAR)
@@ -27,18 +29,22 @@
return()
endif()
+ set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
if (ARGC GREATER 1)
message(STATUS "Enabling additional flags: ${ARGV1}")
- list(APPEND BENCHMARK_CXX_LINKER_FLAGS ${ARGV1})
+ list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1})
endif()
if (NOT DEFINED COMPILE_${FEATURE})
- message(STATUS "Performing Test ${FEATURE}")
if(CMAKE_CROSSCOMPILING)
+ message(STATUS "Cross-compiling to test ${FEATURE}")
try_compile(COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
- CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
- LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED ON
+ CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
+ LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
+ OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
if(COMPILE_${FEATURE})
message(WARNING
"If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
@@ -47,11 +53,14 @@
set(RUN_${FEATURE} 1 CACHE INTERNAL "")
endif()
else()
- message(STATUS "Performing Test ${FEATURE}")
+ message(STATUS "Compiling and running to test ${FEATURE}")
try_run(RUN_${FEATURE} COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
- CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
- LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED ON
+ CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
+ LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
+ COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
endif()
endif()
@@ -61,7 +70,11 @@
add_definitions(-DHAVE_${VAR})
else()
if(NOT COMPILE_${FEATURE})
- message(STATUS "Performing Test ${FEATURE} -- failed to compile")
+ if(CXXFEATURECHECK_DEBUG)
+ message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}")
+ else()
+ message(STATUS "Performing Test ${FEATURE} -- failed to compile")
+ endif()
else()
message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run")
endif()
diff --git a/third-party/benchmark/cmake/GetGitVersion.cmake b/third-party/benchmark/cmake/GetGitVersion.cmake
index 04a1f9b..b021010 100644
--- a/third-party/benchmark/cmake/GetGitVersion.cmake
+++ b/third-party/benchmark/cmake/GetGitVersion.cmake
@@ -20,38 +20,16 @@
function(get_git_version var)
if(GIT_EXECUTABLE)
- execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
+ execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 --dirty
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
RESULT_VARIABLE status
- OUTPUT_VARIABLE GIT_DESCRIBE_VERSION
+ OUTPUT_VARIABLE GIT_VERSION
ERROR_QUIET)
if(status)
- set(GIT_DESCRIBE_VERSION "v0.0.0")
+ set(GIT_VERSION "v0.0.0")
endif()
-
- string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION)
- if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-)
- string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION ${GIT_DESCRIBE_VERSION})
- else()
- string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION})
- endif()
-
- # Work out if the repository is dirty
- execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh
- WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
- OUTPUT_QUIET
- ERROR_QUIET)
- execute_process(COMMAND ${GIT_EXECUTABLE} diff-index --name-only HEAD --
- WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
- OUTPUT_VARIABLE GIT_DIFF_INDEX
- ERROR_QUIET)
- string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY)
- if (${GIT_DIRTY})
- set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty")
- endif()
- message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}")
else()
- set(GIT_VERSION "0.0.0")
+ set(GIT_VERSION "v0.0.0")
endif()
set(${var} ${GIT_VERSION} PARENT_SCOPE)
diff --git a/third-party/benchmark/cmake/GoogleTest.cmake b/third-party/benchmark/cmake/GoogleTest.cmake
index 66cb910..e66e9d1 100644
--- a/third-party/benchmark/cmake/GoogleTest.cmake
+++ b/third-party/benchmark/cmake/GoogleTest.cmake
@@ -29,15 +29,25 @@
include(${GOOGLETEST_PREFIX}/googletest-paths.cmake)
-# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves.
-add_compile_options(-w)
-
# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
add_subdirectory(${GOOGLETEST_SOURCE_DIR}
${GOOGLETEST_BINARY_DIR}
EXCLUDE_FROM_ALL)
+# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves.
+if (MSVC)
+ target_compile_options(gtest PRIVATE "/wd4244" "/wd4722")
+ target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722")
+ target_compile_options(gmock PRIVATE "/wd4244" "/wd4722")
+ target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722")
+else()
+ target_compile_options(gtest PRIVATE "-w")
+ target_compile_options(gtest_main PRIVATE "-w")
+ target_compile_options(gmock PRIVATE "-w")
+ target_compile_options(gmock_main PRIVATE "-w")
+endif()
+
if(NOT DEFINED GTEST_COMPILE_COMMANDS)
set(GTEST_COMPILE_COMMANDS ON)
endif()
diff --git a/third-party/benchmark/cmake/Modules/FindPFM.cmake b/third-party/benchmark/cmake/Modules/FindPFM.cmake
index cf807a1..4c1ce93 100644
--- a/third-party/benchmark/cmake/Modules/FindPFM.cmake
+++ b/third-party/benchmark/cmake/Modules/FindPFM.cmake
@@ -1,26 +1,28 @@
# If successful, the following variables will be defined:
-# HAVE_LIBPFM.
-# Set BENCHMARK_ENABLE_LIBPFM to 0 to disable, regardless of libpfm presence.
-include(CheckIncludeFile)
-include(CheckLibraryExists)
+# PFM_FOUND.
+# PFM_LIBRARIES
+# PFM_INCLUDE_DIRS
+# the following target will be defined:
+# PFM::libpfm
+
include(FeatureSummary)
-enable_language(C)
+include(FindPackageHandleStandardArgs)
set_package_properties(PFM PROPERTIES
URL http://perfmon2.sourceforge.net/
- DESCRIPTION "a helper library to develop monitoring tools"
+ DESCRIPTION "A helper library to develop monitoring tools"
PURPOSE "Used to program specific performance monitoring events")
-check_library_exists(libpfm.a pfm_initialize "" HAVE_LIBPFM_INITIALIZE)
-if(HAVE_LIBPFM_INITIALIZE)
- check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H)
- check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H)
- check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
- if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
- message("Using Perf Counters.")
- set(HAVE_LIBPFM 1)
- set(PFM_FOUND 1)
- endif()
-else()
- message("Perf Counters support requested, but was unable to find libpfm.")
+find_library(PFM_LIBRARY NAMES pfm)
+find_path(PFM_INCLUDE_DIR NAMES perfmon/pfmlib.h)
+
+find_package_handle_standard_args(PFM REQUIRED_VARS PFM_LIBRARY PFM_INCLUDE_DIR)
+
+if (PFM_FOUND AND NOT TARGET PFM::libpfm)
+ add_library(PFM::libpfm UNKNOWN IMPORTED)
+ set_target_properties(PFM::libpfm PROPERTIES
+ IMPORTED_LOCATION "${PFM_LIBRARY}"
+ INTERFACE_INCLUDE_DIRECTORIES "${PFM_INCLUDE_DIR}")
endif()
+
+mark_as_advanced(PFM_LIBRARY PFM_INCLUDE_DIR)
diff --git a/third-party/benchmark/cmake/benchmark.pc.in b/third-party/benchmark/cmake/benchmark.pc.in
index 34beb01..9dae881 100644
--- a/third-party/benchmark/cmake/benchmark.pc.in
+++ b/third-party/benchmark/cmake/benchmark.pc.in
@@ -1,7 +1,7 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
-libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
-includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework
diff --git a/third-party/benchmark/cmake/pthread_affinity.cpp b/third-party/benchmark/cmake/pthread_affinity.cpp
new file mode 100644
index 0000000..7b143bc
--- /dev/null
+++ b/third-party/benchmark/cmake/pthread_affinity.cpp
@@ -0,0 +1,16 @@
+#include <pthread.h>
+int main() {
+ cpu_set_t set;
+ CPU_ZERO(&set);
+ for (int i = 0; i < CPU_SETSIZE; ++i) {
+ CPU_SET(i, &set);
+ CPU_CLR(i, &set);
+ }
+ pthread_t self = pthread_self();
+ int ret;
+ ret = pthread_getaffinity_np(self, sizeof(set), &set);
+ if (ret != 0) return ret;
+ ret = pthread_setaffinity_np(self, sizeof(set), &set);
+ if (ret != 0) return ret;
+ return 0;
+}
diff --git a/third-party/benchmark/docs/AssemblyTests.md b/third-party/benchmark/docs/AssemblyTests.md
index 1fbdc26..89df7ca 100644
--- a/third-party/benchmark/docs/AssemblyTests.md
+++ b/third-party/benchmark/docs/AssemblyTests.md
@@ -111,6 +111,7 @@
is matching stack frame addresses. In this case regular expressions
can be used to match the differing bits of output. For example:
+<!-- {% raw %} -->
```c++
int ExternInt;
struct Point { int x, y, z; };
@@ -127,6 +128,7 @@
// CHECK: ret
}
```
+<!-- {% endraw %} -->
## Current Requirements and Limitations
diff --git a/third-party/benchmark/docs/_config.yml b/third-party/benchmark/docs/_config.yml
index 2f7efbe..32f9f2e 100644
--- a/third-party/benchmark/docs/_config.yml
+++ b/third-party/benchmark/docs/_config.yml
@@ -1 +1,3 @@
-theme: jekyll-theme-minimal
\ No newline at end of file
+theme: jekyll-theme-minimal
+logo: /assets/images/icon_black.png
+show_downloads: true
diff --git a/third-party/benchmark/docs/assets/images/icon.png b/third-party/benchmark/docs/assets/images/icon.png
new file mode 100644
index 0000000..b982604
--- /dev/null
+++ b/third-party/benchmark/docs/assets/images/icon.png
Binary files differ
diff --git a/third-party/benchmark/docs/assets/images/icon.xcf b/third-party/benchmark/docs/assets/images/icon.xcf
new file mode 100644
index 0000000..f2f0be4
--- /dev/null
+++ b/third-party/benchmark/docs/assets/images/icon.xcf
Binary files differ
diff --git a/third-party/benchmark/docs/assets/images/icon_black.png b/third-party/benchmark/docs/assets/images/icon_black.png
new file mode 100644
index 0000000..656ae79
--- /dev/null
+++ b/third-party/benchmark/docs/assets/images/icon_black.png
Binary files differ
diff --git a/third-party/benchmark/docs/assets/images/icon_black.xcf b/third-party/benchmark/docs/assets/images/icon_black.xcf
new file mode 100644
index 0000000..430e7ba
--- /dev/null
+++ b/third-party/benchmark/docs/assets/images/icon_black.xcf
Binary files differ
diff --git a/third-party/benchmark/docs/dependencies.md b/third-party/benchmark/docs/dependencies.md
index 7af52b9..07760e1 100644
--- a/third-party/benchmark/docs/dependencies.md
+++ b/third-party/benchmark/docs/dependencies.md
@@ -1,19 +1,13 @@
# Build tool dependency policy
-To ensure the broadest compatibility when building the benchmark library, but
-still allow forward progress, we require any build tooling to be available for:
+We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In
+particular the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems).
-* Debian stable _and_
-* The last two Ubuntu LTS releases
+## CMake
-Currently, this means using build tool versions that are available for Ubuntu
-18.04 (Bionic Beaver), Ubuntu 20.04 (Focal Fossa), and Debian 11 (bullseye).
+The current supported version is CMake 3.10 as of 2023-08-10. Most modern
+distributions include newer versions, for example:
-_Note, CI also runs ubuntu-16.04 and ubuntu-14.04 to ensure best effort support
-for older versions._
-
-## cmake
-The current supported version is cmake 3.5.1 as of 2018-06-06.
-
-_Note, this version is also available for Ubuntu 14.04, an older Ubuntu LTS
-release, as `cmake3`._
+* Ubuntu 20.04 provides CMake 3.16.3
+* Debian 11.4 provides CMake 3.18.4
+* Ubuntu 22.04 provides CMake 3.22.1
diff --git a/third-party/benchmark/docs/index.md b/third-party/benchmark/docs/index.md
index eb82eff..9cada96 100644
--- a/third-party/benchmark/docs/index.md
+++ b/third-party/benchmark/docs/index.md
@@ -4,7 +4,9 @@
* [Dependencies](dependencies.md)
* [Perf Counters](perf_counters.md)
* [Platform Specific Build Instructions](platform_specific_build_instructions.md)
+* [Python Bindings](python_bindings.md)
* [Random Interleaving](random_interleaving.md)
+* [Reducing Variance](reducing_variance.md)
* [Releasing](releasing.md)
* [Tools](tools.md)
-* [User Guide](user_guide.md)
\ No newline at end of file
+* [User Guide](user_guide.md)
diff --git a/third-party/benchmark/docs/perf_counters.md b/third-party/benchmark/docs/perf_counters.md
index 74560e9..f342092 100644
--- a/third-party/benchmark/docs/perf_counters.md
+++ b/third-party/benchmark/docs/perf_counters.md
@@ -12,16 +12,17 @@
* The benchmark is run on an architecture featuring a Performance Monitoring
Unit (PMU),
* The benchmark is compiled with support for collecting counters. Currently,
- this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build
- time
+ this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a
+ dependency via Bazel.
The feature does not require modifying benchmark code. Counter collection is
handled at the boundaries where timer collection is also handled.
To opt-in:
-
-* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
-* Enable the cmake flag BENCHMARK_ENABLE_LIBPFM.
+* If using a Bazel build, add `--define pfm=1` to your build flags
+* If using CMake:
+ * Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
+ * Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`.
To use, pass a comma-separated list of counter names through the
`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning,
@@ -31,4 +32,4 @@
The counter values are reported back through the [User Counters](../README.md#custom-counters)
mechanism, meaning, they are available in all the formats (e.g. JSON) supported
-by User Counters.
\ No newline at end of file
+by User Counters.
diff --git a/third-party/benchmark/docs/python_bindings.md b/third-party/benchmark/docs/python_bindings.md
new file mode 100644
index 0000000..d9c5d2d
--- /dev/null
+++ b/third-party/benchmark/docs/python_bindings.md
@@ -0,0 +1,34 @@
+# Building and installing Python bindings
+
+Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and
+using Google Benchmark directly in Python.
+Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows.
+Supported Python versions are Python 3.8 - 3.12.
+
+To install Google Benchmark's Python bindings, run:
+
+```bash
+python -m pip install --upgrade pip # for manylinux2014 support
+python -m pip install google-benchmark
+```
+
+In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual
+environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html)
+on how to create virtual environments.
+
+To build a wheel directly from source, you can follow these steps:
+```bash
+git clone https://github.com/google/benchmark.git
+cd benchmark
+# create a virtual environment and activate it
+python3 -m venv venv --system-site-packages
+source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows
+
+# upgrade Python's system-wide packages
+python -m pip install --upgrade pip build
+# builds the wheel and stores it in the directory "dist".
+python -m build
+```
+
+NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel,
+refer to the [Bazel installation docs](https://bazel.build/install).
diff --git a/third-party/benchmark/docs/reducing_variance.md b/third-party/benchmark/docs/reducing_variance.md
new file mode 100644
index 0000000..105f96e
--- /dev/null
+++ b/third-party/benchmark/docs/reducing_variance.md
@@ -0,0 +1,98 @@
+# Reducing Variance
+
+<a name="disabling-cpu-frequency-scaling" />
+
+## Disabling CPU Frequency Scaling
+
+If you see this error:
+
+```
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+```
+
+you might want to disable the CPU frequency scaling while running the
+benchmark, as well as consider other ways to stabilize the performance of
+your system while benchmarking.
+
+Exactly how to do this depends on the Linux distribution,
+desktop environment, and installed programs. Specific details are a moving
+target, so we will not attempt to exhaustively document them here.
+
+One simple option is to use the `cpupower` program to change the
+performance governor to "performance". This tool is maintained along with
+the Linux kernel and provided by your distribution.
+
+It must be run as root, like this:
+
+```bash
+sudo cpupower frequency-set --governor performance
+```
+
+After this you can verify that all CPUs are using the performance governor
+by running this command:
+
+```bash
+cpupower frequency-info -o proc
+```
+
+The benchmarks you subsequently run will have less variance.
+
+<a name="reducing-variance" />
+
+## Reducing Variance in Benchmarks
+
+The Linux CPU frequency governor [discussed
+above](user_guide#disabling-cpu-frequency-scaling) is not the only source
+of noise in benchmarks. Some, but not all, of the sources of variance
+include:
+
+1. On multi-core machines not all CPUs/CPU cores/CPU threads run at the same
+   speed, so running a benchmark once and then again may give a
+   different result depending on which CPU it ran on.
+2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and
+   AMD's Turbo Core and Precision Boost, can temporarily change the CPU
+   frequency even when using the "performance" governor on Linux.
+3. Context switching between CPUs, or scheduling competition on the CPU the
+ benchmark is running on.
+4. Intel Hyperthreading or AMD SMT causing the same issue as above.
+5. Cache effects caused by code running on other CPUs.
+6. Non-uniform memory architectures (NUMA).
+
+These can cause variance in benchmark results within a single run
+(`--benchmark_repetitions=N`) or across multiple runs of the benchmark
+program.
+
+Reducing sources of variance is OS and architecture dependent, which is one
+reason some companies maintain machines dedicated to performance testing.
+
+Some of the easier and more effective ways of reducing variance on a typical
+Linux workstation are:
+
+1. Use the performance governor as [discussed
+above](user_guide#disabling-cpu-frequency-scaling).
+1. Disable processor boosting by:
+ ```sh
+ echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
+ ```
+ See the Linux kernel's
+ [boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt)
+ for more information.
+2. Set the benchmark program's task affinity to a fixed cpu. For example:
+ ```sh
+ taskset -c 0 ./mybenchmark
+ ```
+3. Disable Hyperthreading/SMT. This can be done in the BIOS or using the
+ `/sys` file system (see the LLVM project's [Benchmarking
+ tips](https://llvm.org/docs/Benchmarking.html)).
+4. Close other programs that do non-trivial things based on timers, such as
+ your web browser, desktop environment, etc.
+5. Reduce the working set of your benchmark to fit within the L1 cache, but
+ do be aware that this may lead you to optimize for an unrealistic
+ situation.
+
+Further resources on this topic:
+
+1. The LLVM project's [Benchmarking
+ tips](https://llvm.org/docs/Benchmarking.html).
+1. The Arch Wiki [CPU frequency
+   scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page.
diff --git a/third-party/benchmark/docs/releasing.md b/third-party/benchmark/docs/releasing.md
index 334f935..09bf937 100644
--- a/third-party/benchmark/docs/releasing.md
+++ b/third-party/benchmark/docs/releasing.md
@@ -1,30 +1,23 @@
# How to release
* Make sure you're on main and synced to HEAD
-* Ensure the project builds and tests run (sanity check only, obviously)
+* Ensure the project builds and tests run
* `parallel -j0 exec ::: test/*_test` can help ensure everything at least
passes
* Prepare release notes
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
commits between the last annotated tag and HEAD
* Pick the most interesting.
-* Create one last commit that updates the version saved in `CMakeLists.txt` and the
- `__version__` variable in `bindings/python/google_benchmark/__init__.py`to the release
- version you're creating. (This version will be used if benchmark is installed from the
- archive you'll be creating in the next step.)
+* Create one last commit that updates the version saved in `CMakeLists.txt` and `MODULE.bazel`
+ to the release version you're creating. (This version will be used if benchmark is installed
+ from the archive you'll be creating in the next step.)
```
-project (benchmark VERSION 1.6.0 LANGUAGES CXX)
+project (benchmark VERSION 1.8.0 LANGUAGES CXX)
```
-```python
-# bindings/python/google_benchmark/__init__.py
-
-# ...
-
-__version__ = "1.6.0" # <-- change this to the release version you are creating
-
-# ...
+```
+module(name = "com_github_google_benchmark", version="1.8.0")
```
* Create a release through github's interface
@@ -33,3 +26,6 @@
* `git pull --tags`
* `git tag -a -f <tag> <tag>`
* `git push --force --tags origin`
+* Confirm that the "Build and upload Python wheels" action runs to completion
+ * Run it manually if it hasn't run.
+ * IMPORTANT: When re-running manually, make sure to select the newly created `<tag>` as the workflow version in the "Run workflow" tab on the GitHub Actions page.
diff --git a/third-party/benchmark/docs/tools.md b/third-party/benchmark/docs/tools.md
index f2d0c49..411f41d 100644
--- a/third-party/benchmark/docs/tools.md
+++ b/third-party/benchmark/docs/tools.md
@@ -186,6 +186,146 @@
This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one.
As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+### Note: Interpreting the output
+
+Performance measurements are an art, and performance comparisons are doubly so.
+Results are often noisy and don't necessarily have large absolute differences to
+them, so just by visual inspection, it is not at all apparent if two
+measurements are actually showing a performance change or not. It is even more
+confusing with multiple benchmark repetitions.
+
+Thankfully, we can use statistical tests on the results to determine whether
+the performance has changed in a statistically significant way. `compare.py`
+uses [Mann–Whitney U
+test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with a null
+hypothesis being that there's no difference in performance.
+
+**The below output is a summary of a benchmark comparison with statistics
+provided for a multi-threaded process.**
+```
+Benchmark Time CPU Time Old Time New CPU Old CPU New
+-----------------------------------------------------------------------------------------------------------------------------
+benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27
+benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77
+benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77
+benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0
+benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0
+OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0
+```
+--------------------------------------------
+Here's a breakdown of each row:
+
+**benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for
+the statistical test comparing the performance of the process running with one
+thread. A value of 0.0000 suggests a statistically significant difference in
+performance. The comparison was conducted using the U Test (Mann-Whitney
+U Test) with 27 repetitions for each case.
+
+**benchmark/threads:1/process_time/real_time_mean**: This shows the relative
+difference in mean execution time between two different cases. The negative
+value (-0.1442) implies that the new process is faster by about 14.42%. The old
+time was 90 units, while the new time is 77 units.
+
+**benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the
+relative difference in the median execution time. Again, the new process is
+faster by 14.44%.
+
+**benchmark/threads:1/process_time/real_time_stddev**: This is the relative
+difference in the standard deviation of the execution time, which is a measure
+of how much variation or dispersion there is from the mean. A positive value
+(+0.3974) implies there is more variance in the execution time in the new
+process.
+
+**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of
+Variation. It is the ratio of the standard deviation to the mean. It provides a
+standardized measure of dispersion. An increase (+0.6329) indicates more
+relative variability in the new process.
+
+**OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is
+less influenced by outliers. The negative value indicates a general improvement
+in the new process. However, given the values are all zero for the old and new
+times, this seems to be a mistake or placeholder in the output.
+
+-----------------------------------------
+
+
+
+Let's first try to see what the different columns represent in the above
+`compare.py` benchmarking output:
+
+ 1. **Benchmark:** The name of the function being benchmarked, along with the
+ size of the input (after the slash).
+
+ 2. **Time:** The average time per operation, across all iterations.
+
+ 3. **CPU:** The average CPU time per operation, across all iterations.
+
+ 4. **Iterations:** The number of iterations the benchmark was run to get a
+ stable estimate.
+
+ 5. **Time Old and Time New:** These represent the average time it takes for a
+ function to run in two different scenarios or versions. For example, you
+ might be comparing how fast a function runs before and after you make some
+ changes to it.
+
+ 6. **CPU Old and CPU New:** These show the average amount of CPU time that the
+ function uses in two different scenarios or versions. This is similar to
+ Time Old and Time New, but focuses on CPU usage instead of overall time.
+
+In the comparison section, the relative differences in both time and CPU time
+are displayed for each input size.
+
+
+A statistically-significant difference is determined by a **p-value**, which is
+a measure of the probability that the observed difference could have occurred
+just by random chance. A smaller p-value indicates stronger evidence against the
+null hypothesis.
+
+**Therefore:**
+ 1. If the p-value is less than the chosen significance level (alpha), we
+ reject the null hypothesis and conclude the benchmarks are significantly
+ different.
+ 2. If the p-value is greater than or equal to alpha, we fail to reject the
+ null hypothesis and treat the two benchmarks as similar.
+
+
+
+The result of the statistical test is additionally communicated through color coding:
+```diff
++ Green:
+```
+ The benchmarks are _**statistically different**_. This could mean the
+ performance has either **significantly improved** or **significantly
+ deteriorated**. You should look at the actual performance numbers to see which
+ is the case.
+```diff
+- Red:
+```
+ The benchmarks are _**statistically similar**_. This means the performance
+ **hasn't significantly changed**.
+
+In statistical terms, **'green'** means we reject the null hypothesis that
+there's no difference in performance, and **'red'** means we fail to reject the
+null hypothesis. This might seem counter-intuitive if you're expecting 'green'
+to mean 'improved performance' and 'red' to mean 'worsened performance'.
+```
+ But remember, in this context:
+
+ 'Success' means 'successfully finding a difference'.
+ 'Failure' means 'failing to find a difference'.
+```
+
+
+Also, please note that **even if** we determine that there **is** a
+statistically-significant difference between the two measurements, it does not
+_necessarily_ mean that the actual benchmarks that were measured **are**
+different. The converse holds as well: even if we determine that there is **no**
+statistically-significant difference between the two measurements, it does not
+necessarily mean that the actual benchmarks that were measured **are not**
+different.
+
+
+
### U test
If there is a sufficient repetition count of the benchmarks, the tool can do
diff --git a/third-party/benchmark/docs/user_guide.md b/third-party/benchmark/docs/user_guide.md
index 34bea69..d22a906 100644
--- a/third-party/benchmark/docs/user_guide.md
+++ b/third-party/benchmark/docs/user_guide.md
@@ -28,6 +28,8 @@
[Templated Benchmarks](#templated-benchmarks)
+[Templated Benchmarks that take arguments](#templated-benchmarks-with-arguments)
+
[Fixtures](#fixtures)
[Custom Counters](#custom-counters)
@@ -50,14 +52,19 @@
[Custom Statistics](#custom-statistics)
+[Memory Usage](#memory-usage)
+
[Using RegisterBenchmark](#using-register-benchmark)
[Exiting with an Error](#exiting-with-an-error)
-[A Faster KeepRunning Loop](#a-faster-keep-running-loop)
+[A Faster `KeepRunning` Loop](#a-faster-keep-running-loop)
+
+## Benchmarking Tips
[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling)
+[Reducing Variance in Benchmarks](reducing_variance.md)
<a name="output-formats" />
@@ -180,6 +187,12 @@
BM_memcpy/32k 1834 ns 1837 ns 357143
```
+## Disabling Benchmarks
+
+It is possible to temporarily disable benchmarks by renaming the benchmark
+function to have the prefix "DISABLED_". This will cause the benchmark to
+be skipped at runtime.
+
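+For example, a minimal sketch (`DISABLED_BM_Accumulate` is a hypothetical
+benchmark used only for illustration):
+
+```c++
+static void DISABLED_BM_Accumulate(benchmark::State& state) {
+  int sum = 0;
+  for (auto _ : state) {
+    for (int i = 0; i < 1000; ++i) sum += i;
+    benchmark::DoNotOptimize(sum);
+  }
+}
+// Registered as usual, but skipped at runtime because of the DISABLED_ prefix.
+BENCHMARK(DISABLED_BM_Accumulate);
+```
+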
<a name="result-comparison" />
## Result comparison
@@ -232,6 +245,19 @@
the minimum time, or the wallclock time is 5x minimum time. The minimum time is
set per benchmark by calling `MinTime` on the registered benchmark object.
+Furthermore, warming up a benchmark might be necessary to get stable
+results, e.g. because of caching effects in the code under benchmark.
+Warming up means running the benchmark for a given amount of time before
+any results are taken into account. The amount of time for which the
+warmup should be run can be set per benchmark by calling `MinWarmUpTime`
+on the registered benchmark object (see the sketch below) or for all
+benchmarks using the `--benchmark_min_warmup_time` command-line option.
+Note that `MinWarmUpTime` overrides `--benchmark_min_warmup_time` for that
+single benchmark. How many iterations the warmup run of each benchmark
+takes is determined the same way as described in the paragraph above. By
+default the warmup phase is set to 0 seconds and is therefore disabled.
+
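+A minimal sketch of the per-benchmark variant (`BM_Contended` is a
+hypothetical benchmark name used only for illustration):
+
+```c++
+static void BM_Contended(benchmark::State& state) {
+  for (auto _ : state) {
+    // ... code under benchmark ...
+  }
+}
+// Run (and discard) at least one second of warmup before measurements count.
+BENCHMARK(BM_Contended)->MinWarmUpTime(1.0);
+```
+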
Average timings are then reported over the iterations run. If multiple
repetitions are requested using the `--benchmark_repetitions` command-line
option, or at registration time, the benchmark function will be run several
@@ -247,10 +273,12 @@
Global setup/teardown specific to each benchmark can be done by
passing a callback to Setup/Teardown:
-The setup/teardown callbacks will be invoked once for each benchmark.
-If the benchmark is multi-threaded (will run in k threads), they will be invoked exactly once before
-each run with k threads.
-If the benchmark uses different size groups of threads, the above will be true for each size group.
+The setup/teardown callbacks will be invoked once for each benchmark. If the
+benchmark is multi-threaded (will run in k threads), they will be invoked
+exactly once before each run with k threads.
+
+If the benchmark uses different size groups of threads, the above will be true
+for each size group.
Eg.,
@@ -293,7 +321,7 @@
delete[] src;
delete[] dst;
}
-BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
+BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(8<<10);
```
The preceding code is quite repetitive, and can be replaced with the following
@@ -322,7 +350,8 @@
static void BM_DenseRange(benchmark::State& state) {
for(auto _ : state) {
std::vector<int> v(state.range(0), state.range(0));
- benchmark::DoNotOptimize(v.data());
+ auto data = v.data();
+ benchmark::DoNotOptimize(data);
benchmark::ClobberMemory();
}
}
@@ -362,17 +391,17 @@
product of the two specified ranges and will generate a benchmark for each such
pair.
-{% raw %}
+<!-- {% raw %} -->
```c++
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
```
-{% endraw %}
+<!-- {% endraw %} -->
Some benchmarks may require specific argument values that cannot be expressed
with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a
benchmark input for each combination in the product of the supplied vectors.
-{% raw %}
+<!-- {% raw %} -->
```c++
BENCHMARK(BM_SetInsert)
->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}})
@@ -391,7 +420,7 @@
->Args({3<<10, 80})
->Args({8<<10, 80});
```
-{% endraw %}
+<!-- {% endraw %} -->
For the most common scenarios, helper methods for creating a list of
integers for a given sparse or dense range are provided.
@@ -434,13 +463,22 @@
should describe the values passed.
```c++
-template <class ...ExtraArgs>
-void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
- [...]
+template <class ...Args>
+void BM_takes_args(benchmark::State& state, Args&&... args) {
+ auto args_tuple = std::make_tuple(std::move(args)...);
+ for (auto _ : state) {
+ std::cout << std::get<0>(args_tuple) << ": " << std::get<1>(args_tuple)
+ << '\n';
+ [...]
+ }
}
// Registers a benchmark named "BM_takes_args/int_string_test" that passes
-// the specified values to `extra_args`.
+// the specified values to `args`.
BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
+
+// Registers the same benchmark "BM_takes_args/int_test" that passes
+// the specified values to `args`.
+BENCHMARK_CAPTURE(BM_takes_args, int_test, 42, 43);
```
Note that elements of `...args` may refer to global variables. Users should
@@ -459,7 +497,8 @@
std::string s1(state.range(0), '-');
std::string s2(state.range(0), '-');
for (auto _ : state) {
- benchmark::DoNotOptimize(s1.compare(s2));
+ auto comparison_result = s1.compare(s2);
+ benchmark::DoNotOptimize(comparison_result);
}
state.SetComplexityN(state.range(0));
}
@@ -537,6 +576,30 @@
#define BENCHMARK_TEMPLATE2(func, arg1, arg2)
```
+<a name="templated-benchmarks-with-arguments" />
+
+## Templated Benchmarks that take arguments
+
+Sometimes there is a need to template benchmarks, and provide arguments to them.
+
+```c++
+template <class Q> void BM_Sequential_With_Step(benchmark::State& state, int step) {
+ Q q;
+ typename Q::value_type v;
+ for (auto _ : state) {
+ for (int i = state.range(0); i-=step; )
+ q.push(v);
+ for (int e = state.range(0); e-=step; )
+ q.Wait(&v);
+ }
+ // actually messages, not bytes:
+ state.SetBytesProcessed(
+ static_cast<int64_t>(state.iterations())*state.range(0));
+}
+
+BENCHMARK_TEMPLATE1_CAPTURE(BM_Sequential_With_Step, WaitQueue<int>, Step1, 1)->Range(1<<0, 1<<10);
+```
+
<a name="fixtures" />
## Fixtures
@@ -554,10 +617,10 @@
```c++
class MyFixture : public benchmark::Fixture {
public:
- void SetUp(const ::benchmark::State& state) {
+ void SetUp(::benchmark::State& state) {
}
- void TearDown(const ::benchmark::State& state) {
+ void TearDown(::benchmark::State& state) {
}
};
@@ -668,7 +731,7 @@
When you're compiling in C++11 mode or later you can use `insert()` with
`std::initializer_list`:
-{% raw %}
+<!-- {% raw %} -->
```c++
// With C++11, this can be done:
state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}});
@@ -677,7 +740,7 @@
state.counters["Bar"] = numBars;
state.counters["Baz"] = numBazs;
```
-{% endraw %}
+<!-- {% endraw %} -->
### Counter Reporting
@@ -773,6 +836,16 @@
BENCHMARK(BM_MultiThreaded)->Threads(2);
```
+To run the benchmark across a range of thread counts, instead of `Threads`, use
+`ThreadRange`. This takes two parameters (`min_threads` and `max_threads`) and
+runs the benchmark once for values in the inclusive range. For example:
+
+```c++
+BENCHMARK(BM_MultiThreaded)->ThreadRange(1, 8);
+```
+
+will run `BM_MultiThreaded` with thread counts 1, 2, 4, and 8.
+
If the benchmarked code itself uses threads and you want to compare it to
single-threaded code, you may want to use real-time ("wallclock") measurements
for latency comparisons:
@@ -814,7 +887,7 @@
// Measure the user-visible time, the wall clock (literally, the time that
// has passed on the clock on the wall), use it to decide for how long to
-// run the benchmark loop. This will always be meaningful, an will match the
+// run the benchmark loop. This will always be meaningful, and will match the
// time spent by the main thread in single-threaded case, in general decreasing
// with the number of internal threads doing the work.
BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime();
@@ -836,7 +909,7 @@
that loop, every iteration, but without counting that time to the benchmark time.
That is possible, although it is not recommended, since it has high overhead.
-{% raw %}
+<!-- {% raw %} -->
```c++
static void BM_SetInsert_With_Timer_Control(benchmark::State& state) {
std::set<int> data;
@@ -851,7 +924,7 @@
}
BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}});
```
-{% endraw %}
+<!-- {% endraw %} -->
<a name="manual-timing" />
@@ -906,6 +979,10 @@
BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
```
+Additionally the default time unit can be set globally with the
+`--benchmark_time_unit={ns|us|ms|s}` command line argument. The argument only
+affects benchmarks where the time unit is not set explicitly.
+
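+The default time unit can presumably also be set programmatically via
+`benchmark::SetDefaultTimeUnit` (declared in `benchmark.h`); a minimal sketch,
+assuming it is called before the benchmarks run:
+
+```c++
+int main(int argc, char** argv) {
+  // Make milliseconds the default unit for benchmarks that do not call Unit().
+  benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+  return 0;
+}
+```
+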
<a name="preventing-optimization" />
## Preventing Optimization
@@ -958,7 +1035,8 @@
for (auto _ : state) {
std::vector<int> v;
v.reserve(1);
- benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered.
+ auto data = v.data(); // Allow v.data() to be clobbered. Pass as non-const
+ benchmark::DoNotOptimize(data); // lvalue to avoid undesired compiler optimizations
v.push_back(42);
benchmark::ClobberMemory(); // Force 42 to be written to memory.
}
@@ -1037,10 +1115,25 @@
BENCHMARK(BM_spin_empty)
->ComputeStatistics("ratio", [](const std::vector<double>& v) -> double {
return std::begin(v) / std::end(v);
- }, benchmark::StatisticUnit::Percentage)
+ }, benchmark::StatisticUnit::kPercentage)
->Arg(512);
```
+<a name="memory-usage" />
+
+## Memory Usage
+
+It's often useful to also track memory usage for benchmarks, alongside CPU
+performance. For this reason, benchmark offers the `RegisterMemoryManager`
+method that allows a custom `MemoryManager` to be injected.
+
+If set, the `MemoryManager::Start` and `MemoryManager::Stop` methods will be
+called at the start and end of benchmark runs to allow user code to fill out
+a report on the number of allocations, bytes used, etc.
+
+This data will then be reported alongside other performance data, currently
+only when using JSON output.
+
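+A minimal sketch of wiring up such a manager (`MyMemoryManager` and the
+`num_allocs`/`max_bytes_used` result fields are illustrative assumptions;
+consult `MemoryManager::Result` for the actual fields):
+
+```c++
+class MyMemoryManager : public benchmark::MemoryManager {
+ public:
+  void Start() override { /* reset your allocation counters here */ }
+  void Stop(Result& result) override {
+    // Report whatever your allocator hooks tracked during the run.
+    result.num_allocs = 0;      // assumed field name
+    result.max_bytes_used = 0;  // assumed field name
+  }
+};
+
+int main(int argc, char** argv) {
+  MyMemoryManager mm;
+  benchmark::RegisterMemoryManager(&mm);
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+  return 0;
+}
+```
+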
<a name="using-register-benchmark" />
## Using RegisterBenchmark(name, fn, args...)
@@ -1077,7 +1170,7 @@
When errors caused by external influences, such as file I/O and network
communication, occur within a benchmark the
-`State::SkipWithError(const char* msg)` function can be used to skip that run
+`State::SkipWithError(const std::string& msg)` function can be used to skip that run
of benchmark and report the error. Note that only future iterations of the
`KeepRunning()` are skipped. For the ranged-for version of the benchmark loop
Users must explicitly exit the loop, otherwise all iterations will be performed.
@@ -1188,13 +1281,12 @@
If you see this error:
```
-***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may
+be noisy and will incur extra overhead.
```
-you might want to disable the CPU frequency scaling while running the benchmark:
+you might want to disable the CPU frequency scaling while running the
+benchmark, as well as consider other ways to stabilize the performance of
+your system while benchmarking.
-```bash
-sudo cpupower frequency-set --governor performance
-./mybench
-sudo cpupower frequency-set --governor powersave
-```
+See [Reducing Variance](reducing_variance.md) for more information.
diff --git a/third-party/benchmark/include/benchmark/benchmark.h b/third-party/benchmark/include/benchmark/benchmark.h
index 6287c0a..08cfe29 100644
--- a/third-party/benchmark/include/benchmark/benchmark.h
+++ b/third-party/benchmark/include/benchmark/benchmark.h
@@ -187,6 +187,8 @@
#include <utility>
#include <vector>
+#include "benchmark/export.h"
+
#if defined(BENCHMARK_HAS_CXX11)
#include <atomic>
#include <initializer_list>
@@ -216,37 +218,45 @@
#define BENCHMARK_UNUSED
#endif
+// Used to annotate functions, methods and classes so they
+// are not optimized by the compiler. Useful for tests
+// where you expect loops to stay in place churning cycles
+#if defined(__clang__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
+#elif defined(__GNUC__) || defined(__GNUG__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
+#else
+// MSVC & Intel do not have a no-optimize attribute, only line pragmas
+#define BENCHMARK_DONT_OPTIMIZE
+#endif
+
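+// A minimal usage sketch (SpinABit is a hypothetical helper, shown only to
+// illustrate where the annotation goes):
+//
+//   void BENCHMARK_DONT_OPTIMIZE SpinABit() {
+//     for (volatile int i = 0; i < 1000; ++i) {
+//     }
+//   }
+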
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
-#define BENCHMARK_NOEXCEPT noexcept
-#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __forceinline
-#if _MSC_VER >= 1900
-#define BENCHMARK_NOEXCEPT noexcept
-#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
-#else
-#define BENCHMARK_NOEXCEPT
-#define BENCHMARK_NOEXCEPT_OP(x)
-#endif
#define __func__ __FUNCTION__
#else
#define BENCHMARK_ALWAYS_INLINE
-#define BENCHMARK_NOEXCEPT
-#define BENCHMARK_NOEXCEPT_OP(x)
#endif
#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
// clang-format off
-#if defined(__GNUC__) || defined(__clang__)
+#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
+#elif defined(__NVCOMPILER)
+#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
+#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
+#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
+ _Pragma("diagnostic push") \
+ _Pragma("diag_suppress deprecated_entity_with_custom_message")
+#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
#else
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg)
@@ -280,18 +290,47 @@
#define BENCHMARK_OVERRIDE
#endif
+#if defined(_MSC_VER)
+#pragma warning(push)
+// C4251: <symbol> needs to have dll-interface to be used by clients of class
+#pragma warning(disable : 4251)
+#endif
+
namespace benchmark {
class BenchmarkReporter;
-void Initialize(int* argc, char** argv);
-void Shutdown();
+// Default minimum benchmark running time, in seconds.
+const char kDefaultMinTimeStr[] = "0.5s";
+
+// Returns the version of the library.
+BENCHMARK_EXPORT std::string GetBenchmarkVersion();
+
+BENCHMARK_EXPORT void PrintDefaultHelp();
+
+BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
+ void (*HelperPrinterf)() = PrintDefaultHelp);
+BENCHMARK_EXPORT void Shutdown();
// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true there is at least on unrecognized argument (i.e. 'argc' > 1).
-bool ReportUnrecognizedArguments(int argc, char** argv);
+BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv);
// Returns the current value of --benchmark_filter.
-std::string GetBenchmarkFilter();
+BENCHMARK_EXPORT std::string GetBenchmarkFilter();
+
+// Sets a new value to --benchmark_filter. (This will override this flag's
+// current value).
+// Should be called after `benchmark::Initialize()`, as
+// `benchmark::Initialize()` will override the flag's value.
+BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value);
+
+// Returns the current value of --v (command line value for verbosity).
+BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity();
+
+// Creates a default display reporter. Used by the library when no display
+// reporter is provided, but also made available for external use in case a
+// custom reporter should respect the `--benchmark_format` flag as a fallback
+BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter();
// Generate a list of benchmarks matching the specified --benchmark_filter flag
// and if --benchmark_list_tests is specified return after printing the name
@@ -305,22 +344,33 @@
// The second and third overload use the specified 'display_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified
-// by '--benchmark_output'. If '--benchmark_output' is not given the
+// by '--benchmark_out'. If '--benchmark_out' is not given the
// 'file_reporter' is ignored.
//
// RETURNS: The number of matching benchmarks.
-size_t RunSpecifiedBenchmarks();
-size_t RunSpecifiedBenchmarks(std::string spec);
+BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks();
+BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec);
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
- std::string spec);
+BENCHMARK_EXPORT size_t
+RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
+BENCHMARK_EXPORT size_t
+RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec);
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
- BenchmarkReporter* file_reporter);
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
- BenchmarkReporter* file_reporter,
- std::string spec);
+BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(
+ BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter);
+BENCHMARK_EXPORT size_t
+RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
+ BenchmarkReporter* file_reporter, std::string spec);
+
+// TimeUnit is passed to a benchmark in order to specify the order of magnitude
+// for the measured time.
+enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
+
+BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit();
+
+// Sets the default time unit the benchmarks use
+// Has to be called before the benchmark loop to take effect
+BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit);
// If a MemoryManager is registered (via RegisterMemoryManager()),
// it can be used to collect and report allocation metrics for a run of the
@@ -358,20 +408,16 @@
virtual void Start() = 0;
// Implement this to stop recording and fill out the given Result structure.
- BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead")
- virtual void Stop(Result* result) = 0;
-
- // FIXME(vyng): Make this pure virtual once we've migrated current users.
- BENCHMARK_DISABLE_DEPRECATED_WARNING
- virtual void Stop(Result& result) { Stop(&result); }
- BENCHMARK_RESTORE_DEPRECATED_WARNING
+ virtual void Stop(Result& result) = 0;
};
// Register a MemoryManager instance that will be used to collect and report
// allocation measurements for benchmark runs.
+BENCHMARK_EXPORT
void RegisterMemoryManager(MemoryManager* memory_manager);
// Add a key-value pair to output as part of the context stanza in the report.
+BENCHMARK_EXPORT
void AddCustomContext(const std::string& key, const std::string& value);
namespace internal {
@@ -379,14 +425,17 @@
class BenchmarkImp;
class BenchmarkFamilies;
+BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext();
+
+BENCHMARK_EXPORT
void UseCharPointer(char const volatile*);
// Take ownership of the pointer and register the benchmark. Return the
// registered benchmark.
-Benchmark* RegisterBenchmarkInternal(Benchmark*);
+BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(Benchmark*);
// Ensure that the standard streams are properly initialized in every TU.
-int InitializeStreams();
+BENCHMARK_EXPORT int InitializeStreams();
BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
} // namespace internal
@@ -409,7 +458,11 @@
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
+#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "r,m"(value) : "memory");
}
@@ -423,6 +476,98 @@
#endif
}
+#ifdef BENCHMARK_HAS_CXX11
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
+#if defined(__clang__)
+ asm volatile("" : "+r,m"(value) : : "memory");
+#else
+ asm volatile("" : "+m,r"(value) : : "memory");
+#endif
+}
+#endif
+#elif defined(BENCHMARK_HAS_CXX11) && (__GNUC__ >= 5)
+// Workaround for a bug with full argument copy overhead with GCC.
+// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
+template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
+ (sizeof(Tp) <= sizeof(Tp*))>::type
+ DoNotOptimize(Tp const& value) {
+ asm volatile("" : : "r,m"(value) : "memory");
+}
+
+template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
+ (sizeof(Tp) > sizeof(Tp*))>::type
+ DoNotOptimize(Tp const& value) {
+ asm volatile("" : : "m"(value) : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
+ (sizeof(Tp) <= sizeof(Tp*))>::type
+ DoNotOptimize(Tp& value) {
+ asm volatile("" : "+m,r"(value) : : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
+ (sizeof(Tp) > sizeof(Tp*))>::type
+ DoNotOptimize(Tp& value) {
+ asm volatile("" : "+m"(value) : : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
+ (sizeof(Tp) <= sizeof(Tp*))>::type
+ DoNotOptimize(Tp&& value) {
+ asm volatile("" : "+m,r"(value) : : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
+ (sizeof(Tp) > sizeof(Tp*))>::type
+ DoNotOptimize(Tp&& value) {
+ asm volatile("" : "+m"(value) : : "memory");
+}
+
+#else
+// Fallback for GCC < 5. Can add some overhead because the compiler is forced
+// to use memory operations instead of operations with registers.
+// TODO: Remove if GCC < 5 will be unsupported.
+template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
+ asm volatile("" : : "m"(value) : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
+ asm volatile("" : "+m"(value) : : "memory");
+}
+
+#ifdef BENCHMARK_HAS_CXX11
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
+ asm volatile("" : "+m"(value) : : "memory");
+}
+#endif
+#endif
+
#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
asm volatile("" : : : "memory");
@@ -430,6 +575,9 @@
#endif
#elif defined(_MSC_VER)
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
_ReadWriteBarrier();
@@ -439,10 +587,25 @@
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
#endif
#else
+#ifdef BENCHMARK_HAS_CXX11
template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
+ internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
+}
+#else
+template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
+ internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
+}
+#endif
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
#endif
@@ -506,23 +669,21 @@
// This is the container for the user-defined counters.
typedef std::map<std::string, Counter> UserCounters;
-// TimeUnit is passed to a benchmark in order to specify the order of magnitude
-// for the measured time.
-enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
-
// BigO is passed to a benchmark in order to specify the asymptotic
// computational
// complexity for the benchmark. In case oAuto is selected, complexity will be
// calculated automatically to the best fit.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
-typedef uint64_t IterationCount;
+typedef int64_t ComplexityN;
+
+typedef int64_t IterationCount;
enum StatisticUnit { kTime, kPercentage };
// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark.
-typedef double(BigOFunc)(IterationCount);
+typedef double(BigOFunc)(ComplexityN);
// StatisticsFunc is passed to a benchmark in order to compute some descriptive
// statistics over all the measurements of some type
@@ -564,11 +725,21 @@
ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
+enum Skipped
+#if defined(BENCHMARK_HAS_CXX11)
+ : unsigned
+#endif
+{
+ NotSkipped = 0,
+ SkippedWithMessage,
+ SkippedWithError
+};
+
} // namespace internal
// State is passed to a running Benchmark and contains state for the
// benchmark to use.
-class State {
+class BENCHMARK_EXPORT State {
public:
struct StateIterator;
friend struct StateIterator;
@@ -580,13 +751,13 @@
// have been called previously.
//
// NOTE: KeepRunning may not be used after calling either of these functions.
- BENCHMARK_ALWAYS_INLINE StateIterator begin();
- BENCHMARK_ALWAYS_INLINE StateIterator end();
+ inline BENCHMARK_ALWAYS_INLINE StateIterator begin();
+ inline BENCHMARK_ALWAYS_INLINE StateIterator end();
// Returns true if the benchmark should continue through another iteration.
// NOTE: A benchmark may not return from the test until KeepRunning() has
// returned false.
- bool KeepRunning();
+ inline bool KeepRunning();
// Returns true iff the benchmark should run n more iterations.
// REQUIRES: 'n' > 0.
@@ -598,10 +769,10 @@
// while (state.KeepRunningBatch(1000)) {
// // process 1000 elements
// }
- bool KeepRunningBatch(IterationCount n);
+ inline bool KeepRunningBatch(IterationCount n);
- // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
- // by the current thread.
+ // REQUIRES: timer is running and 'SkipWithMessage(...)' or
+ // 'SkipWithError(...)' has not been called by the current thread.
// Stop the benchmark timer. If not called, the timer will be
// automatically stopped after the last iteration of the benchmark loop.
//
@@ -616,8 +787,8 @@
// within each benchmark iteration, if possible.
void PauseTiming();
- // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
- // by the current thread.
+ // REQUIRES: timer is not running and 'SkipWithMessage(...)' or
+ // 'SkipWithError(...)' has not been called by the current thread.
// Start the benchmark timer. The timer is NOT running on entrance to the
// benchmark function. It begins running after control flow enters the
// benchmark loop.
@@ -627,8 +798,30 @@
// within each benchmark iteration, if possible.
void ResumeTiming();
- // REQUIRES: 'SkipWithError(...)' has not been called previously by the
- // current thread.
+ // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
+ // called previously by the current thread.
+ // Report the benchmark as resulting in being skipped with the specified
+ // 'msg'.
+ // After this call the user may explicitly 'return' from the benchmark.
+ //
+ // If the ranged-for style of benchmark loop is used, the user must explicitly
+ // break from the loop, otherwise all future iterations will be run.
+ // If the 'KeepRunning()' loop is used the current thread will automatically
+ // exit the loop at the end of the current iteration.
+ //
+ // For threaded benchmarks only the current thread stops executing and future
+ // calls to `KeepRunning()` will block until all threads have completed
+ // the `KeepRunning()` loop. If multiple threads report being skipped only the
+ // first skip message is used.
+ //
+ // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit
+ // the current scope immediately. If the function is called from within
+ // the 'KeepRunning()' loop the current iteration will finish. It is the users
+ // responsibility to exit the scope as needed.
+ void SkipWithMessage(const std::string& msg);
+
+ // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
+ // called previously by the current thread.
// Report the benchmark as resulting in an error with the specified 'msg'.
// After this call the user may explicitly 'return' from the benchmark.
//
@@ -646,10 +839,13 @@
// the current scope immediately. If the function is called from within
// the 'KeepRunning()' loop the current iteration will finish. It is the users
// responsibility to exit the scope as needed.
- void SkipWithError(const char* msg);
+ void SkipWithError(const std::string& msg);
+
+ // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called.
+ bool skipped() const { return internal::NotSkipped != skipped_; }
// Returns true if an error has been reported with 'SkipWithError(...)'.
- bool error_occurred() const { return error_occurred_; }
+ bool error_occurred() const { return internal::SkippedWithError == skipped_; }
// REQUIRES: called exactly once per iteration of the benchmarking loop.
// Set the manually measured time for this benchmark iteration, which
@@ -684,10 +880,12 @@
// and complexity_n will
// represent the length of N.
BENCHMARK_ALWAYS_INLINE
- void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
+ void SetComplexityN(ComplexityN complexity_n) {
+ complexity_n_ = complexity_n;
+ }
BENCHMARK_ALWAYS_INLINE
- int64_t complexity_length_n() const { return complexity_n_; }
+ ComplexityN complexity_length_n() const { return complexity_n_; }
// If this routine is called with items > 0, then an items/s
// label is printed on the benchmark report line for the currently
@@ -720,11 +918,7 @@
// BM_Compress 50 50 14115038 compress:27.3%
//
// REQUIRES: a benchmark has exited its benchmarking loop.
- void SetLabel(const char* label);
-
- void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
- this->SetLabel(str.c_str());
- }
+ void SetLabel(const std::string& label);
// Range arguments for this run. CHECKs if the argument has been set.
BENCHMARK_ALWAYS_INLINE
@@ -755,6 +949,9 @@
return max_iterations - total_iterations_ + batch_leftover_;
}
+ BENCHMARK_ALWAYS_INLINE
+ std::string name() const { return name_; }
+
private:
// items we expect on the first cache line (ie 64 bytes of the struct)
// When total_iterations_ is 0, KeepRunning() and friends will return false.
@@ -772,29 +969,30 @@
private:
bool started_;
bool finished_;
- bool error_occurred_;
+ internal::Skipped skipped_;
// items we don't need on the first cache line
std::vector<int64_t> range_;
- int64_t complexity_n_;
+ ComplexityN complexity_n_;
public:
// Container for user-defined counters.
UserCounters counters;
private:
- State(IterationCount max_iters, const std::vector<int64_t>& ranges,
- int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager,
+ State(std::string name, IterationCount max_iters,
+ const std::vector<int64_t>& ranges, int thread_i, int n_threads,
+ internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement);
void StartKeepRunning();
// Implementation of KeepRunning() and KeepRunningBatch().
// is_batch must be true unless n is 1.
- bool KeepRunningInternal(IterationCount n, bool is_batch);
+ inline bool KeepRunningInternal(IterationCount n, bool is_batch);
void FinishKeepRunning();
+ const std::string name_;
const int thread_index_;
const int threads_;
@@ -826,7 +1024,7 @@
}
if (!started_) {
StartKeepRunning();
- if (!error_occurred_ && total_iterations_ >= n) {
+ if (!skipped() && total_iterations_ >= n) {
total_iterations_ -= n;
return true;
}
@@ -856,7 +1054,7 @@
BENCHMARK_ALWAYS_INLINE
explicit StateIterator(State* st)
- : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
+ : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {}
public:
BENCHMARK_ALWAYS_INLINE
@@ -899,7 +1097,7 @@
// be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can
// chained into one expression.
-class Benchmark {
+class BENCHMARK_EXPORT Benchmark {
public:
virtual ~Benchmark();
@@ -971,7 +1169,7 @@
// Have "setup" and/or "teardown" invoked once for every benchmark run.
// If the benchmark is multi-threaded (will run in k threads concurrently),
- // the setup callback will be invoked exactly once (not k times) before
+ // the setup callback will be invoked exactly once (not k times) before
// each run with k threads. Time allowing (e.g. for a short benchmark), there
// may be multiple such runs per benchmark, each run with its own
// "setup"/"teardown".
@@ -1000,12 +1198,19 @@
// REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
Benchmark* MinTime(double t);
+ // Set the minimum amount of time to run the benchmark before taking runtimes
+ // of this benchmark into account. This
+ // option overrides the `benchmark_min_warmup_time` flag.
+ // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark.
+ Benchmark* MinWarmUpTime(double t);
+
// Specify the amount of iterations that should be run by this benchmark.
+ // This option overrides the `benchmark_min_time` flag.
// REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
//
// NOTE: This function should only be used when *exact* iteration control is
// needed and never to control or limit how long a benchmark runs, where
- // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
+ // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead.
Benchmark* Iterations(IterationCount n);
// Specify the amount of times to repeat this benchmark. This option overrides
@@ -1025,7 +1230,7 @@
// By default, the CPU time is measured only for the main thread, which may
// be unrepresentative if the benchmark uses threads internally. If called,
// the total CPU time spent by all the threads will be measured instead.
- // By default, the only the main thread CPU time will be measured.
+ // By default, only the main thread CPU time will be measured.
Benchmark* MeasureProcessCPUTime();
// If a particular benchmark should use the Wall clock instead of the CPU time
@@ -1090,12 +1295,16 @@
virtual void Run(State& state) = 0;
- protected:
- explicit Benchmark(const char* name);
- Benchmark(Benchmark const&);
- void SetName(const char* name);
+ TimeUnit GetTimeUnit() const;
+ protected:
+ explicit Benchmark(const std::string& name);
+ void SetName(const std::string& name);
+
+ public:
+ const char* GetName() const;
int ArgsCnt() const;
+ const char* GetArgName(int arg) const;
private:
friend class BenchmarkFamilies;
@@ -1105,9 +1314,13 @@
AggregationReportMode aggregation_report_mode_;
std::vector<std::string> arg_names_; // Args for all benchmark runs
std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs
+
TimeUnit time_unit_;
+ bool use_default_time_unit_;
+
int range_multiplier_;
double min_time_;
+ double min_warmup_time_;
IterationCount iterations_;
int repetitions_;
bool measure_process_cpu_time_;
@@ -1122,7 +1335,17 @@
callback_function setup_;
callback_function teardown_;
- Benchmark& operator=(Benchmark const&);
+ Benchmark(Benchmark const&)
+#if defined(BENCHMARK_HAS_CXX11)
+ = delete
+#endif
+ ;
+
+ Benchmark& operator=(Benchmark const&)
+#if defined(BENCHMARK_HAS_CXX11)
+ = delete
+#endif
+ ;
};
} // namespace internal
@@ -1131,27 +1354,27 @@
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
-internal::Benchmark* RegisterBenchmark(const char* name,
+internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn);
#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
#endif
// Remove all registered benchmarks. All pointers to previously registered
// benchmarks are invalidated.
-void ClearRegisteredBenchmarks();
+BENCHMARK_EXPORT void ClearRegisteredBenchmarks();
namespace internal {
// The class used to hold all Benchmarks created from static function.
// (ie those created using the BENCHMARK(...) macros.
-class FunctionBenchmark : public Benchmark {
+class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
public:
- FunctionBenchmark(const char* name, Function* func)
+ FunctionBenchmark(const std::string& name, Function* func)
: Benchmark(name), func_(func) {}
- virtual void Run(State& st) BENCHMARK_OVERRIDE;
+ void Run(State& st) BENCHMARK_OVERRIDE;
private:
Function* func_;
@@ -1161,35 +1384,38 @@
template <class Lambda>
class LambdaBenchmark : public Benchmark {
public:
- virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
+ void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
private:
template <class OLambda>
- LambdaBenchmark(const char* name, OLambda&& lam)
+ LambdaBenchmark(const std::string& name, OLambda&& lam)
: Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
LambdaBenchmark(LambdaBenchmark const&) = delete;
template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration)
- friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
+ friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&);
Lambda lambda_;
};
#endif
-
} // namespace internal
-inline internal::Benchmark* RegisterBenchmark(const char* name,
+inline internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn) {
+ // FIXME: this should be a `std::make_unique<>()` but we don't have C++14.
+ // codechecker_intentional [cplusplus.NewDeleteLeaks]
return internal::RegisterBenchmarkInternal(
::new internal::FunctionBenchmark(name, fn));
}
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
using BenchType =
internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
+ // FIXME: this should be a `std::make_unique<>()` but we don't have C++14.
+ // codechecker_intentional [cplusplus.NewDeleteLeaks]
return internal::RegisterBenchmarkInternal(
::new BenchType(name, std::forward<Lambda>(fn)));
}
@@ -1198,7 +1424,7 @@
#if defined(BENCHMARK_HAS_CXX11) && \
(!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class... Args>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
Args&&... args) {
return benchmark::RegisterBenchmark(
name, [=](benchmark::State& st) { fn(st, args...); });
@@ -1212,7 +1438,7 @@
public:
Fixture() : internal::Benchmark("") {}
- virtual void Run(State& st) BENCHMARK_OVERRIDE {
+ void Run(State& st) BENCHMARK_OVERRIDE {
this->SetUp(st);
this->BenchmarkCase(st);
this->TearDown(st);
@@ -1228,7 +1454,6 @@
protected:
virtual void BenchmarkCase(State&) = 0;
};
-
} // namespace benchmark
// ------------------------------------------------------
@@ -1268,7 +1493,7 @@
BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
- &__VA_ARGS__)))
+ __VA_ARGS__)))
#else
#define BENCHMARK(n) \
BENCHMARK_PRIVATE_DECLARE(n) = \
@@ -1298,7 +1523,7 @@
// /* Registers a benchmark named "BM_takes_args/int_string_test` */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
#define BENCHMARK_CAPTURE(func, test_case_name, ...) \
- BENCHMARK_PRIVATE_DECLARE(func) = \
+ BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark( \
#func "/" #test_case_name, \
@@ -1335,37 +1560,62 @@
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
-#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
- class BaseClass##_##Method##_Benchmark : public BaseClass { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#ifdef BENCHMARK_HAS_CXX11
+// This will register a benchmark for a templatized function,
+// with the additional arguments specified by `...`.
+//
+// For example:
+//
+// template <typename T, class ...ExtraArgs>`
+// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
+// [...]
+//}
+// /* Registers a benchmark named "BM_takes_args<void>/int_string_test` */
+// BENCHMARK_TEMPLATE1_CAPTURE(BM_takes_args, void, int_string_test, 42,
+// std::string("abc"));
+#define BENCHMARK_TEMPLATE1_CAPTURE(func, a, test_case_name, ...) \
+ BENCHMARK_CAPTURE(func<a>, test_case_name, __VA_ARGS__)
+
+#define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) \
+ BENCHMARK_PRIVATE_DECLARE(func) = \
+ (::benchmark::internal::RegisterBenchmarkInternal( \
+ new ::benchmark::internal::FunctionBenchmark( \
+ #func "<" #a "," #b ">" \
+ "/" #test_case_name, \
+ [](::benchmark::State& st) { func<a, b>(st, __VA_ARGS__); })))
+#endif // BENCHMARK_HAS_CXX11
+
+#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
-#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
- class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "<" #a ">/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "<" #a ">/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
-#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
- class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#ifdef BENCHMARK_HAS_CXX11
@@ -1377,7 +1627,7 @@
} \
\
protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
@@ -1439,8 +1689,15 @@
#endif
// Helper macro to create a main routine in a test that runs the benchmarks
+// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL.
#define BENCHMARK_MAIN() \
int main(int argc, char** argv) { \
+ char arg0_default[] = "benchmark"; \
+ char* args_default = arg0_default; \
+ if (!argv) { \
+ argc = 1; \
+ argv = &args_default; \
+ } \
::benchmark::Initialize(&argc, argv); \
if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
::benchmark::RunSpecifiedBenchmarks(); \
@@ -1454,7 +1711,7 @@
namespace benchmark {
-struct CPUInfo {
+struct BENCHMARK_EXPORT CPUInfo {
struct CacheInfo {
std::string type;
int level;
@@ -1478,7 +1735,7 @@
};
// Adding Struct for System Information
-struct SystemInfo {
+struct BENCHMARK_EXPORT SystemInfo {
std::string name;
static const SystemInfo& Get();
@@ -1490,10 +1747,11 @@
// BenchmarkName contains the components of the Benchmark's name
// which allows individual fields to be modified or cleared before
// building the final name using 'str()'.
-struct BenchmarkName {
+struct BENCHMARK_EXPORT BenchmarkName {
std::string function_name;
std::string args;
std::string min_time;
+ std::string min_warmup_time;
std::string iterations;
std::string repetitions;
std::string time_type;
@@ -1509,7 +1767,7 @@
// can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
-class BenchmarkReporter {
+class BENCHMARK_EXPORT BenchmarkReporter {
public:
struct Context {
CPUInfo const& cpu_info;
@@ -1520,20 +1778,21 @@
Context();
};
- struct Run {
+ struct BENCHMARK_EXPORT Run {
static const int64_t no_repetition_index = -1;
enum RunType { RT_Iteration, RT_Aggregate };
Run()
: run_type(RT_Iteration),
aggregate_unit(kTime),
- error_occurred(false),
+ skipped(internal::NotSkipped),
iterations(1),
threads(1),
- time_unit(kNanosecond),
+ time_unit(GetDefaultTimeUnit()),
real_accumulated_time(0),
cpu_accumulated_time(0),
max_heapbytes_used(0),
+ use_real_time_for_initial_big_o(false),
complexity(oNone),
complexity_lambda(),
complexity_n(0),
@@ -1550,8 +1809,8 @@
std::string aggregate_name;
StatisticUnit aggregate_unit;
std::string report_label; // Empty if not set by benchmark.
- bool error_occurred;
- std::string error_message;
+ internal::Skipped skipped;
+ std::string skip_message;
IterationCount iterations;
int64_t threads;
@@ -1576,10 +1835,14 @@
// This is set to 0.0 if memory tracing is not enabled.
double max_heapbytes_used;
+ // By default Big-O is computed for CPU time, but that is not what you want
+ // to happen when manual time was requested, which is stored as real time.
+ bool use_real_time_for_initial_big_o;
+
// Keep track of arguments to compute asymptotic complexity
BigO complexity;
BigOFunc* complexity_lambda;
- int64_t complexity_n;
+ ComplexityN complexity_n;
// what statistics to compute from the measurements
const std::vector<internal::Statistics>* statistics;
@@ -1621,6 +1884,12 @@
virtual bool ReportContext(const Context& context) = 0;
// Called once for each group of benchmark runs, gives information about
+ // the configurations of the runs.
+ virtual void ReportRunsConfig(double /*min_time*/,
+ bool /*has_explicit_iters*/,
+ IterationCount /*iters*/) {}
+
+ // Called once for each group of benchmark runs, gives information about
// cpu-time and heap memory usage during the benchmark run. If the group
// of runs contained more than two entries then 'report' contains additional
// elements representing the mean and standard deviation of those runs.
@@ -1665,7 +1934,7 @@
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
-class ConsoleReporter : public BenchmarkReporter {
+class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
public:
enum OutputOptions {
OO_None = 0,
@@ -1677,8 +1946,8 @@
explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
: output_options_(opts_), name_field_width_(0), printed_header_(false) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
protected:
virtual void PrintRunData(const Run& report);
@@ -1690,12 +1959,12 @@
bool printed_header_;
};
-class JSONReporter : public BenchmarkReporter {
+class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
public:
JSONReporter() : first_report_(true) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
- virtual void Finalize() BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ void Finalize() BENCHMARK_OVERRIDE;
private:
void PrintRunData(const Run& report);
@@ -1703,13 +1972,13 @@
bool first_report_;
};
-class BENCHMARK_DEPRECATED_MSG(
+class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
"The CSV Reporter will be removed in a future release") CSVReporter
: public BenchmarkReporter {
public:
CSVReporter() : printed_header_(false) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
private:
void PrintRunData(const Run& report);
@@ -1748,18 +2017,24 @@
// Creates a list of integer values for the given range and multiplier.
// This can be used together with ArgsProduct() to allow multiple ranges
-// with different multiplers.
+// with different multipliers.
// Example:
// ArgsProduct({
// CreateRange(0, 1024, /*multi=*/32),
// CreateRange(0, 100, /*multi=*/4),
// CreateDenseRange(0, 4, /*step=*/1),
// });
+BENCHMARK_EXPORT
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);
// Creates a list of integer values for the given range and step.
+BENCHMARK_EXPORT
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
} // namespace benchmark
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
#endif // BENCHMARK_BENCHMARK_H_
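As a quick illustration of the new BENCHMARK_TEMPLATE2_CAPTURE macro added above, here is a minimal sketch. The benchmark body, the type arguments, the captured value 64, and the "small" case name are invented for illustration; only the macro itself and DoNotOptimize come from the library.

    #include <benchmark/benchmark.h>

    #include <cstddef>
    #include <tuple>
    #include <vector>

    // Hypothetical templated benchmark; the captured argument selects the size.
    template <typename T, typename U, class... ExtraArgs>
    void BM_fill_vector(benchmark::State& state, ExtraArgs&&... extra_args) {
      const auto size = std::get<0>(std::make_tuple(extra_args...));
      for (auto _ : state) {
        std::vector<T> v(static_cast<std::size_t>(size), T{});
        benchmark::DoNotOptimize(v.data());
      }
    }

    // Per the macro definition above, this registers a benchmark named
    // "BM_fill_vector<int,long>/small" that calls BM_fill_vector<int, long>(st, 64).
    BENCHMARK_TEMPLATE2_CAPTURE(BM_fill_vector, int, long, small, 64);

    BENCHMARK_MAIN();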
diff --git a/third-party/benchmark/include/benchmark/export.h b/third-party/benchmark/include/benchmark/export.h
new file mode 100644
index 0000000..f96f859
--- /dev/null
+++ b/third-party/benchmark/include/benchmark/export.h
@@ -0,0 +1,47 @@
+#ifndef BENCHMARK_EXPORT_H
+#define BENCHMARK_EXPORT_H
+
+#if defined(_WIN32)
+#define EXPORT_ATTR __declspec(dllexport)
+#define IMPORT_ATTR __declspec(dllimport)
+#define NO_EXPORT_ATTR
+#define DEPRECATE_ATTR __declspec(deprecated)
+#else // _WIN32
+#define EXPORT_ATTR __attribute__((visibility("default")))
+#define IMPORT_ATTR __attribute__((visibility("default")))
+#define NO_EXPORT_ATTR __attribute__((visibility("hidden")))
+#define DEPRECATE_ATTR __attribute__((__deprecated__))
+#endif // _WIN32
+
+#ifdef BENCHMARK_STATIC_DEFINE
+#define BENCHMARK_EXPORT
+#define BENCHMARK_NO_EXPORT
+#else // BENCHMARK_STATIC_DEFINE
+#ifndef BENCHMARK_EXPORT
+#ifdef benchmark_EXPORTS
+/* We are building this library */
+#define BENCHMARK_EXPORT EXPORT_ATTR
+#else // benchmark_EXPORTS
+/* We are using this library */
+#define BENCHMARK_EXPORT IMPORT_ATTR
+#endif // benchmark_EXPORTS
+#endif // !BENCHMARK_EXPORT
+
+#ifndef BENCHMARK_NO_EXPORT
+#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR
+#endif // !BENCHMARK_NO_EXPORT
+#endif // BENCHMARK_STATIC_DEFINE
+
+#ifndef BENCHMARK_DEPRECATED
+#define BENCHMARK_DEPRECATED DEPRECATE_ATTR
+#endif // BENCHMARK_DEPRECATED
+
+#ifndef BENCHMARK_DEPRECATED_EXPORT
+#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED
+#endif // BENCHMARK_DEPRECATED_EXPORT
+
+#ifndef BENCHMARK_DEPRECATED_NO_EXPORT
+#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED
+#endif // BENCHMARK_DEPRECATED_NO_EXPORT
+
+#endif /* BENCHMARK_EXPORT_H */
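For orientation, a minimal sketch of how these export macros are meant to be consumed; the class names are hypothetical, and the shared/static behavior follows the benchmark_EXPORTS and BENCHMARK_STATIC_DEFINE branches defined above (the CMake change further down sets BENCHMARK_STATIC_DEFINE for static builds).

    #include "benchmark/export.h"

    // Exported when building the shared library (benchmark_EXPORTS defined),
    // imported when a consumer includes the header, and a plain declaration
    // in static builds (BENCHMARK_STATIC_DEFINE).
    class BENCHMARK_EXPORT ExampleWidget {
     public:
      void Run();
    };

    // Kept out of the shared library's public symbol table.
    class BENCHMARK_NO_EXPORT ExampleDetail {
     public:
      void Helper();
    };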
diff --git a/third-party/benchmark/pyproject.toml b/third-party/benchmark/pyproject.toml
new file mode 100644
index 0000000..aa24ae8
--- /dev/null
+++ b/third-party/benchmark/pyproject.toml
@@ -0,0 +1,85 @@
+[build-system]
+requires = ["setuptools", "setuptools-scm[toml]", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "google_benchmark"
+description = "A library to benchmark code snippets."
+requires-python = ">=3.8"
+license = {file = "LICENSE"}
+keywords = ["benchmark"]
+
+authors = [
+ {name = "Google", email = "[email protected]"},
+]
+
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Software Development :: Testing",
+ "Topic :: System :: Benchmark",
+]
+
+dynamic = ["readme", "version"]
+
+dependencies = [
+ "absl-py>=0.7.1",
+]
+
+[project.optional-dependencies]
+dev = [
+ "pre-commit>=3.3.3",
+]
+
+[project.urls]
+Homepage = "https://github.com/google/benchmark"
+Documentation = "https://github.com/google/benchmark/tree/main/docs"
+Repository = "https://github.com/google/benchmark.git"
+Discord = "https://discord.gg/cz7UX7wKC2"
+
+[tool.setuptools]
+package-dir = {"" = "bindings/python"}
+zip-safe = false
+
+[tool.setuptools.packages.find]
+where = ["bindings/python"]
+
+[tool.setuptools.dynamic]
+readme = { file = "README.md", content-type = "text/markdown" }
+
+[tool.setuptools_scm]
+
+[tool.mypy]
+check_untyped_defs = true
+disallow_incomplete_defs = true
+pretty = true
+python_version = "3.11"
+strict_optional = false
+warn_unreachable = true
+
+[[tool.mypy.overrides]]
+module = ["yaml"]
+ignore_missing_imports = true
+
+[tool.ruff]
+# Explicitly tell ruff the source directory so it correctly identifies the first-party package.
+src = ["bindings/python"]
+
+line-length = 80
+target-version = "py311"
+
+# Enable pycodestyle (`E`, `W`), Pyflakes (`F`), and isort (`I`) codes by default.
+select = ["E", "F", "I", "W"]
+ignore = [
+ "E501", # line too long
+]
+
+[tool.ruff.isort]
+combine-as-imports = true
diff --git a/third-party/benchmark/requirements.txt b/third-party/benchmark/requirements.txt
deleted file mode 100644
index e451894..0000000
--- a/third-party/benchmark/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-numpy == 1.19.4
-scipy == 1.5.4
-pandas == 1.1.5
diff --git a/third-party/benchmark/setup.py b/third-party/benchmark/setup.py
index 83069e5..cb20042 100644
--- a/third-party/benchmark/setup.py
+++ b/third-party/benchmark/setup.py
@@ -1,56 +1,50 @@
+import contextlib
import os
-import posixpath
import platform
-import re
import shutil
-import sys
+import sysconfig
+from pathlib import Path
+from typing import Generator
-from distutils import sysconfig
import setuptools
from setuptools.command import build_ext
+PYTHON_INCLUDE_PATH_PLACEHOLDER = "<PYTHON_INCLUDE_PATH>"
-HERE = os.path.dirname(os.path.abspath(__file__))
+IS_WINDOWS = platform.system() == "Windows"
+IS_MAC = platform.system() == "Darwin"
-IS_WINDOWS = sys.platform.startswith("win")
-
-
-def _get_version():
- """Parse the version string from __init__.py."""
- with open(
- os.path.join(HERE, "bindings", "python", "google_benchmark", "__init__.py")
- ) as init_file:
[email protected]
+def temp_fill_include_path(fp: str) -> Generator[None, None, None]:
+ """Temporarily set the Python include path in a file."""
+ with open(fp, "r+") as f:
try:
- version_line = next(
- line for line in init_file if line.startswith("__version__")
+ content = f.read()
+ replaced = content.replace(
+ PYTHON_INCLUDE_PATH_PLACEHOLDER,
+ Path(sysconfig.get_paths()["include"]).as_posix(),
)
- except StopIteration:
- raise ValueError("__version__ not defined in __init__.py")
- else:
- namespace = {}
- exec(version_line, namespace) # pylint: disable=exec-used
- return namespace["__version__"]
-
-
-def _parse_requirements(path):
- with open(os.path.join(HERE, path)) as requirements:
- return [
- line.rstrip()
- for line in requirements
- if not (line.isspace() or line.startswith("#"))
- ]
+ f.seek(0)
+ f.write(replaced)
+ f.truncate()
+ yield
+ finally:
+ # revert to the original content after exit
+ f.seek(0)
+ f.write(content)
+ f.truncate()
class BazelExtension(setuptools.Extension):
"""A C/C++ extension that is defined as a Bazel BUILD target."""
- def __init__(self, name, bazel_target):
+ def __init__(self, name: str, bazel_target: str):
+ super().__init__(name=name, sources=[])
+
self.bazel_target = bazel_target
- self.relpath, self.target_name = posixpath.relpath(bazel_target, "//").split(
- ":"
- )
- setuptools.Extension.__init__(self, name, sources=[])
+ stripped_target = bazel_target.split("//")[-1]
+ self.relpath, self.target_name = stripped_target.split(":")
class BuildBazelExtension(build_ext.build_ext):
@@ -59,88 +53,71 @@
def run(self):
for ext in self.extensions:
self.bazel_build(ext)
- build_ext.build_ext.run(self)
+ super().run()
+ # explicitly call `bazel shutdown` for graceful exit
+ self.spawn(["bazel", "shutdown"])
- def bazel_build(self, ext):
+ def copy_extensions_to_source(self):
+ """
+ Copy generated extensions into the source tree.
+ This is done in the ``bazel_build`` method, so it's not necessary to
+ do it again in the ``build_ext`` base class.
+ """
+ pass
+
+ def bazel_build(self, ext: BazelExtension) -> None:
"""Runs the bazel build to create the package."""
- with open("WORKSPACE", "r") as workspace:
- workspace_contents = workspace.read()
+ with temp_fill_include_path("WORKSPACE"):
+ temp_path = Path(self.build_temp)
- with open("WORKSPACE", "w") as workspace:
- workspace.write(
- re.sub(
- r'(?<=path = ").*(?=", # May be overwritten by setup\.py\.)',
- sysconfig.get_python_inc().replace(os.path.sep, posixpath.sep),
- workspace_contents,
- )
+ bazel_argv = [
+ "bazel",
+ "build",
+ ext.bazel_target,
+ "--enable_bzlmod=false",
+ f"--symlink_prefix={temp_path / 'bazel-'}",
+ f"--compilation_mode={'dbg' if self.debug else 'opt'}",
+ # C++17 is required by nanobind
+ f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}",
+ ]
+
+ if IS_WINDOWS:
+ # Link with python*.lib.
+ for library_dir in self.library_dirs:
+ bazel_argv.append("--linkopt=/LIBPATH:" + library_dir)
+ elif IS_MAC:
+ if platform.machine() == "x86_64":
+ # C++17 needs macOS 10.14 at minimum
+ bazel_argv.append("--macos_minimum_os=10.14")
+
+ # cross-compilation for Mac ARM64 on GitHub Mac x86 runners.
+ # ARCHFLAGS is set by cibuildwheel before macOS wheel builds.
+ archflags = os.getenv("ARCHFLAGS", "")
+ if "arm64" in archflags:
+ bazel_argv.append("--cpu=darwin_arm64")
+ bazel_argv.append("--macos_cpus=arm64")
+
+ elif platform.machine() == "arm64":
+ bazel_argv.append("--macos_minimum_os=11.0")
+
+ self.spawn(bazel_argv)
+
+ shared_lib_suffix = ".dll" if IS_WINDOWS else ".so"
+ ext_name = ext.target_name + shared_lib_suffix
+ ext_bazel_bin_path = (
+ temp_path / "bazel-bin" / ext.relpath / ext_name
)
- if not os.path.exists(self.build_temp):
- os.makedirs(self.build_temp)
-
- bazel_argv = [
- "bazel",
- "build",
- ext.bazel_target,
- "--symlink_prefix=" + os.path.join(self.build_temp, "bazel-"),
- "--compilation_mode=" + ("dbg" if self.debug else "opt"),
- ]
-
- if IS_WINDOWS:
- # Link with python*.lib.
- for library_dir in self.library_dirs:
- bazel_argv.append("--linkopt=/LIBPATH:" + library_dir)
- elif sys.platform == "darwin" and platform.machine() == "x86_64":
- bazel_argv.append("--macos_minimum_os=10.9")
-
- self.spawn(bazel_argv)
-
- shared_lib_suffix = ".dll" if IS_WINDOWS else ".so"
- ext_bazel_bin_path = os.path.join(
- self.build_temp,
- "bazel-bin",
- ext.relpath,
- ext.target_name + shared_lib_suffix,
- )
-
- ext_dest_path = self.get_ext_fullpath(ext.name)
- ext_dest_dir = os.path.dirname(ext_dest_path)
- if not os.path.exists(ext_dest_dir):
- os.makedirs(ext_dest_dir)
- shutil.copyfile(ext_bazel_bin_path, ext_dest_path)
+ ext_dest_path = Path(self.get_ext_fullpath(ext.name))
+ shutil.copyfile(ext_bazel_bin_path, ext_dest_path)
setuptools.setup(
- name="google_benchmark",
- version=_get_version(),
- url="https://github.com/google/benchmark",
- description="A library to benchmark code snippets.",
- author="Google",
- author_email="[email protected]",
- # Contained modules and scripts.
- package_dir={"": "bindings/python"},
- packages=setuptools.find_packages("bindings/python"),
- install_requires=_parse_requirements("bindings/python/requirements.txt"),
cmdclass=dict(build_ext=BuildBazelExtension),
ext_modules=[
BazelExtension(
- "google_benchmark._benchmark",
- "//bindings/python/google_benchmark:_benchmark",
+ name="google_benchmark._benchmark",
+ bazel_target="//bindings/python/google_benchmark:_benchmark",
)
],
- zip_safe=False,
- # PyPI package information.
- classifiers=[
- "Development Status :: 4 - Beta",
- "Intended Audience :: Developers",
- "Intended Audience :: Science/Research",
- "License :: OSI Approved :: Apache Software License",
- "Programming Language :: Python :: 3.6",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Topic :: Software Development :: Testing",
- "Topic :: System :: Benchmark",
- ],
- license="Apache 2.0",
- keywords="benchmark",
)
diff --git a/third-party/benchmark/src/CMakeLists.txt b/third-party/benchmark/src/CMakeLists.txt
index e814a4e..943594b 100644
--- a/third-party/benchmark/src/CMakeLists.txt
+++ b/third-party/benchmark/src/CMakeLists.txt
@@ -25,12 +25,25 @@
SOVERSION ${GENERIC_LIB_SOVERSION}
)
target_include_directories(benchmark PUBLIC
- $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>)
+ $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
+)
+
+set_property(
+ SOURCE benchmark.cc
+ APPEND
+ PROPERTY COMPILE_DEFINITIONS
+ BENCHMARK_VERSION="${VERSION}"
+)
# libpfm, if available
-if (HAVE_LIBPFM)
- target_link_libraries(benchmark PRIVATE pfm)
- add_definitions(-DHAVE_LIBPFM)
+if (PFM_FOUND)
+ target_link_libraries(benchmark PRIVATE PFM::libpfm)
+ target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
+endif()
+
+# pthread affinity, if available
+if(HAVE_PTHREAD_AFFINITY)
+ target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY)
endif()
# Link threads.
@@ -53,6 +66,10 @@
target_link_libraries(benchmark PRIVATE kstat)
endif()
+if (NOT BUILD_SHARED_LIBS)
+ target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE)
+endif()
+
# Benchmark main library
add_library(benchmark_main "benchmark_main.cc")
add_library(benchmark::benchmark_main ALIAS benchmark_main)
@@ -60,10 +77,10 @@
OUTPUT_NAME "benchmark_main"
VERSION ${GENERIC_LIB_VERSION}
SOVERSION ${GENERIC_LIB_SOVERSION}
+ DEFINE_SYMBOL benchmark_EXPORTS
)
target_link_libraries(benchmark_main PUBLIC benchmark::benchmark)
-
set(generated_dir "${PROJECT_BINARY_DIR}")
set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
@@ -107,6 +124,7 @@
install(
DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
+ "${PROJECT_BINARY_DIR}/include/benchmark"
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING PATTERN "*.*h")
diff --git a/third-party/benchmark/src/benchmark.cc b/third-party/benchmark/src/benchmark.cc
index 4731511..495944d 100644
--- a/third-party/benchmark/src/benchmark.cc
+++ b/third-party/benchmark/src/benchmark.cc
@@ -19,7 +19,7 @@
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -65,12 +65,28 @@
// linked into the binary are run.
BM_DEFINE_string(benchmark_filter, "");
-// Minimum number of seconds we should run benchmark before results are
-// considered significant. For cpu-time based tests, this is the lower bound
+// Specification of how long to run the benchmark.
+//
+// It can be either an exact number of iterations (specified as `<integer>x`),
+// or a minimum number of seconds (specified as `<float>s`). If the latter
+// format (i.e., min seconds) is used, the system may run the benchmark longer
+// until the results are considered significant.
+//
+// For backward compatibility, the `s` suffix may be omitted, in which case,
+// the specified number is interpreted as the number of seconds.
+//
+// For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test. For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
-BM_DEFINE_double(benchmark_min_time, 0.5);
+BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
+
+// Minimum number of seconds a benchmark should be run before its results are
+// taken into account. This can be necessary, e.g., for benchmarks of code
+// which needs to fill some form of cache before performance is of interest.
+// Note: results gathered within this period are discarded and not used for
+// the reported result.
+BM_DEFINE_double(benchmark_min_warmup_time, 0.0);
// The number of runs of each benchmark. If greater than 1, the mean and
// standard deviation of the runs will be reported.
@@ -121,6 +137,10 @@
// pairs. Kept internal as it's only used for parsing from env/command line.
BM_DEFINE_kvpairs(benchmark_context, {});
+// Set the default time unit to use for reports
+// Valid values are 'ns', 'us', 'ms' or 's'
+BM_DEFINE_string(benchmark_time_unit, "");
+
// The level of verbose logging to output
BM_DEFINE_int32(v, 0);
@@ -128,23 +148,28 @@
std::map<std::string, std::string>* global_context = nullptr;
+BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() {
+ return global_context;
+}
+
// FIXME: wouldn't LTO mess this up?
void UseCharPointer(char const volatile*) {}
} // namespace internal
-State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
- int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager,
+State::State(std::string name, IterationCount max_iters,
+ const std::vector<int64_t>& ranges, int thread_i, int n_threads,
+ internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement)
: total_iterations_(0),
batch_leftover_(0),
max_iterations(max_iters),
started_(false),
finished_(false),
- error_occurred_(false),
+ skipped_(internal::NotSkipped),
range_(ranges),
complexity_n_(0),
+ name_(std::move(name)),
thread_index_(thread_i),
threads_(n_threads),
timer_(timer),
@@ -154,6 +179,17 @@
BM_CHECK_LT(thread_index_, threads_)
<< "thread_index must be less than threads";
+ // Add counters with the correct flag now. If added with `counters[name]` in
+ // `PauseTiming`, a new `Counter` will be inserted the first time, which
+ // won't have the flag. Inserting them now also reduces the allocations
+ // during the benchmark.
+ if (perf_counters_measurement_) {
+ for (const std::string& counter_name :
+ perf_counters_measurement_->names()) {
+ counters[counter_name] = Counter(0.0, Counter::kAvgIterations);
+ }
+ }
+
// Note: The use of offsetof below is technically undefined until C++17
// because State is not a standard layout type. However, all compilers
// currently provide well-defined behavior as an extension (which is
@@ -170,11 +206,18 @@
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winvalid-offsetof"
#endif
+#if defined(__NVCC__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 1427
+#endif
+#if defined(__NVCOMPILER)
+#pragma diagnostic push
+#pragma diag_suppress offset_in_non_POD_nonstandard
+#endif
// Offset tests to ensure commonly accessed data is on the first cache line.
const int cache_line_size = 64;
- static_assert(offsetof(State, error_occurred_) <=
- (cache_line_size - sizeof(error_occurred_)),
- "");
+ static_assert(
+ offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), "");
#if defined(__INTEL_COMPILER)
#pragma warning pop
#elif defined(__GNUC__)
@@ -182,39 +225,61 @@
#elif defined(__clang__)
#pragma clang diagnostic pop
#endif
+#if defined(__NVCC__)
+#pragma nv_diagnostic pop
+#endif
+#if defined(__NVCOMPILER)
+#pragma diagnostic pop
+#endif
}
void State::PauseTiming() {
// Add in time accumulated so far
- BM_CHECK(started_ && !finished_ && !error_occurred_);
+ BM_CHECK(started_ && !finished_ && !skipped());
timer_->StopTimer();
if (perf_counters_measurement_) {
- auto measurements = perf_counters_measurement_->StopAndGetMeasurements();
+ std::vector<std::pair<std::string, double>> measurements;
+ if (!perf_counters_measurement_->Stop(measurements)) {
+ BM_CHECK(false) << "Perf counters failed to read the value.";
+ }
for (const auto& name_and_measurement : measurements) {
- auto name = name_and_measurement.first;
- auto measurement = name_and_measurement.second;
- BM_CHECK_EQ(counters[name], 0.0);
- counters[name] = Counter(measurement, Counter::kAvgIterations);
+ const std::string& name = name_and_measurement.first;
+ const double measurement = name_and_measurement.second;
+ // Counter was inserted with `kAvgIterations` flag by the constructor.
+ assert(counters.find(name) != counters.end());
+ counters[name].value += measurement;
}
}
}
void State::ResumeTiming() {
- BM_CHECK(started_ && !finished_ && !error_occurred_);
+ BM_CHECK(started_ && !finished_ && !skipped());
timer_->StartTimer();
if (perf_counters_measurement_) {
perf_counters_measurement_->Start();
}
}
-void State::SkipWithError(const char* msg) {
- BM_CHECK(msg);
- error_occurred_ = true;
+void State::SkipWithMessage(const std::string& msg) {
+ skipped_ = internal::SkippedWithMessage;
{
MutexLock l(manager_->GetBenchmarkMutex());
- if (manager_->results.has_error_ == false) {
- manager_->results.error_message_ = msg;
- manager_->results.has_error_ = true;
+ if (internal::NotSkipped == manager_->results.skipped_) {
+ manager_->results.skip_message_ = msg;
+ manager_->results.skipped_ = skipped_;
+ }
+ }
+ total_iterations_ = 0;
+ if (timer_->running()) timer_->StopTimer();
+}
+
+void State::SkipWithError(const std::string& msg) {
+ skipped_ = internal::SkippedWithError;
+ {
+ MutexLock l(manager_->GetBenchmarkMutex());
+ if (internal::NotSkipped == manager_->results.skipped_) {
+ manager_->results.skip_message_ = msg;
+ manager_->results.skipped_ = skipped_;
}
}
total_iterations_ = 0;
@@ -225,7 +290,7 @@
timer_->SetIterationTime(seconds);
}
-void State::SetLabel(const char* label) {
+void State::SetLabel(const std::string& label) {
MutexLock l(manager_->GetBenchmarkMutex());
manager_->results.report_label_ = label;
}
@@ -233,14 +298,14 @@
void State::StartKeepRunning() {
BM_CHECK(!started_ && !finished_);
started_ = true;
- total_iterations_ = error_occurred_ ? 0 : max_iterations;
+ total_iterations_ = skipped() ? 0 : max_iterations;
manager_->StartStopBarrier();
- if (!error_occurred_) ResumeTiming();
+ if (!skipped()) ResumeTiming();
}
void State::FinishKeepRunning() {
- BM_CHECK(started_ && (!finished_ || error_occurred_));
- if (!error_occurred_) {
+ BM_CHECK(started_ && (!finished_ || skipped()));
+ if (!skipped()) {
PauseTiming();
}
// Total iterations has now wrapped around past 0. Fix this.
@@ -318,14 +383,26 @@
size_t num_repetitions_total = 0;
+ // This perfcounters object needs to be created before the runners vector
+ // below so it outlasts their lifetime.
+ PerfCountersMeasurement perfcounters(
+ StrSplit(FLAGS_benchmark_perf_counters, ','));
+
+ // Vector of benchmarks to run
std::vector<internal::BenchmarkRunner> runners;
runners.reserve(benchmarks.size());
+
+ // Count the number of benchmarks with threads to warn the user in case
+ // performance counters are used.
+ int benchmarks_with_threads = 0;
+
+ // Loop through all benchmarks
for (const BenchmarkInstance& benchmark : benchmarks) {
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
if (benchmark.complexity() != oNone)
reports_for_family = &per_family_reports[benchmark.family_index()];
-
- runners.emplace_back(benchmark, reports_for_family);
+ benchmarks_with_threads += (benchmark.threads() > 1);
+ runners.emplace_back(benchmark, &perfcounters, reports_for_family);
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
num_repetitions_total += num_repeats_of_this_instance;
if (reports_for_family)
@@ -333,6 +410,17 @@
}
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
+ // The use of performance counters with threads would be unintuitive for
+ // the average user so we need to warn them about this case
+ if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
+ GetErrorLogInstance()
+ << "***WARNING*** There are " << benchmarks_with_threads
+ << " benchmarks with threads and " << perfcounters.num_counters()
+ << " performance counters were requested. Beware counters will "
+ "reflect the combined usage across all "
+ "threads.\n";
+ }
+
std::vector<size_t> repetition_indices;
repetition_indices.reserve(num_repetitions_total);
for (size_t runner_index = 0, num_runners = runners.size();
@@ -356,6 +444,12 @@
if (runner.HasRepeatsRemaining()) continue;
// FIXME: report each repetition separately, not all of them in bulk.
+ display_reporter->ReportRunsConfig(
+ runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
+ if (file_reporter)
+ file_reporter->ReportRunsConfig(
+ runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
+
RunResults run_results = runner.GetResults();
// Maybe calculate complexity report
@@ -389,14 +483,15 @@
typedef std::unique_ptr<BenchmarkReporter> PtrType;
if (name == "console") {
return PtrType(new ConsoleReporter(output_opts));
- } else if (name == "json") {
- return PtrType(new JSONReporter);
- } else if (name == "csv") {
- return PtrType(new CSVReporter);
- } else {
- std::cerr << "Unexpected format: '" << name << "'\n";
- std::exit(1);
}
+ if (name == "json") {
+ return PtrType(new JSONReporter());
+ }
+ if (name == "csv") {
+ return PtrType(new CSVReporter());
+ }
+ std::cerr << "Unexpected format: '" << name << "'\n";
+ std::exit(1);
}
BENCHMARK_RESTORE_DEPRECATED_WARNING
@@ -433,6 +528,14 @@
} // end namespace internal
+BenchmarkReporter* CreateDefaultDisplayReporter() {
+ static auto default_display_reporter =
+ internal::CreateReporter(FLAGS_benchmark_format,
+ internal::GetOutputOptions())
+ .release();
+ return default_display_reporter;
+}
+
size_t RunSpecifiedBenchmarks() {
return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter);
}
@@ -468,8 +571,7 @@
std::unique_ptr<BenchmarkReporter> default_display_reporter;
std::unique_ptr<BenchmarkReporter> default_file_reporter;
if (!display_reporter) {
- default_display_reporter = internal::CreateReporter(
- FLAGS_benchmark_format, internal::GetOutputOptions());
+ default_display_reporter.reset(CreateDefaultDisplayReporter());
display_reporter = default_display_reporter.get();
}
auto& Out = display_reporter->GetOutputStream();
@@ -480,17 +582,23 @@
Err << "A custom file reporter was provided but "
"--benchmark_out=<file> was not specified."
<< std::endl;
+ Out.flush();
+ Err.flush();
std::exit(1);
}
if (!fname.empty()) {
output_file.open(fname);
if (!output_file.is_open()) {
Err << "invalid file name: '" << fname << "'" << std::endl;
+ Out.flush();
+ Err.flush();
std::exit(1);
}
if (!file_reporter) {
default_file_reporter = internal::CreateReporter(
- FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
+ FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular
+ ? ConsoleReporter::OO_Tabular
+ : ConsoleReporter::OO_None);
file_reporter = default_file_reporter.get();
}
file_reporter->SetOutputStream(&output_file);
@@ -498,10 +606,16 @@
}
std::vector<internal::BenchmarkInstance> benchmarks;
- if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0;
+ if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) {
+ Out.flush();
+ Err.flush();
+ return 0;
+ }
if (benchmarks.empty()) {
Err << "Failed to match any benchmarks against regex: " << spec << "\n";
+ Out.flush();
+ Err.flush();
return 0;
}
@@ -512,11 +626,28 @@
internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
}
+ Out.flush();
+ Err.flush();
return benchmarks.size();
}
+namespace {
+// stores the time unit benchmarks use by default
+TimeUnit default_time_unit = kNanosecond;
+} // namespace
+
+TimeUnit GetDefaultTimeUnit() { return default_time_unit; }
+
+void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; }
+
std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; }
+void SetBenchmarkFilter(std::string value) {
+ FLAGS_benchmark_filter = std::move(value);
+}
+
+int32_t GetBenchmarkVerbosity() { return FLAGS_v; }
+
void RegisterMemoryManager(MemoryManager* manager) {
internal::memory_manager = manager;
}
@@ -533,27 +664,31 @@
namespace internal {
+void (*HelperPrintf)();
+
void PrintUsageAndExit() {
- fprintf(stdout,
- "benchmark"
- " [--benchmark_list_tests={true|false}]\n"
- " [--benchmark_filter=<regex>]\n"
- " [--benchmark_min_time=<min_time>]\n"
- " [--benchmark_repetitions=<num_repetitions>]\n"
- " [--benchmark_enable_random_interleaving={true|false}]\n"
- " [--benchmark_report_aggregates_only={true|false}]\n"
- " [--benchmark_display_aggregates_only={true|false}]\n"
- " [--benchmark_format=<console|json|csv>]\n"
- " [--benchmark_out=<filename>]\n"
- " [--benchmark_out_format=<json|console|csv>]\n"
- " [--benchmark_color={auto|true|false}]\n"
- " [--benchmark_counters_tabular={true|false}]\n"
- " [--benchmark_perf_counters=<counter>,...]\n"
- " [--benchmark_context=<key>=<value>,...]\n"
- " [--v=<verbosity>]\n");
+ HelperPrintf();
exit(0);
}
+void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
+ if (time_unit_flag == "s") {
+ return SetDefaultTimeUnit(kSecond);
+ }
+ if (time_unit_flag == "ms") {
+ return SetDefaultTimeUnit(kMillisecond);
+ }
+ if (time_unit_flag == "us") {
+ return SetDefaultTimeUnit(kMicrosecond);
+ }
+ if (time_unit_flag == "ns") {
+ return SetDefaultTimeUnit(kNanosecond);
+ }
+ if (!time_unit_flag.empty()) {
+ PrintUsageAndExit();
+ }
+}
+
void ParseCommandLineFlags(int* argc, char** argv) {
using namespace benchmark;
BenchmarkReporter::Context::executable_name =
@@ -562,8 +697,10 @@
if (ParseBoolFlag(argv[i], "benchmark_list_tests",
&FLAGS_benchmark_list_tests) ||
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
- ParseDoubleFlag(argv[i], "benchmark_min_time",
+ ParseStringFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
+ ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
+ &FLAGS_benchmark_min_warmup_time) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
@@ -583,6 +720,8 @@
&FLAGS_benchmark_perf_counters) ||
ParseKeyValueFlag(argv[i], "benchmark_context",
&FLAGS_benchmark_context) ||
+ ParseStringFlag(argv[i], "benchmark_time_unit",
+ &FLAGS_benchmark_time_unit) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
@@ -598,6 +737,7 @@
PrintUsageAndExit();
}
}
+ SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit);
if (FLAGS_benchmark_color.empty()) {
PrintUsageAndExit();
}
@@ -613,7 +753,34 @@
} // end namespace internal
-void Initialize(int* argc, char** argv) {
+std::string GetBenchmarkVersion() { return {BENCHMARK_VERSION}; }
+
+void PrintDefaultHelp() {
+ fprintf(stdout,
+ "benchmark"
+ " [--benchmark_list_tests={true|false}]\n"
+ " [--benchmark_filter=<regex>]\n"
+ " [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
+ " [--benchmark_min_warmup_time=<min_warmup_time>]\n"
+ " [--benchmark_repetitions=<num_repetitions>]\n"
+ " [--benchmark_enable_random_interleaving={true|false}]\n"
+ " [--benchmark_report_aggregates_only={true|false}]\n"
+ " [--benchmark_display_aggregates_only={true|false}]\n"
+ " [--benchmark_format=<console|json|csv>]\n"
+ " [--benchmark_out=<filename>]\n"
+ " [--benchmark_out_format=<json|console|csv>]\n"
+ " [--benchmark_color={auto|true|false}]\n"
+ " [--benchmark_counters_tabular={true|false}]\n"
+#if defined HAVE_LIBPFM
+ " [--benchmark_perf_counters=<counter>,...]\n"
+#endif
+ " [--benchmark_context=<key>=<value>,...]\n"
+ " [--benchmark_time_unit={ns|us|ms|s}]\n"
+ " [--v=<verbosity>]\n");
+}
+
+void Initialize(int* argc, char** argv, void (*HelperPrintf)()) {
+ internal::HelperPrintf = HelperPrintf;
internal::ParseCommandLineFlags(argc, argv);
internal::LogLevel() = FLAGS_v;
}
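Since BENCHMARK_MAIN() above still calls Initialize with two arguments, the new HelperPrintf parameter presumably defaults to PrintDefaultHelp in the header; a hand-written main can then swap in its own usage text. A minimal sketch under that assumption, with a made-up binary name and printer:

    #include <benchmark/benchmark.h>

    #include <cstdio>

    // Hypothetical usage text printed instead of PrintDefaultHelp() when an
    // unknown flag or --help triggers PrintUsageAndExit().
    static void PrintMyUsage() {
      std::fprintf(stdout,
                   "my_bench [--benchmark_min_time=<integer>x|<float>s]\n"
                   "         [--benchmark_time_unit={ns|us|ms|s}]\n");
    }

    int main(int argc, char** argv) {
      benchmark::Initialize(&argc, argv, &PrintMyUsage);
      if (benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
      benchmark::RunSpecifiedBenchmarks();
      benchmark::Shutdown();
      return 0;
    }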
diff --git a/third-party/benchmark/src/benchmark_api_internal.cc b/third-party/benchmark/src/benchmark_api_internal.cc
index 4de36e3..286f986 100644
--- a/third-party/benchmark/src/benchmark_api_internal.cc
+++ b/third-party/benchmark/src/benchmark_api_internal.cc
@@ -16,7 +16,7 @@
per_family_instance_index_(per_family_instance_idx),
aggregation_report_mode_(benchmark_.aggregation_report_mode_),
args_(args),
- time_unit_(benchmark_.time_unit_),
+ time_unit_(benchmark_.GetTimeUnit()),
measure_process_cpu_time_(benchmark_.measure_process_cpu_time_),
use_real_time_(benchmark_.use_real_time_),
use_manual_time_(benchmark_.use_manual_time_),
@@ -25,6 +25,7 @@
statistics_(benchmark_.statistics_),
repetitions_(benchmark_.repetitions_),
min_time_(benchmark_.min_time_),
+ min_warmup_time_(benchmark_.min_warmup_time_),
iterations_(benchmark_.iterations_),
threads_(thread_count) {
name_.function_name = benchmark_.name_;
@@ -50,6 +51,11 @@
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
}
+ if (!IsZero(benchmark->min_warmup_time_)) {
+ name_.min_warmup_time =
+ StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);
+ }
+
if (benchmark_.iterations_ != 0) {
name_.iterations = StrFormat(
"iterations:%lu", static_cast<unsigned long>(benchmark_.iterations_));
@@ -87,24 +93,24 @@
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement) const {
- State st(iters, args_, thread_id, threads_, timer, manager,
- perf_counters_measurement);
+ State st(name_.function_name, iters, args_, thread_id, threads_, timer,
+ manager, perf_counters_measurement);
benchmark_.Run(st);
return st;
}
void BenchmarkInstance::Setup() const {
if (setup_) {
- State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr,
- nullptr);
+ State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
+ nullptr, nullptr, nullptr);
setup_(st);
}
}
void BenchmarkInstance::Teardown() const {
if (teardown_) {
- State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr,
- nullptr);
+ State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
+ nullptr, nullptr, nullptr);
teardown_(st);
}
}
diff --git a/third-party/benchmark/src/benchmark_api_internal.h b/third-party/benchmark/src/benchmark_api_internal.h
index 94c2b29..94f5165 100644
--- a/third-party/benchmark/src/benchmark_api_internal.h
+++ b/third-party/benchmark/src/benchmark_api_internal.h
@@ -36,6 +36,7 @@
const std::vector<Statistics>& statistics() const { return statistics_; }
int repetitions() const { return repetitions_; }
double min_time() const { return min_time_; }
+ double min_warmup_time() const { return min_warmup_time_; }
IterationCount iterations() const { return iterations_; }
int threads() const { return threads_; }
void Setup() const;
@@ -62,6 +63,7 @@
const std::vector<Statistics>& statistics_;
int repetitions_;
double min_time_;
+ double min_warmup_time_;
IterationCount iterations_;
int threads_; // Number of concurrent threads to use
@@ -76,6 +78,7 @@
bool IsZero(double n);
+BENCHMARK_EXPORT
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
} // end namespace internal
diff --git a/third-party/benchmark/src/benchmark_main.cc b/third-party/benchmark/src/benchmark_main.cc
index b3b2478..cd61cd2 100644
--- a/third-party/benchmark/src/benchmark_main.cc
+++ b/third-party/benchmark/src/benchmark_main.cc
@@ -14,4 +14,5 @@
#include "benchmark/benchmark.h"
+BENCHMARK_EXPORT int main(int, char**);
BENCHMARK_MAIN();
diff --git a/third-party/benchmark/src/benchmark_name.cc b/third-party/benchmark/src/benchmark_name.cc
index 2a17ebc..01676bb 100644
--- a/third-party/benchmark/src/benchmark_name.cc
+++ b/third-party/benchmark/src/benchmark_name.cc
@@ -51,8 +51,9 @@
}
} // namespace
+BENCHMARK_EXPORT
std::string BenchmarkName::str() const {
- return join('/', function_name, args, min_time, iterations, repetitions,
- time_type, threads);
+ return join('/', function_name, args, min_time, min_warmup_time, iterations,
+ repetitions, time_type, threads);
}
} // namespace benchmark
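Given the formats used in benchmark_api_internal.cc above ("min_time:%0.3f", "min_warmup_time:%0.3f"), a registration like the hypothetical one below now yields a name with the warmup component spliced in between min_time and iterations:

    #include <benchmark/benchmark.h>

    // Hypothetical benchmark, for illustration only.
    static void BM_foo(benchmark::State& state) {
      for (auto _ : state) {
      }
    }

    // With both knobs set, BenchmarkName::str() joins the non-empty components
    // roughly as "BM_foo/128/min_time:2.000/min_warmup_time:0.500".
    BENCHMARK(BM_foo)->Arg(128)->MinTime(2.0)->MinWarmUpTime(0.5);

    BENCHMARK_MAIN();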
diff --git a/third-party/benchmark/src/benchmark_register.cc b/third-party/benchmark/src/benchmark_register.cc
index 61a0c26..e447c9a 100644
--- a/third-party/benchmark/src/benchmark_register.cc
+++ b/third-party/benchmark/src/benchmark_register.cc
@@ -15,7 +15,7 @@
#include "benchmark_register.h"
#ifndef BENCHMARK_OS_WINDOWS
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -53,10 +53,13 @@
namespace {
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
-static const int kRangeMultiplier = 8;
+static constexpr int kRangeMultiplier = 8;
+
// The size of a benchmark family determines the number of inputs to repeat
// the benchmark on. If this is "large" then warn the user during configuration.
-static const size_t kMaxFamilySize = 100;
+static constexpr size_t kMaxFamilySize = 100;
+
+static constexpr char kDisabledPrefix[] = "DISABLED_";
} // end namespace
namespace internal {
@@ -116,10 +119,10 @@
// Make regular expression out of command-line flag
std::string error_msg;
Regex re;
- bool isNegativeFilter = false;
+ bool is_negative_filter = false;
if (spec[0] == '-') {
spec.replace(0, 1, "");
- isNegativeFilter = true;
+ is_negative_filter = true;
}
if (!re.Init(spec, &error_msg)) {
Err << "Could not compile benchmark re: " << error_msg << std::endl;
@@ -154,7 +157,8 @@
<< " will be repeated at least " << family_size << " times.\n";
}
// reserve in the special case the regex ".", since we know the final
- // family size.
+ // family size. This doesn't take into account any disabled benchmarks,
+ // so in the worst case we reserve more than we need.
if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size);
for (auto const& args : family->args_) {
@@ -164,8 +168,9 @@
num_threads);
const auto full_name = instance.name().str();
- if ((re.Match(full_name) && !isNegativeFilter) ||
- (!re.Match(full_name) && isNegativeFilter)) {
+ if (full_name.rfind(kDisabledPrefix, 0) != 0 &&
+ ((re.Match(full_name) && !is_negative_filter) ||
+ (!re.Match(full_name) && is_negative_filter))) {
benchmarks->push_back(std::move(instance));
++per_family_instance_index;
@@ -199,12 +204,14 @@
// Benchmark
//=============================================================================//
-Benchmark::Benchmark(const char* name)
+Benchmark::Benchmark(const std::string& name)
: name_(name),
aggregation_report_mode_(ARM_Unspecified),
- time_unit_(kNanosecond),
+ time_unit_(GetDefaultTimeUnit()),
+ use_default_time_unit_(true),
range_multiplier_(kRangeMultiplier),
min_time_(0),
+ min_warmup_time_(0),
iterations_(0),
repetitions_(0),
measure_process_cpu_time_(false),
@@ -223,7 +230,7 @@
Benchmark::~Benchmark() {}
Benchmark* Benchmark::Name(const std::string& name) {
- SetName(name.c_str());
+ SetName(name);
return this;
}
@@ -235,6 +242,7 @@
Benchmark* Benchmark::Unit(TimeUnit unit) {
time_unit_ = unit;
+ use_default_time_unit_ = false;
return this;
}
@@ -348,9 +356,17 @@
return this;
}
+Benchmark* Benchmark::MinWarmUpTime(double t) {
+ BM_CHECK(t >= 0.0);
+ BM_CHECK(iterations_ == 0);
+ min_warmup_time_ = t;
+ return this;
+}
+
Benchmark* Benchmark::Iterations(IterationCount n) {
BM_CHECK(n > 0);
BM_CHECK(IsZero(min_time_));
+ BM_CHECK(IsZero(min_warmup_time_));
iterations_ = n;
return this;
}
@@ -452,7 +468,9 @@
return this;
}
-void Benchmark::SetName(const char* name) { name_ = name; }
+void Benchmark::SetName(const std::string& name) { name_ = name; }
+
+const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
@@ -462,6 +480,16 @@
return static_cast<int>(args_.front().size());
}
+const char* Benchmark::GetArgName(int arg) const {
+ BM_CHECK_GE(arg, 0);
+ BM_CHECK_LT(arg, static_cast<int>(arg_names_.size()));
+ return arg_names_[arg].c_str();
+}
+
+TimeUnit Benchmark::GetTimeUnit() const {
+ return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_;
+}
+
//=============================================================================//
// FunctionBenchmark
//=============================================================================//
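The new kDisabledPrefix check above means any benchmark whose generated name starts with "DISABLED_" is dropped at registration time, much like the GoogleTest convention. A small sketch with hypothetical names:

    #include <benchmark/benchmark.h>

    // Registered as usual, but filtered out of every run while the DISABLED_
    // prefix is present; remove the prefix to re-enable it.
    static void DISABLED_BM_FlakyIO(benchmark::State& state) {
      for (auto _ : state) {
        // body intentionally left empty for the sketch
      }
    }
    BENCHMARK(DISABLED_BM_FlakyIO);

    BENCHMARK_MAIN();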
diff --git a/third-party/benchmark/src/benchmark_register.h b/third-party/benchmark/src/benchmark_register.h
index d3f4974..53367c7 100644
--- a/third-party/benchmark/src/benchmark_register.h
+++ b/third-party/benchmark/src/benchmark_register.h
@@ -1,6 +1,7 @@
#ifndef BENCHMARK_REGISTER_H
#define BENCHMARK_REGISTER_H
+#include <algorithm>
#include <limits>
#include <vector>
@@ -23,7 +24,7 @@
static const T kmax = std::numeric_limits<T>::max();
// Space out the values in multiples of "mult"
- for (T i = static_cast<T>(1); i <= hi; i *= mult) {
+ for (T i = static_cast<T>(1); i <= hi; i *= static_cast<T>(mult)) {
if (i >= lo) {
dst->push_back(i);
}
@@ -32,7 +33,7 @@
if (i > kmax / mult) break;
}
- return dst->begin() + start_offset;
+ return dst->begin() + static_cast<int>(start_offset);
}
template <typename T>
diff --git a/third-party/benchmark/src/benchmark_runner.cc b/third-party/benchmark/src/benchmark_runner.cc
index eac807b..dcddb43 100644
--- a/third-party/benchmark/src/benchmark_runner.cc
+++ b/third-party/benchmark/src/benchmark_runner.cc
@@ -19,7 +19,7 @@
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -28,11 +28,14 @@
#include <algorithm>
#include <atomic>
+#include <climits>
+#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
+#include <limits>
#include <memory>
#include <string>
#include <thread>
@@ -61,7 +64,9 @@
namespace {
-static constexpr IterationCount kMaxIterations = 1000000000;
+static constexpr IterationCount kMaxIterations = 1000000000000;
+const double kDefaultMinTime =
+ std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
@@ -75,8 +80,8 @@
report.run_name = b.name();
report.family_index = b.family_index();
report.per_family_instance_index = b.per_family_instance_index();
- report.error_occurred = results.has_error_;
- report.error_message = results.error_message_;
+ report.skipped = results.skipped_;
+ report.skip_message = results.skip_message_;
report.report_label = results.report_label_;
// This is the total iterations across all threads.
report.iterations = results.iterations;
@@ -85,12 +90,13 @@
report.repetition_index = repetition_index;
report.repetitions = repeats;
- if (!report.error_occurred) {
+ if (!report.skipped) {
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
} else {
report.real_accumulated_time = results.real_time_used;
}
+ report.use_real_time_for_initial_big_o = b.use_manual_time();
report.cpu_accumulated_time = results.cpu_time_used;
report.complexity_n = results.complexity_n;
report.complexity = b.complexity();
@@ -103,7 +109,7 @@
report.memory_result = memory_result;
report.allocs_per_iter =
memory_iterations ? static_cast<double>(memory_result->num_allocs) /
- memory_iterations
+ static_cast<double>(memory_iterations)
: 0;
}
@@ -122,9 +128,10 @@
b->measure_process_cpu_time()
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());
+
State st =
b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
- BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
+ BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
MutexLock l(manager->GetBenchmarkMutex());
@@ -139,24 +146,100 @@
manager->NotifyThreadComplete();
}
+double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
+ const BenchTimeType& iters_or_time) {
+ if (!IsZero(b.min_time())) return b.min_time();
+ // If the flag was used to specify number of iters, then return the default
+ // min_time.
+ if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime;
+
+ return iters_or_time.time;
+}
+
+IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
+ const BenchTimeType& iters_or_time) {
+ if (b.iterations() != 0) return b.iterations();
+
+ // We've already concluded that this flag is currently used to pass
+ // iters but do a check here again anyway.
+ BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
+ return iters_or_time.iters;
+}
+
} // end namespace
+BenchTimeType ParseBenchMinTime(const std::string& value) {
+ BenchTimeType ret;
+
+ if (value.empty()) {
+ ret.tag = BenchTimeType::TIME;
+ ret.time = 0.0;
+ return ret;
+ }
+
+ if (value.back() == 'x') {
+ char* p_end;
+ // Reset errno before it's changed by strtol.
+ errno = 0;
+ IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
+
+ // After a valid parse, p_end should have been set to
+ // point to the 'x' suffix.
+ BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
+ << "Malformed iters value passed to --benchmark_min_time: `" << value
+ << "`. Expected --benchmark_min_time=<integer>x.";
+
+ ret.tag = BenchTimeType::ITERS;
+ ret.iters = num_iters;
+ return ret;
+ }
+
+ bool has_suffix = value.back() == 's';
+ if (!has_suffix) {
+ BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
+ "Eg., `30s` for 30-seconds.";
+ }
+
+ char* p_end;
+ // Reset errno before it's changed by strtod.
+ errno = 0;
+ double min_time = std::strtod(value.c_str(), &p_end);
+
+ // After a successful parse, p_end should point to the suffix 's',
+ // or the end of the string if the suffix was omitted.
+ BM_CHECK(errno == 0 && p_end != nullptr &&
+ ((has_suffix && *p_end == 's') || *p_end == '\0'))
+ << "Malformed seconds value passed to --benchmark_min_time: `" << value
+ << "`. Expected --benchmark_min_time=<float>x.";
+
+ ret.tag = BenchTimeType::TIME;
+ ret.time = min_time;
+
+ return ret;
+}
+
BenchmarkRunner::BenchmarkRunner(
const benchmark::internal::BenchmarkInstance& b_,
+ PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
: b(b_),
reports_for_family(reports_for_family_),
- min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
+ parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
+ min_time(ComputeMinTime(b_, parsed_benchtime_flag)),
+ min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
+ ? b.min_warmup_time()
+ : FLAGS_benchmark_min_warmup_time),
+ warmup_done(!(min_warmup_time > 0.0)),
repeats(b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions),
- has_explicit_iteration_count(b.iterations() != 0),
+ has_explicit_iteration_count(b.iterations() != 0 ||
+ parsed_benchtime_flag.tag ==
+ BenchTimeType::ITERS),
pool(b.threads() - 1),
- iters(has_explicit_iteration_count ? b.iterations() : 1),
- perf_counters_measurement(
- PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
- perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
- ? &perf_counters_measurement
- : nullptr) {
+ iters(has_explicit_iteration_count
+ ? ComputeIters(b_, parsed_benchtime_flag)
+ : 1),
+ perf_counters_measurement_ptr(pcm_) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
@@ -169,7 +252,7 @@
run_results.file_report_aggregates_only =
(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
- perf_counters_measurement.IsValid())
+ (perf_counters_measurement_ptr->num_counters() == 0))
<< "Perf counters were requested but could not be set up.";
}
}
@@ -232,20 +315,20 @@
const IterationResults& i) const {
// See how much iterations should be increased by.
// Note: Avoid division by zero with max(seconds, 1ns).
- double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
+ double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9);
// If our last run was at least 10% of FLAGS_benchmark_min_time then we
// use the multiplier directly.
// Otherwise we use at most 10 times expansion.
// NOTE: When the last run was at least 10% of the min time the max
// expansion should be 14x.
- bool is_significant = (i.seconds / min_time) > 0.1;
+ const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
multiplier = is_significant ? multiplier : 10.0;
// So what seems to be the sufficiently-large iteration count? Round up.
const IterationCount max_next_iters = static_cast<IterationCount>(
- std::lround(std::max(multiplier * static_cast<double>(i.iters),
- static_cast<double>(i.iters) + 1.0)));
- // But we do have *some* sanity limits though..
+ std::llround(std::max(multiplier * static_cast<double>(i.iters),
+ static_cast<double>(i.iters) + 1.0)));
+ // But we do have *some* limits though..
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
@@ -257,21 +340,80 @@
// Determine if this run should be reported;
// Either it has run for a sufficient amount of time
// or because an error was reported.
- return i.results.has_error_ ||
+ return i.results.skipped_ ||
i.iters >= kMaxIterations || // Too many iterations already.
- i.seconds >= min_time || // The elapsed time is large enough.
+ i.seconds >=
+ GetMinTimeToApply() || // The elapsed time is large enough.
// CPU time is specified but the elapsed real time greatly exceeds
// the minimum time.
- // Note that user provided timers are except from this sanity check.
- ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
+ // Note that user-provided timers are exempt from this test.
+ ((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
+ !b.use_manual_time());
+}
+
+double BenchmarkRunner::GetMinTimeToApply() const {
+ // In order to re-use functionality to run and measure benchmarks for running
+ // a warmup phase of the benchmark, we need a way of telling whether to apply
+ // min_time or min_warmup_time. This function will figure out if we are in the
+ // warmup phase and therefore need to apply min_warmup_time or if we already
+ // in the benchmarking phase and min_time needs to be applied.
+ return warmup_done ? min_time : min_warmup_time;
+}
+
+void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
+ warmup_done = true;
+ iters = i;
+}
+
+void BenchmarkRunner::RunWarmUp() {
+ // Use the same mechanisms for warming up the benchmark as used for actually
+ // running and measuring the benchmark.
+ IterationResults i_warmup;
+ // Don't use the iterations determined in the warmup phase for the actual
+ // measured benchmark phase. While this may be a good starting point for the
+ // benchmark, and it would therefore remove the need to figure out how many
+ // iterations are needed if min_time is set again, it may also be a completely
+ // wrong guess since the warmup loops might be considerably slower (e.g.
+ // because of caching effects).
+ const IterationCount i_backup = iters;
+
+ for (;;) {
+ b.Setup();
+ i_warmup = DoNIterations();
+ b.Teardown();
+
+ const bool finish = ShouldReportIterationResults(i_warmup);
+
+ if (finish) {
+ FinishWarmUp(i_backup);
+ break;
+ }
+
+ // Although we are running "only" a warmup phase, where running enough
+ // iterations at once without measuring time isn't as important as it is for
+ // the benchmarking phase, we still do it the same way, as otherwise it would
+ // be very confusing for the user to choose a proper value for
+ // min_warmup_time if a different approach to running it were used.
+ iters = PredictNumItersNeeded(i_warmup);
+ assert(iters > i_warmup.iters &&
+ "if we did more iterations than we want to do the next time, "
+ "then we should have accepted the current iteration run.");
+ }
}
void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");
const bool is_the_first_repetition = num_repetitions_done == 0;
- IterationResults i;
+ // In case a warmup phase is requested by the benchmark, run it now.
+ // After running the warmup phase the BenchmarkRunner should be in a state as
+ // if this warmup never happened, except that warmup_done is set. Every
+ // other manipulation of the BenchmarkRunner instance would be a bug! Please
+ // fix it.
+ if (!warmup_done) RunWarmUp();
+
+ IterationResults i;
// We *may* be gradually increasing the length (iteration count)
// of the benchmark until we decide the results are significant.
// And once we do, we report those last results and exit.
@@ -324,10 +466,7 @@
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
-
- BENCHMARK_DISABLE_DEPRECATED_WARNING
- memory_manager->Stop(memory_result);
- BENCHMARK_RESTORE_DEPRECATED_WARNING
+ memory_manager->Stop(*memory_result);
}
// Ok, now actually report.
@@ -337,7 +476,7 @@
if (reports_for_family) {
++reports_for_family->num_runs_done;
- if (!report.error_occurred) reports_for_family->Runs.push_back(report);
+ if (!report.skipped) reports_for_family->Runs.push_back(report);
}
run_results.non_aggregates.push_back(report);
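
The warmup support added above reuses the same DoNIterations()/PredictNumItersNeeded() machinery for an untimed phase that runs before the measured repetitions. A minimal sketch of how a user would exercise it is shown below; the per-benchmark MinWarmUpTime() setter is an assumption based on the min_warmup_time plumbing in this patch, while the --benchmark_min_warmup_time flag name comes from the declaration in benchmark_runner.h further down.

    #include <vector>
    #include <benchmark/benchmark.h>

    static void BM_VectorPushBack(benchmark::State& state) {
      for (auto _ : state) {
        std::vector<int> v;
        v.reserve(1);
        v.push_back(42);
        benchmark::DoNotOptimize(v.data());
      }
    }
    // Run roughly 0.5s of untimed warmup before the measured phase starts,
    // so caches and branch predictors are warm (assumed registration API).
    BENCHMARK(BM_VectorPushBack)->MinWarmUpTime(0.5);

    BENCHMARK_MAIN();

The same effect can presumably be requested globally for a binary via --benchmark_min_warmup_time=0.5.
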
diff --git a/third-party/benchmark/src/benchmark_runner.h b/third-party/benchmark/src/benchmark_runner.h
index 752eefd..db2fa04 100644
--- a/third-party/benchmark/src/benchmark_runner.h
+++ b/third-party/benchmark/src/benchmark_runner.h
@@ -25,7 +25,8 @@
namespace benchmark {
-BM_DECLARE_double(benchmark_min_time);
+BM_DECLARE_string(benchmark_min_time);
+BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
BM_DECLARE_bool(benchmark_display_aggregates_only);
@@ -43,9 +44,21 @@
bool file_report_aggregates_only = false;
};
+struct BENCHMARK_EXPORT BenchTimeType {
+ enum { ITERS, TIME } tag;
+ union {
+ IterationCount iters;
+ double time;
+ };
+};
+
+BENCHMARK_EXPORT
+BenchTimeType ParseBenchMinTime(const std::string& value);
+
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
+ benchmark::internal::PerfCountersMeasurement* pmc_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
int GetNumRepeats() const { return repeats; }
@@ -62,13 +75,22 @@
return reports_for_family;
}
+ double GetMinTime() const { return min_time; }
+
+ bool HasExplicitIters() const { return has_explicit_iteration_count; }
+
+ IterationCount GetIters() const { return iters; }
+
private:
RunResults run_results;
const benchmark::internal::BenchmarkInstance& b;
BenchmarkReporter::PerFamilyRunReports* reports_for_family;
+ BenchTimeType parsed_benchtime_flag;
const double min_time;
+ const double min_warmup_time;
+ bool warmup_done;
const int repeats;
const bool has_explicit_iteration_count;
@@ -82,8 +104,7 @@
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
- PerfCountersMeasurement perf_counters_measurement;
- PerfCountersMeasurement* const perf_counters_measurement_ptr;
+ PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
struct IterationResults {
internal::ThreadManager::Result results;
@@ -95,6 +116,12 @@
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
bool ShouldReportIterationResults(const IterationResults& i) const;
+
+ double GetMinTimeToApply() const;
+
+ void FinishWarmUp(const IterationCount& i);
+
+ void RunWarmUp();
};
} // namespace internal
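
BenchTimeType above is a tagged union: the min-time setting can now carry either an explicit iteration count or a duration, which is why benchmark_min_time changed from a double flag to a string flag. The standalone sketch below illustrates the suffix convention ParseBenchMinTime() presumably handles ("<integer>x" for iterations, "<float>s" or a bare number for seconds); it is an illustration, not the library's actual implementation.

    #include <cstdlib>
    #include <string>

    struct BenchTimeSketch {
      enum { ITERS, TIME } tag;
      union {
        long long iters;
        double seconds;
      };
    };

    BenchTimeSketch ParseMinTimeSketch(const std::string& value) {
      BenchTimeSketch result;
      if (!value.empty() && value.back() == 'x') {
        // "--benchmark_min_time=100x" -> run exactly 100 iterations.
        result.tag = BenchTimeSketch::ITERS;
        result.iters = std::strtoll(value.c_str(), nullptr, 10);
      } else {
        // "--benchmark_min_time=2.5s" (or "2.5") -> run for at least 2.5s.
        result.tag = BenchTimeSketch::TIME;
        result.seconds = std::strtod(value.c_str(), nullptr);
      }
      return result;
    }
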
diff --git a/third-party/benchmark/src/check.cc b/third-party/benchmark/src/check.cc
new file mode 100644
index 0000000..5f7526e
--- /dev/null
+++ b/third-party/benchmark/src/check.cc
@@ -0,0 +1,11 @@
+#include "check.h"
+
+namespace benchmark {
+namespace internal {
+
+static AbortHandlerT* handler = &std::abort;
+
+BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; }
+
+} // namespace internal
+} // namespace benchmark
diff --git a/third-party/benchmark/src/check.h b/third-party/benchmark/src/check.h
index 0efd13f..c1cd5e8 100644
--- a/third-party/benchmark/src/check.h
+++ b/third-party/benchmark/src/check.h
@@ -5,18 +5,34 @@
#include <cstdlib>
#include <ostream>
+#include "benchmark/export.h"
#include "internal_macros.h"
#include "log.h"
+#if defined(__GNUC__) || defined(__clang__)
+#define BENCHMARK_NOEXCEPT noexcept
+#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
+#elif defined(_MSC_VER) && !defined(__clang__)
+#if _MSC_VER >= 1900
+#define BENCHMARK_NOEXCEPT noexcept
+#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
+#else
+#define BENCHMARK_NOEXCEPT
+#define BENCHMARK_NOEXCEPT_OP(x)
+#endif
+#define __func__ __FUNCTION__
+#else
+#define BENCHMARK_NOEXCEPT
+#define BENCHMARK_NOEXCEPT_OP(x)
+#endif
+
namespace benchmark {
namespace internal {
typedef void(AbortHandlerT)();
-inline AbortHandlerT*& GetAbortHandler() {
- static AbortHandlerT* handler = &std::abort;
- return handler;
-}
+BENCHMARK_EXPORT
+AbortHandlerT*& GetAbortHandler();
BENCHMARK_NORETURN inline void CallAbortHandler() {
GetAbortHandler()();
@@ -36,10 +52,17 @@
LogType& GetLog() { return log_; }
+#if defined(COMPILER_MSVC)
+#pragma warning(push)
+#pragma warning(disable : 4722)
+#endif
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
log_ << std::endl;
CallAbortHandler();
}
+#if defined(COMPILER_MSVC)
+#pragma warning(pop)
+#endif
CheckHandler& operator=(const CheckHandler&) = delete;
CheckHandler(const CheckHandler&) = delete;
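
Moving GetAbortHandler() out of line (and exporting it) means every translation unit, including client code linked against a benchmark DLL, now sees the same handler slot. As an illustration only, and not a documented public API, a test could swap in a throwing handler so that a failed BM_CHECK becomes catchable instead of terminating the process:

    #include <stdexcept>

    #include "check.h"  // benchmark's internal header shown above

    [[noreturn]] static void ThrowingAbortHandler() {
      throw std::runtime_error("BM_CHECK failed");
    }

    void InstallThrowingAbortHandler() {
      // Replaces the default &std::abort stored in check.cc.
      benchmark::internal::GetAbortHandler() = &ThrowingAbortHandler;
    }
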
diff --git a/third-party/benchmark/src/colorprint.cc b/third-party/benchmark/src/colorprint.cc
index 1a000a0..abc71492 100644
--- a/third-party/benchmark/src/colorprint.cc
+++ b/third-party/benchmark/src/colorprint.cc
@@ -96,18 +96,18 @@
// currently there is no error handling for failure, so this is hack.
BM_CHECK(ret >= 0);
- if (ret == 0) // handle empty expansion
+ if (ret == 0) { // handle empty expansion
return {};
- else if (static_cast<size_t>(ret) < size)
- return local_buff;
- else {
- // we did not provide a long enough buffer on our first attempt.
- size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
- std::unique_ptr<char[]> buff(new char[size]);
- ret = vsnprintf(buff.get(), size, msg, args);
- BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
- return buff.get();
}
+ if (static_cast<size_t>(ret) < size) {
+ return local_buff;
+ }
+ // we did not provide a long enough buffer on our first attempt.
+ size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
+ std::unique_ptr<char[]> buff(new char[size]);
+ ret = vsnprintf(buff.get(), size, msg, args);
+ BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
+ return buff.get();
}
std::string FormatString(const char* msg, ...) {
@@ -140,12 +140,12 @@
// We need to flush the stream buffers into the console before each
// SetConsoleTextAttribute call lest it affect the text that is already
// printed but has not yet reached the console.
- fflush(stdout);
+ out.flush();
SetConsoleTextAttribute(stdout_handle,
GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
- vprintf(fmt, args);
+ out << FormatString(fmt, args);
- fflush(stdout);
+ out.flush();
// Restores the text color.
SetConsoleTextAttribute(stdout_handle, old_color_attrs);
#else
@@ -163,12 +163,24 @@
#else
// On non-Windows platforms, we rely on the TERM variable. This list of
// supported TERM values is copied from Google Test:
- // <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>.
+ // <https://github.com/google/googletest/blob/v1.13.0/googletest/src/gtest.cc#L3225-L3259>.
const char* const SUPPORTED_TERM_VALUES[] = {
- "xterm", "xterm-color", "xterm-256color",
- "screen", "screen-256color", "tmux",
- "tmux-256color", "rxvt-unicode", "rxvt-unicode-256color",
- "linux", "cygwin",
+ "xterm",
+ "xterm-color",
+ "xterm-256color",
+ "screen",
+ "screen-256color",
+ "tmux",
+ "tmux-256color",
+ "rxvt-unicode",
+ "rxvt-unicode-256color",
+ "linux",
+ "cygwin",
+ "xterm-kitty",
+ "alacritty",
+ "foot",
+ "foot-extra",
+ "wezterm",
};
const char* const term = getenv("TERM");
diff --git a/third-party/benchmark/src/commandlineflags.cc b/third-party/benchmark/src/commandlineflags.cc
index 9615e35..dcb4149 100644
--- a/third-party/benchmark/src/commandlineflags.cc
+++ b/third-party/benchmark/src/commandlineflags.cc
@@ -121,12 +121,14 @@
} // namespace
+BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str);
}
+BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
@@ -139,6 +141,7 @@
return value;
}
+BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
@@ -151,12 +154,14 @@
return value;
}
+BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value = getenv(env_var.c_str());
return value == nullptr ? default_val : value;
}
+BENCHMARK_EXPORT
std::map<std::string, std::string> KvPairsFromEnv(
const char* flag, std::map<std::string, std::string> default_val) {
const std::string env_var = FlagToEnvVar(flag);
@@ -201,6 +206,7 @@
return flag_end + 1;
}
+BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, true);
@@ -213,6 +219,7 @@
return true;
}
+BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@@ -225,6 +232,7 @@
value);
}
+BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@@ -237,6 +245,7 @@
value);
}
+BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@@ -248,6 +257,7 @@
return true;
}
+BENCHMARK_EXPORT
bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value) {
const char* const value_str = ParseFlagValue(str, flag, false);
@@ -263,23 +273,26 @@
return true;
}
+BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag) {
return (ParseFlagValue(str, flag, true) != nullptr);
}
+BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value) {
if (value.size() == 1) {
char v = value[0];
return isalnum(v) &&
!(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
- } else if (!value.empty()) {
+ }
+ if (!value.empty()) {
std::string value_lower(value);
std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
[](char c) { return static_cast<char>(::tolower(c)); });
return !(value_lower == "false" || value_lower == "no" ||
value_lower == "off");
- } else
- return true;
+ }
+ return true;
}
} // end namespace benchmark
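
For reference, the truthiness rules implemented by IsTruthyFlagValue() above (and documented in commandlineflags.h in the next file) work out as follows; this is a usage illustration, not part of the patch:

    #include <cassert>

    #include "commandlineflags.h"

    void TruthyFlagExamples() {
      using benchmark::IsTruthyFlagValue;
      assert(IsTruthyFlagValue("1"));        // single alphanumeric character
      assert(IsTruthyFlagValue("y"));
      assert(!IsTruthyFlagValue("0"));       // '0', 'f', 'F', 'n', 'N' are falsy
      assert(!IsTruthyFlagValue("N"));
      assert(IsTruthyFlagValue(""));         // special case: empty is truthy
      assert(!IsTruthyFlagValue("off"));     // "false"/"no"/"off", any case
      assert(!IsTruthyFlagValue("FALSE"));
      assert(IsTruthyFlagValue("enabled"));  // any other non-empty string
    }
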
diff --git a/third-party/benchmark/src/commandlineflags.h b/third-party/benchmark/src/commandlineflags.h
index 5baaf11..7882628 100644
--- a/third-party/benchmark/src/commandlineflags.h
+++ b/third-party/benchmark/src/commandlineflags.h
@@ -5,28 +5,33 @@
#include <map>
#include <string>
+#include "benchmark/export.h"
+
// Macro for referencing flags.
#define FLAG(name) FLAGS_##name
// Macros for declaring flags.
-#define BM_DECLARE_bool(name) extern bool FLAG(name)
-#define BM_DECLARE_int32(name) extern int32_t FLAG(name)
-#define BM_DECLARE_double(name) extern double FLAG(name)
-#define BM_DECLARE_string(name) extern std::string FLAG(name)
+#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name)
+#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name)
+#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name)
+#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name)
#define BM_DECLARE_kvpairs(name) \
- extern std::map<std::string, std::string> FLAG(name)
+ BENCHMARK_EXPORT extern std::map<std::string, std::string> FLAG(name)
// Macros for defining flags.
#define BM_DEFINE_bool(name, default_val) \
- bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val)
+ BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val)
#define BM_DEFINE_int32(name, default_val) \
- int32_t FLAG(name) = benchmark::Int32FromEnv(#name, default_val)
+ BENCHMARK_EXPORT int32_t FLAG(name) = \
+ benchmark::Int32FromEnv(#name, default_val)
#define BM_DEFINE_double(name, default_val) \
- double FLAG(name) = benchmark::DoubleFromEnv(#name, default_val)
+ BENCHMARK_EXPORT double FLAG(name) = \
+ benchmark::DoubleFromEnv(#name, default_val)
#define BM_DEFINE_string(name, default_val) \
- std::string FLAG(name) = benchmark::StringFromEnv(#name, default_val)
-#define BM_DEFINE_kvpairs(name, default_val) \
- std::map<std::string, std::string> FLAG(name) = \
+ BENCHMARK_EXPORT std::string FLAG(name) = \
+ benchmark::StringFromEnv(#name, default_val)
+#define BM_DEFINE_kvpairs(name, default_val) \
+ BENCHMARK_EXPORT std::map<std::string, std::string> FLAG(name) = \
benchmark::KvPairsFromEnv(#name, default_val)
namespace benchmark {
@@ -35,6 +40,7 @@
//
// If the variable exists, returns IsTruthyFlagValue() value; if not,
// returns the given default value.
+BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val);
// Parses an Int32 from the environment variable corresponding to the given
@@ -42,6 +48,7 @@
//
// If the variable exists, returns ParseInt32() value; if not, returns
// the given default value.
+BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val);
// Parses an Double from the environment variable corresponding to the given
@@ -49,6 +56,7 @@
//
// If the variable exists, returns ParseDouble(); if not, returns
// the given default value.
+BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val);
// Parses a string from the environment variable corresponding to the given
@@ -56,6 +64,7 @@
//
// If variable exists, returns its value; if not, returns
// the given default value.
+BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val);
// Parses a set of kvpairs from the environment variable corresponding to the
@@ -63,6 +72,7 @@
//
// If variable exists, returns its value; if not, returns
// the given default value.
+BENCHMARK_EXPORT
std::map<std::string, std::string> KvPairsFromEnv(
const char* flag, std::map<std::string, std::string> default_val);
@@ -75,40 +85,47 @@
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
+BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
// Parses a string for an Int32 flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
+BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
// Parses a string for a Double flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
+BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
// Parses a string for a string flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
+BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value"
//
// On success, stores the value of the flag in *value and returns true. On
// failure returns false, though *value may have been mutated.
+BENCHMARK_EXPORT
bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value);
// Returns true if the string matches the flag.
+BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag);
// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or
// some non-alphanumeric character. Also returns false if the value matches
// one of 'no', 'false', 'off' (case-insensitive). As a special case, also
// returns true if value is the empty string.
+BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value);
} // end namespace benchmark
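
The only change in this header is that every flag declaration and definition now carries BENCHMARK_EXPORT so the flags stay visible across the shared-library boundary. As a reminder of how the macros are meant to be used, here is a sketch with a made-up flag name; the BENCHMARK_MY_FEATURE environment-variable mapping assumes the usual "BENCHMARK_" prefix plus upper-casing done by FlagToEnvVar in commandlineflags.cc.

    #include "commandlineflags.h"

    // In exactly one .cc file: defines the flag and seeds it from the
    // BENCHMARK_MY_FEATURE environment variable (or false if unset).
    BM_DEFINE_bool(my_feature, false);

    // In any other translation unit that reads the flag:
    BM_DECLARE_bool(my_feature);

    bool MyFeatureEnabled() { return FLAG(my_feature); }
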
diff --git a/third-party/benchmark/src/complexity.cc b/third-party/benchmark/src/complexity.cc
index 825c573..eee3122 100644
--- a/third-party/benchmark/src/complexity.cc
+++ b/third-party/benchmark/src/complexity.cc
@@ -37,12 +37,14 @@
return [](IterationCount n) -> double { return std::pow(n, 3); };
case oLogN:
/* Note: can't use log2 because Android's GNU STL lacks it */
- return
- [](IterationCount n) { return kLog2E * log(static_cast<double>(n)); };
+ return [](IterationCount n) {
+ return kLog2E * std::log(static_cast<double>(n));
+ };
case oNLogN:
/* Note: can't use log2 because Android's GNU STL lacks it */
return [](IterationCount n) {
- return kLog2E * n * log(static_cast<double>(n));
+ return kLog2E * static_cast<double>(n) *
+ std::log(static_cast<double>(n));
};
case o1:
default:
@@ -75,12 +77,12 @@
// given by the lambda expression.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
-// - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };).
+// - fitting_curve : lambda expression (e.g. [](ComplexityN n) {return n; };).
// For a deeper explanation on the algorithm logic, please refer to
// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics
-LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
+LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
const std::vector<double>& time,
BigOFunc* fitting_curve) {
double sigma_gn_squared = 0.0;
@@ -105,12 +107,12 @@
double rms = 0.0;
for (size_t i = 0; i < n.size(); ++i) {
double fit = result.coef * fitting_curve(n[i]);
- rms += pow((time[i] - fit), 2);
+ rms += std::pow((time[i] - fit), 2);
}
// Normalized RMS by the mean of the observed values
- double mean = sigma_time / n.size();
- result.rms = sqrt(rms / n.size()) / mean;
+ double mean = sigma_time / static_cast<double>(n.size());
+ result.rms = std::sqrt(rms / static_cast<double>(n.size())) / mean;
return result;
}
@@ -122,7 +124,7 @@
// - complexity : If different than oAuto, the fitting curve will stick to
// this one. If it is oAuto, it will be calculated the best
// fitting curve.
-LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
+LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
const std::vector<double>& time, const BigO complexity) {
BM_CHECK_EQ(n.size(), time.size());
BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two
@@ -162,7 +164,7 @@
if (reports.size() < 2) return results;
// Accumulators.
- std::vector<int64_t> n;
+ std::vector<ComplexityN> n;
std::vector<double> real_time;
std::vector<double> cpu_time;
@@ -171,8 +173,10 @@
BM_CHECK_GT(run.complexity_n, 0)
<< "Did you forget to call SetComplexityN?";
n.push_back(run.complexity_n);
- real_time.push_back(run.real_accumulated_time / run.iterations);
- cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
+ real_time.push_back(run.real_accumulated_time /
+ static_cast<double>(run.iterations));
+ cpu_time.push_back(run.cpu_accumulated_time /
+ static_cast<double>(run.iterations));
}
LeastSq result_cpu;
@@ -182,8 +186,19 @@
result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
} else {
- result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
- result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
+ const BigO* InitialBigO = &reports[0].complexity;
+ const bool use_real_time_for_initial_big_o =
+ reports[0].use_real_time_for_initial_big_o;
+ if (use_real_time_for_initial_big_o) {
+ result_real = MinimalLeastSq(n, real_time, *InitialBigO);
+ InitialBigO = &result_real.complexity;
+ // The Big-O complexity for CPU time must have the same Big-O function!
+ }
+ result_cpu = MinimalLeastSq(n, cpu_time, *InitialBigO);
+ InitialBigO = &result_cpu.complexity;
+ if (!use_real_time_for_initial_big_o) {
+ result_real = MinimalLeastSq(n, real_time, *InitialBigO);
+ }
}
// Drop the 'args' when reporting complexity.
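
For reference, MinimalLeastSq() above solves the one-parameter fit t_i ≈ c·f(n_i) in closed form; the sums accumulated in the code correspond directly to

    c = \frac{\sum_i f(n_i)\, t_i}{\sum_i f(n_i)^2},
    \qquad
    \mathrm{rms} = \frac{\sqrt{\tfrac{1}{N}\sum_i \bigl(t_i - c\, f(n_i)\bigr)^2}}{\tfrac{1}{N}\sum_i t_i},

i.e. the minimizer of the squared error followed by the root-mean-square error normalized by the mean observed time. The later hunk additionally lets the initial Big-O be fitted on real time (use_real_time_for_initial_big_o) and then reuses the resulting complexity for the CPU-time fit, or vice versa.
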
diff --git a/third-party/benchmark/src/complexity.h b/third-party/benchmark/src/complexity.h
index df29b48..0a0679b 100644
--- a/third-party/benchmark/src/complexity.h
+++ b/third-party/benchmark/src/complexity.h
@@ -31,7 +31,7 @@
const std::vector<BenchmarkReporter::Run>& reports);
// This data structure will contain the result returned by MinimalLeastSq
-// - coef : Estimated coeficient for the high-order term as
+// - coef : Estimated coefficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability
diff --git a/third-party/benchmark/src/console_reporter.cc b/third-party/benchmark/src/console_reporter.cc
index 04cc0b7..35c3de2 100644
--- a/third-party/benchmark/src/console_reporter.cc
+++ b/third-party/benchmark/src/console_reporter.cc
@@ -33,6 +33,7 @@
namespace benchmark {
+BENCHMARK_EXPORT
bool ConsoleReporter::ReportContext(const Context& context) {
name_field_width_ = context.name_field_width;
printed_header_ = false;
@@ -41,17 +42,22 @@
PrintBasicContext(&GetErrorStream(), context);
#ifdef BENCHMARK_OS_WINDOWS
- if ((output_options_ & OO_Color) && &std::cout != &GetOutputStream()) {
- GetErrorStream()
- << "Color printing is only supported for stdout on windows."
- " Disabling color printing\n";
- output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
+ if ((output_options_ & OO_Color)) {
+ auto stdOutBuf = std::cout.rdbuf();
+ auto outStreamBuf = GetOutputStream().rdbuf();
+ if (stdOutBuf != outStreamBuf) {
+ GetErrorStream()
+ << "Color printing is only supported for stdout on windows."
+ " Disabling color printing\n";
+ output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
+ }
}
#endif
return true;
}
+BENCHMARK_EXPORT
void ConsoleReporter::PrintHeader(const Run& run) {
std::string str =
FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
@@ -69,6 +75,7 @@
GetOutputStream() << line << "\n" << str << "\n" << line << "\n";
}
+BENCHMARK_EXPORT
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& run : reports) {
// print the header:
@@ -99,6 +106,9 @@
}
static std::string FormatTime(double time) {
+ // For the time columns of the console printer 13 characters are reserved.
+ // One of them is a space and at most two of them are the time unit (e.g. ns).
+ // That leaves 10 characters usable for the number itself.
// Align decimal places...
if (time < 1.0) {
return FormatString("%10.3f", time);
@@ -109,9 +119,15 @@
if (time < 100.0) {
return FormatString("%10.1f", time);
}
+ // Assuming the time is at max 9.9999e+99 and we have 10 digits for the
+ // number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
+ if (time > 9999999999 /*max 10 digit number*/) {
+ return FormatString("%1.4e", time);
+ }
return FormatString("%10.0f", time);
}
+BENCHMARK_EXPORT
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
@@ -123,9 +139,13 @@
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name().c_str());
- if (result.error_occurred) {
+ if (internal::SkippedWithError == result.skipped) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
- result.error_message.c_str());
+ result.skip_message.c_str());
+ printer(Out, COLOR_DEFAULT, "\n");
+ return;
+ } else if (internal::SkippedWithMessage == result.skipped) {
+ printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
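
The 10-character budget described in the FormatTime() comments works out to the following outputs (width-10 strings, padded with leading spaces):

    0.123456     ->  "     0.123"   (%10.3f, time < 1)
    12.3456      ->  "     12.35"   (%10.2f, time < 10)
    123.456      ->  "     123.5"   (%10.1f, time < 100)
    1234567.0    ->  "   1234567"   (%10.0f, still fits in 10 digits)
    12300000000  ->  "1.2300e+10"   (%1.4e, would overflow 10 digits)
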
diff --git a/third-party/benchmark/src/counter.cc b/third-party/benchmark/src/counter.cc
index cf5b78e..aa14cd8 100644
--- a/third-party/benchmark/src/counter.cc
+++ b/third-party/benchmark/src/counter.cc
@@ -27,10 +27,10 @@
v /= num_threads;
}
if (c.flags & Counter::kIsIterationInvariant) {
- v *= iterations;
+ v *= static_cast<double>(iterations);
}
if (c.flags & Counter::kAvgIterations) {
- v /= iterations;
+ v /= static_cast<double>(iterations);
}
if (c.flags & Counter::kInvert) { // Invert is *always* last.
diff --git a/third-party/benchmark/src/csv_reporter.cc b/third-party/benchmark/src/csv_reporter.cc
index 1c5e9fa..4b39e2c 100644
--- a/third-party/benchmark/src/csv_reporter.cc
+++ b/third-party/benchmark/src/csv_reporter.cc
@@ -52,11 +52,13 @@
return '"' + tmp + '"';
}
+BENCHMARK_EXPORT
bool CSVReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
return true;
}
+BENCHMARK_EXPORT
void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
std::ostream& Out = GetOutputStream();
@@ -103,13 +105,14 @@
}
}
+BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
Out << CsvEscape(run.benchmark_name()) << ",";
- if (run.error_occurred) {
+ if (run.skipped) {
Out << std::string(elements.size() - 3, ',');
- Out << "true,";
- Out << CsvEscape(run.error_message) << "\n";
+ Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
+ Out << CsvEscape(run.skip_message) << "\n";
return;
}
@@ -119,13 +122,21 @@
}
Out << ",";
- Out << run.GetAdjustedRealTime() << ",";
- Out << run.GetAdjustedCPUTime() << ",";
+ if (run.run_type != Run::RT_Aggregate ||
+ run.aggregate_unit == StatisticUnit::kTime) {
+ Out << run.GetAdjustedRealTime() << ",";
+ Out << run.GetAdjustedCPUTime() << ",";
+ } else {
+ assert(run.aggregate_unit == StatisticUnit::kPercentage);
+ Out << run.real_accumulated_time << ",";
+ Out << run.cpu_accumulated_time << ",";
+ }
// Do not print timeLabel on bigO and RMS report
if (run.report_big_o) {
Out << GetBigOString(run.complexity);
- } else if (!run.report_rms) {
+ } else if (!run.report_rms &&
+ run.aggregate_unit != StatisticUnit::kPercentage) {
Out << GetTimeUnitString(run.time_unit);
}
Out << ",";
diff --git a/third-party/benchmark/src/cycleclock.h b/third-party/benchmark/src/cycleclock.h
index d65d32a..eff563e 100644
--- a/third-party/benchmark/src/cycleclock.h
+++ b/third-party/benchmark/src/cycleclock.h
@@ -36,7 +36,8 @@
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
-#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64)
+#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
+ !defined(_M_ARM64EC)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
@@ -114,7 +115,7 @@
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
-#elif defined(COMPILER_MSVC) && defined(_M_ARM64)
+#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
// See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
// and https://reviews.llvm.org/D53115
int64_t virtual_timer_value;
@@ -132,7 +133,7 @@
// Native Client does not provide any API to access cycle counter.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
- // because is provides nanosecond resolution (which is noticable at
+ // because is provides nanosecond resolution (which is noticeable at
// least for PNaCl modules running on x86 Mac & Linux).
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
@@ -173,7 +174,7 @@
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
-#elif defined(__loongarch__)
+#elif defined(__loongarch__) || defined(__csky__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
@@ -188,15 +189,16 @@
#endif
return tsc;
#elif defined(__riscv) // RISC-V
- // Use RDCYCLE (and RDCYCLEH on riscv32)
+ // Use RDTIME (and RDTIMEH on riscv32).
+ // RDCYCLE is a privileged instruction since Linux 6.6.
#if __riscv_xlen == 32
uint32_t cycles_lo, cycles_hi0, cycles_hi1;
// This asm also includes the PowerPC overflow handling strategy, as above.
// Implemented in assembly because Clang insisted on branching.
asm volatile(
- "rdcycleh %0\n"
- "rdcycle %1\n"
- "rdcycleh %2\n"
+ "rdtimeh %0\n"
+ "rdtime %1\n"
+ "rdtimeh %2\n"
"sub %0, %0, %2\n"
"seqz %0, %0\n"
"sub %0, zero, %0\n"
@@ -205,17 +207,31 @@
return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
#else
uint64_t cycles;
- asm volatile("rdcycle %0" : "=r"(cycles));
+ asm volatile("rdtime %0" : "=r"(cycles));
return cycles;
#endif
#elif defined(__e2k__) || defined(__elbrus__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#elif defined(__hexagon__)
+ uint64_t pcycle;
+ asm volatile("%0 = C15:14" : "=r"(pcycle));
+ return static_cast<double>(pcycle);
+#elif defined(__alpha__)
+ // Alpha has a cycle counter, the PCC register, but it is an unsigned 32-bit
+ // integer and thus wraps every ~4s, making using it for tick counts
+ // unreliable beyond this time range. The real-time clock is low-precision,
+ // roughly ~1ms, but it is the only option that can reasonably count
+ // indefinitely.
+ struct timeval tv;
+ gettimeofday(&tv, nullptr);
+ return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#else
-// The soft failover to a generic implementation is automatic only for ARM.
-// For other platforms the developer is expected to make an attempt to create
-// a fast implementation and use generic version if nothing better is available.
+ // The soft failover to a generic implementation is automatic only for ARM.
+ // For other platforms the developer is expected to make an attempt to create
+ // a fast implementation and use generic version if nothing better is
+ // available.
#error You need to define CycleTimer for your OS and CPU
#endif
}
diff --git a/third-party/benchmark/src/internal_macros.h b/third-party/benchmark/src/internal_macros.h
index 91f367b..8dd7d0c 100644
--- a/third-party/benchmark/src/internal_macros.h
+++ b/third-party/benchmark/src/internal_macros.h
@@ -1,8 +1,6 @@
#ifndef BENCHMARK_INTERNAL_MACROS_H_
#define BENCHMARK_INTERNAL_MACROS_H_
-#include "benchmark/benchmark.h"
-
/* Needed to detect STL */
#include <cstdlib>
@@ -44,6 +42,19 @@
#define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
#define BENCHMARK_OS_WINDOWS 1
+ // WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
+ // We include windows.h which implicitly includes winapifamily.h for compatibility.
+ #ifndef NOMINMAX
+ #define NOMINMAX
+ #endif
+ #include <windows.h>
+ #if defined(WINAPI_FAMILY_PARTITION)
+ #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+ #define BENCHMARK_OS_WINDOWS_WIN32 1
+ #elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
+ #define BENCHMARK_OS_WINDOWS_RT 1
+ #endif
+ #endif
#if defined(__MINGW32__)
#define BENCHMARK_OS_MINGW 1
#endif
@@ -80,6 +91,8 @@
#define BENCHMARK_OS_QNX 1
#elif defined(__MVS__)
#define BENCHMARK_OS_ZOS 1
+#elif defined(__hexagon__)
+#define BENCHMARK_OS_QURT 1
#endif
#if defined(__ANDROID__) && defined(__GLIBCXX__)
diff --git a/third-party/benchmark/src/json_reporter.cc b/third-party/benchmark/src/json_reporter.cc
index e84a4ed..b8c8c94 100644
--- a/third-party/benchmark/src/json_reporter.cc
+++ b/third-party/benchmark/src/json_reporter.cc
@@ -28,10 +28,6 @@
#include "timers.h"
namespace benchmark {
-namespace internal {
-extern std::map<std::string, std::string>* global_context;
-}
-
namespace {
std::string StrEscape(const std::string& s) {
@@ -89,12 +85,6 @@
return ss.str();
}
-std::string FormatKV(std::string const& key, IterationCount value) {
- std::stringstream ss;
- ss << '"' << StrEscape(key) << "\": " << value;
- return ss.str();
-}
-
std::string FormatKV(std::string const& key, double value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": ";
@@ -177,15 +167,25 @@
}
out << "],\n";
+ out << indent << FormatKV("library_version", GetBenchmarkVersion());
+ out << ",\n";
+
#if defined(NDEBUG)
const char build_type[] = "release";
#else
const char build_type[] = "debug";
#endif
out << indent << FormatKV("library_build_type", build_type);
+ out << ",\n";
- if (internal::global_context != nullptr) {
- for (const auto& kv : *internal::global_context) {
+ // NOTE: our json schema is not strictly tied to the library version!
+ out << indent << FormatKV("json_schema_version", int64_t(1));
+
+ std::map<std::string, std::string>* global_context =
+ internal::GetGlobalContext();
+
+ if (global_context != nullptr) {
+ for (const auto& kv : *global_context) {
out << ",\n";
out << indent << FormatKV(kv.first, kv.second);
}
@@ -261,9 +261,12 @@
BENCHMARK_UNREACHABLE();
}()) << ",\n";
}
- if (run.error_occurred) {
- out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
- out << indent << FormatKV("error_message", run.error_message) << ",\n";
+ if (internal::SkippedWithError == run.skipped) {
+ out << indent << FormatKV("error_occurred", true) << ",\n";
+ out << indent << FormatKV("error_message", run.skip_message) << ",\n";
+ } else if (internal::SkippedWithMessage == run.skipped) {
+ out << indent << FormatKV("skipped", true) << ",\n";
+ out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
@@ -301,7 +304,8 @@
out << ",\n"
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
- auto report_if_present = [&out, &indent](const char* label, int64_t val) {
+ auto report_if_present = [&out, &indent](const std::string& label,
+ int64_t val) {
if (val != MemoryManager::TombstoneValue)
out << ",\n" << indent << FormatKV(label, val);
};
diff --git a/third-party/benchmark/src/log.h b/third-party/benchmark/src/log.h
index 48c071a..9a21400 100644
--- a/third-party/benchmark/src/log.h
+++ b/third-party/benchmark/src/log.h
@@ -4,7 +4,12 @@
#include <iostream>
#include <ostream>
-#include "benchmark/benchmark.h"
+// NOTE: this is also defined in benchmark.h but we're trying to avoid a
+// dependency.
+// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
+#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
+#define BENCHMARK_HAS_CXX11
+#endif
namespace benchmark {
namespace internal {
@@ -23,7 +28,16 @@
private:
LogType(std::ostream* out) : out_(out) {}
std::ostream* out_;
- BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
+
+ // NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have
+ // a dependency on benchmark.h from here.
+#ifndef BENCHMARK_HAS_CXX11
+ LogType(const LogType&);
+ LogType& operator=(const LogType&);
+#else
+ LogType(const LogType&) = delete;
+ LogType& operator=(const LogType&) = delete;
+#endif
};
template <class Tp>
@@ -47,13 +61,13 @@
}
inline LogType& GetNullLogInstance() {
- static LogType log(nullptr);
- return log;
+ static LogType null_log(static_cast<std::ostream*>(nullptr));
+ return null_log;
}
inline LogType& GetErrorLogInstance() {
- static LogType log(&std::clog);
- return log;
+ static LogType error_log(&std::clog);
+ return error_log;
}
inline LogType& GetLogInstanceForLevel(int level) {
diff --git a/third-party/benchmark/src/perf_counters.cc b/third-party/benchmark/src/perf_counters.cc
index b2ac768..d466e27e 100644
--- a/third-party/benchmark/src/perf_counters.cc
+++ b/third-party/benchmark/src/perf_counters.cc
@@ -15,6 +15,7 @@
#include "perf_counters.h"
#include <cstring>
+#include <memory>
#include <vector>
#if defined HAVE_LIBPFM
@@ -28,105 +29,254 @@
constexpr size_t PerfCounterValues::kMaxCounters;
#if defined HAVE_LIBPFM
+
+size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
+ // Create a pointer for multiple reads
+ const size_t bufsize = values_.size() * sizeof(values_[0]);
+ char* ptr = reinterpret_cast<char*>(values_.data());
+ size_t size = bufsize;
+ for (int lead : leaders) {
+ auto read_bytes = ::read(lead, ptr, size);
+ if (read_bytes >= ssize_t(sizeof(uint64_t))) {
+ // Actual data bytes are all bytes minus initial padding
+ std::size_t data_bytes = read_bytes - sizeof(uint64_t);
+ // This should be very cheap since it's in hot cache
+ std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
+ // Increment our counters
+ ptr += data_bytes;
+ size -= data_bytes;
+ } else {
+ int err = errno;
+ GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
+ << " " << ::strerror(err) << "\n";
+ return 0;
+ }
+ }
+ return (bufsize - size) / sizeof(uint64_t);
+}
+
const bool PerfCounters::kSupported = true;
-bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; }
+// Initializes libpfm only on the first call. Returns whether that single
+// initialization was successful.
+bool PerfCounters::Initialize() {
+ // Function-scope static gets initialized only once on first call.
+ static const bool success = []() {
+ return pfm_initialize() == PFM_SUCCESS;
+ }();
+ return success;
+}
+
+bool PerfCounters::IsCounterSupported(const std::string& name) {
+ Initialize();
+ perf_event_attr_t attr;
+ std::memset(&attr, 0, sizeof(attr));
+ pfm_perf_encode_arg_t arg;
+ std::memset(&arg, 0, sizeof(arg));
+ arg.attr = &attr;
+ const int mode = PFM_PLM3; // user mode only
+ int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
+ &arg);
+ return (ret == PFM_SUCCESS);
+}
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
- if (counter_names.empty()) {
- return NoCounters();
+ if (!counter_names.empty()) {
+ Initialize();
}
- if (counter_names.size() > PerfCounterValues::kMaxCounters) {
- GetErrorLogInstance()
- << counter_names.size()
- << " counters were requested. The minimum is 1, the maximum is "
- << PerfCounterValues::kMaxCounters << "\n";
- return NoCounters();
- }
- std::vector<int> counter_ids(counter_names.size());
- const int mode = PFM_PLM3; // user mode only
+ // Valid counters will populate these arrays but we start empty
+ std::vector<std::string> valid_names;
+ std::vector<int> counter_ids;
+ std::vector<int> leader_ids;
+
+ // Reserve space for the maximum possible number of counters
+ valid_names.reserve(counter_names.size());
+ counter_ids.reserve(counter_names.size());
+
+ const int kCounterMode = PFM_PLM3; // user mode only
+
+ // Group leads will be assigned on demand. The idea is that once we cannot
+ // create a counter descriptor, the reason is that this group has maxed out
+ // so we set the group_id again to -1 and retry - giving the algorithm a
+ // chance to create a new group leader to hold the next set of counters.
+ int group_id = -1;
+
+ // Loop through all performance counters
for (size_t i = 0; i < counter_names.size(); ++i) {
- const bool is_first = i == 0;
- struct perf_event_attr attr {};
- attr.size = sizeof(attr);
- const int group_id = !is_first ? counter_ids[0] : -1;
+ // we are about to push into the valid names vector
+ // check if we did not reach the maximum
+ if (valid_names.size() == PerfCounterValues::kMaxCounters) {
+ // Log a message if we maxed out and stop adding
+ GetErrorLogInstance()
+ << counter_names.size() << " counters were requested. The maximum is "
+ << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
+ << " were already added. All remaining counters will be ignored\n";
+ // stop the loop and return what we have already
+ break;
+ }
+
+ // Check if this name is empty
const auto& name = counter_names[i];
if (name.empty()) {
- GetErrorLogInstance() << "A counter name was the empty string\n";
- return NoCounters();
+ GetErrorLogInstance()
+ << "A performance counter name was the empty string\n";
+ continue;
}
+
+ // Here "first" means first in the group, i.e. the group leader
+ const bool is_first = (group_id < 0);
+
+ // This struct will be populated by libpfm from the counter string
+ // and then fed into the syscall perf_event_open
+ struct perf_event_attr attr {};
+ attr.size = sizeof(attr);
+
+ // This is the input struct to libpfm.
pfm_perf_encode_arg_t arg{};
arg.attr = &attr;
-
- const int pfm_get =
- pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
+ const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
+ PFM_OS_PERF_EVENT, &arg);
if (pfm_get != PFM_SUCCESS) {
- GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
- return NoCounters();
+ GetErrorLogInstance()
+ << "Unknown performance counter name: " << name << "\n";
+ continue;
}
- attr.disabled = is_first;
- // Note: the man page for perf_event_create suggests inerit = true and
+
+ // We then proceed to populate the remaining fields in our attribute struct
+ // Note: the man page for perf_event_create suggests inherit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
+ attr.disabled = is_first;
attr.inherit = true;
attr.pinned = is_first;
attr.exclude_kernel = true;
attr.exclude_user = false;
attr.exclude_hv = true;
- // Read all counters in one read.
+
+ // Read all counters in a group in one read.
attr.read_format = PERF_FORMAT_GROUP;
int id = -1;
- static constexpr size_t kNrOfSyscallRetries = 5;
- // Retry syscall as it was interrupted often (b/64774091).
- for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
- ++num_retries) {
- id = perf_event_open(&attr, 0, -1, group_id, 0);
- if (id >= 0 || errno != EINTR) {
- break;
+ while (id < 0) {
+ static constexpr size_t kNrOfSyscallRetries = 5;
+ // Retry syscall as it was interrupted often (b/64774091).
+ for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
+ ++num_retries) {
+ id = perf_event_open(&attr, 0, -1, group_id, 0);
+ if (id >= 0 || errno != EINTR) {
+ break;
+ }
+ }
+ if (id < 0) {
+ // If the file descriptor is negative we might have reached a limit
+ // in the current group. Set the group_id to -1 and retry
+ if (group_id >= 0) {
+ // Create a new group
+ group_id = -1;
+ } else {
+ // At this point we have already retried to set a new group id and
+ // failed. We then give up.
+ break;
+ }
}
}
+
+ // We failed to get a new file descriptor. We might have reached a hard
+ // hardware limit that cannot be resolved even with group multiplexing
if (id < 0) {
- GetErrorLogInstance()
- << "Failed to get a file descriptor for " << name << "\n";
+ GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
+ "for performance counter "
+ << name << ". Ignoring\n";
+
+ // We give up on this counter but try to keep going
+ // as the others would be fine
+ continue;
+ }
+ if (group_id < 0) {
+ // This is a leader, store and assign it to the current file descriptor
+ leader_ids.push_back(id);
+ group_id = id;
+ }
+ // This is a valid counter, add it to our descriptor's list
+ counter_ids.push_back(id);
+ valid_names.push_back(name);
+ }
+
+ // Loop through all group leaders activating them
+ // There is another option of starting ALL counters in a process, but
+ // that would be far too intrusive. If the user is using PMCs themselves
+ // then this would have a side effect on them. It is friendlier to loop
+ // through all groups individually.
+ for (int lead : leader_ids) {
+ if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
+ // This should never happen but if it does, we give up on the
+ // entire batch as recovery would be a mess.
+ GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
+ "Claring out all counters.\n";
+
+ // Close all performance counters
+ for (int id : counter_ids) {
+ ::close(id);
+ }
+
+ // Return an empty object so our internal state is still good and
+ // the process can continue normally without impact
return NoCounters();
}
-
- counter_ids[i] = id;
- }
- if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
- GetErrorLogInstance() << "Failed to start counters\n";
- return NoCounters();
}
- return PerfCounters(counter_names, std::move(counter_ids));
+ return PerfCounters(std::move(valid_names), std::move(counter_ids),
+ std::move(leader_ids));
}
-PerfCounters::~PerfCounters() {
+void PerfCounters::CloseCounters() const {
if (counter_ids_.empty()) {
return;
}
- ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE);
+ for (int lead : leader_ids_) {
+ ioctl(lead, PERF_EVENT_IOC_DISABLE);
+ }
for (int fd : counter_ids_) {
close(fd);
}
}
#else // defined HAVE_LIBPFM
+size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
+
const bool PerfCounters::kSupported = false;
bool PerfCounters::Initialize() { return false; }
+bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
+
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
- GetErrorLogInstance() << "Performance counters not supported.";
+ GetErrorLogInstance() << "Performance counters not supported.\n";
}
return NoCounters();
}
-PerfCounters::~PerfCounters() = default;
+void PerfCounters::CloseCounters() const {}
#endif // defined HAVE_LIBPFM
+
+PerfCountersMeasurement::PerfCountersMeasurement(
+ const std::vector<std::string>& counter_names)
+ : start_values_(counter_names.size()), end_values_(counter_names.size()) {
+ counters_ = PerfCounters::Create(counter_names);
+}
+
+PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
+ if (this != &other) {
+ CloseCounters();
+
+ counter_ids_ = std::move(other.counter_ids_);
+ leader_ids_ = std::move(other.leader_ids_);
+ counter_names_ = std::move(other.counter_names_);
+ }
+ return *this;
+}
} // namespace internal
} // namespace benchmark
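
The byte shuffling in PerfCounterValues::Read() above is driven by the kernel's group read format. With read_format = PERF_FORMAT_GROUP and no other format bits set (which is what Create() requests), a single ::read() on a group leader returns, per perf_event_open(2):

    // Layout assumed by Read(); one such record per group leader.
    //
    //   struct read_format {
    //     u64 nr;           // number of events in the group
    //     u64 values[nr];   // one value per event, group leader first
    //   };

Skipping sizeof(uint64_t) and memmove-ing the remainder therefore drops the nr header of each group and packs all counter values back-to-back, which is why operator[] in PerfCounterValues no longer needs to skip padding.
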
diff --git a/third-party/benchmark/src/perf_counters.h b/third-party/benchmark/src/perf_counters.h
index 47ca138..bf5eb6b 100644
--- a/third-party/benchmark/src/perf_counters.h
+++ b/third-party/benchmark/src/perf_counters.h
@@ -17,16 +17,25 @@
#include <array>
#include <cstdint>
+#include <cstring>
+#include <memory>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"
+#include "mutex.h"
#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif
+#if defined(_MSC_VER)
+#pragma warning(push)
+// C4251: <symbol> needs to have dll-interface to be used by clients of class
+#pragma warning(disable : 4251)
+#endif
+
namespace benchmark {
namespace internal {
@@ -36,18 +45,21 @@
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
-// to Snapshot(). The values are populated such that
-// perfCounters->names()[i]'s value is obtained at position i (as given by
-// operator[]) of this object.
-class PerfCounterValues {
+// to Snapshot(). The Read() method relocates individual reads, discarding
+// the initial padding from each group leader in the values buffer such that
+// all user accesses through the [] operator are correct.
+class BENCHMARK_EXPORT PerfCounterValues {
public:
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
BM_CHECK_LE(nr_counters_, kMaxCounters);
}
- uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
+ // We are reading correctly now so the values don't need to skip padding
+ uint64_t operator[](size_t pos) const { return values_[pos]; }
- static constexpr size_t kMaxCounters = 3;
+ // Increased the maximum to 32 only since the buffer
+ // is std::array<> backed
+ static constexpr size_t kMaxCounters = 32;
private:
friend class PerfCounters;
@@ -58,7 +70,14 @@
sizeof(uint64_t) * (kPadding + nr_counters_)};
}
- static constexpr size_t kPadding = 1;
+ // This read is complex and, as the goal of this class is to
+ // abstract away the intricacies of the reading process, this is
+ // a better place for it.
+ size_t Read(const std::vector<int>& leaders);
+
+ // Move the padding to 2 due to the reading algorithm (1st padding plus a
+ // current read padding)
+ static constexpr size_t kPadding = 2;
std::array<uint64_t, kPadding + kMaxCounters> values_;
const size_t nr_counters_;
};
@@ -66,27 +85,34 @@
// Collect PMU counters. The object, once constructed, is ready to be used by
// calling read(). PMU counter collection is enabled from the time create() is
// called, to obtain the object, until the object's destructor is called.
-class PerfCounters final {
+class BENCHMARK_EXPORT PerfCounters final {
public:
// True iff this platform supports performance counters.
static const bool kSupported;
- bool IsValid() const { return is_valid_; }
+ // Returns an empty object
static PerfCounters NoCounters() { return PerfCounters(); }
- ~PerfCounters();
+ ~PerfCounters() { CloseCounters(); }
+ PerfCounters() = default;
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
+ PerfCounters& operator=(PerfCounters&&) noexcept;
+ PerfCounters& operator=(const PerfCounters&) = delete;
// Platform-specific implementations may choose to do some library
// initialization here.
static bool Initialize();
+ // Check if the given counter is supported, if the app wants to
+ // check before passing it in
+ static bool IsCounterSupported(const std::string& name);
+
// Return a PerfCounters object ready to read the counters with the names
// specified. The values are user-mode only. The counter name format is
// implementation and OS specific.
- // TODO: once we move to C++-17, this should be a std::optional, and then the
- // IsValid() boolean can be dropped.
+ // In case of failure, this method will in the worst case return an
+ // empty object whose state will still be valid.
static PerfCounters Create(const std::vector<std::string>& counter_names);
// Take a snapshot of the current value of the counters into the provided
@@ -95,10 +121,7 @@
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
assert(values != nullptr);
- assert(IsValid());
- auto buffer = values->get_data_buffer();
- auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
- return static_cast<size_t>(read_bytes) == buffer.second;
+ return values->Read(leader_ids_) == counter_ids_.size();
#else
(void)values;
return false;
@@ -110,63 +133,68 @@
private:
PerfCounters(const std::vector<std::string>& counter_names,
- std::vector<int>&& counter_ids)
+ std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
: counter_ids_(std::move(counter_ids)),
- counter_names_(counter_names),
- is_valid_(true) {}
- PerfCounters() : is_valid_(false) {}
+ leader_ids_(std::move(leader_ids)),
+ counter_names_(counter_names) {}
+
+ void CloseCounters() const;
std::vector<int> counter_ids_;
- const std::vector<std::string> counter_names_;
- const bool is_valid_;
+ std::vector<int> leader_ids_;
+ std::vector<std::string> counter_names_;
};
// Typical usage of the above primitives.
-class PerfCountersMeasurement final {
+class BENCHMARK_EXPORT PerfCountersMeasurement final {
public:
- PerfCountersMeasurement(PerfCounters&& c)
- : counters_(std::move(c)),
- start_values_(counters_.IsValid() ? counters_.names().size() : 0),
- end_values_(counters_.IsValid() ? counters_.names().size() : 0) {}
+ PerfCountersMeasurement(const std::vector<std::string>& counter_names);
- bool IsValid() const { return counters_.IsValid(); }
+ size_t num_counters() const { return counters_.num_counters(); }
- BENCHMARK_ALWAYS_INLINE void Start() {
- assert(IsValid());
+ std::vector<std::string> names() const { return counters_.names(); }
+
+ BENCHMARK_ALWAYS_INLINE bool Start() {
+ if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
- counters_.Snapshot(&start_values_);
+ valid_read_ &= counters_.Snapshot(&start_values_);
ClobberMemory();
+
+ return valid_read_;
}
- BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>>
- StopAndGetMeasurements() {
- assert(IsValid());
+ BENCHMARK_ALWAYS_INLINE bool Stop(
+ std::vector<std::pair<std::string, double>>& measurements) {
+ if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
- counters_.Snapshot(&end_values_);
+ valid_read_ &= counters_.Snapshot(&end_values_);
ClobberMemory();
- std::vector<std::pair<std::string, double>> ret;
for (size_t i = 0; i < counters_.names().size(); ++i) {
double measurement = static_cast<double>(end_values_[i]) -
static_cast<double>(start_values_[i]);
- ret.push_back({counters_.names()[i], measurement});
+ measurements.push_back({counters_.names()[i], measurement});
}
- return ret;
+
+ return valid_read_;
}
private:
PerfCounters counters_;
+ bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
};
-BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize();
-
} // namespace internal
} // namespace benchmark
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
#endif // BENCHMARK_PERF_COUNTERS_H
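
A minimal sketch of the reworked PerfCountersMeasurement interface, for orientation only: it is an internal API, and the counter names are passed through to libpfm, so they are platform specific ("CYCLES,INSTRUCTIONS" is the example used for --benchmark_perf_counters in the benchmark documentation).

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    #include "perf_counters.h"  // benchmark's internal header shown above

    void MeasureSomething() {
      using benchmark::internal::PerfCountersMeasurement;
      // Counters that cannot be created are skipped with a warning, so
      // num_counters() may be smaller than the number of names requested.
      PerfCountersMeasurement pcm({"CYCLES", "INSTRUCTIONS"});
      std::vector<std::pair<std::string, double>> measurements;
      if (!pcm.Start()) return;  // a failed snapshot makes Start() return false
      // ... code under measurement ...
      if (!pcm.Stop(measurements)) return;
      for (const auto& m : measurements) {
        std::cout << m.first << ": " << m.second << "\n";
      }
    }
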
diff --git a/third-party/benchmark/src/re.h b/third-party/benchmark/src/re.h
index 6300467..9afb869b 100644
--- a/third-party/benchmark/src/re.h
+++ b/third-party/benchmark/src/re.h
@@ -33,7 +33,7 @@
// Prefer C regex libraries when compiling w/o exceptions so that we can
// correctly report errors.
#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \
- defined(BENCHMARK_HAVE_STD_REGEX) && \
+ defined(HAVE_STD_REGEX) && \
(defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
#undef HAVE_STD_REGEX
#endif
diff --git a/third-party/benchmark/src/reporter.cc b/third-party/benchmark/src/reporter.cc
index 1d2df17..076bc31 100644
--- a/third-party/benchmark/src/reporter.cc
+++ b/third-party/benchmark/src/reporter.cc
@@ -25,9 +25,6 @@
#include "timers.h"
namespace benchmark {
-namespace internal {
-extern std::map<std::string, std::string> *global_context;
-}
BenchmarkReporter::BenchmarkReporter()
: output_stream_(&std::cout), error_stream_(&std::cerr) {}
@@ -39,7 +36,11 @@
BM_CHECK(out) << "cannot be null";
auto &Out = *out;
+#ifndef BENCHMARK_OS_QURT
+ // Date/time information is not available on QuRT.
+ // Attempting to get it via this call causes the binary to crash.
Out << LocalDateTimeString() << "\n";
+#endif
if (context.executable_name)
Out << "Running " << context.executable_name << "\n";
@@ -67,8 +68,11 @@
Out << "\n";
}
- if (internal::global_context != nullptr) {
- for (const auto &kv : *internal::global_context) {
+ std::map<std::string, std::string> *global_context =
+ internal::GetGlobalContext();
+
+ if (global_context != nullptr) {
+ for (const auto &kv : *global_context) {
Out << kv.first << ": " << kv.second << "\n";
}
}
diff --git a/third-party/benchmark/src/sleep.cc b/third-party/benchmark/src/sleep.cc
deleted file mode 100644
index ab59000..0000000
--- a/third-party/benchmark/src/sleep.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2015 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "sleep.h"
-
-#include <cerrno>
-#include <cstdlib>
-#include <ctime>
-
-#include "internal_macros.h"
-
-#ifdef BENCHMARK_OS_WINDOWS
-#include <windows.h>
-#endif
-
-#ifdef BENCHMARK_OS_ZOS
-#include <unistd.h>
-#endif
-
-namespace benchmark {
-#ifdef BENCHMARK_OS_WINDOWS
-// Window's Sleep takes milliseconds argument.
-void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
-void SleepForSeconds(double seconds) {
- SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
-}
-#else // BENCHMARK_OS_WINDOWS
-void SleepForMicroseconds(int microseconds) {
-#ifdef BENCHMARK_OS_ZOS
- // z/OS does not support nanosleep. Instead call sleep() and then usleep() to
- // sleep for the remaining microseconds because usleep() will fail if its
- // argument is greater than 1000000.
- div_t sleepTime = div(microseconds, kNumMicrosPerSecond);
- int seconds = sleepTime.quot;
- while (seconds != 0) seconds = sleep(seconds);
- while (usleep(sleepTime.rem) == -1 && errno == EINTR)
- ;
-#else
- struct timespec sleep_time;
- sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
- sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
- while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
- ; // Ignore signals and wait for the full interval to elapse.
-#endif
-}
-
-void SleepForMilliseconds(int milliseconds) {
- SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
-}
-
-void SleepForSeconds(double seconds) {
- SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
-}
-#endif // BENCHMARK_OS_WINDOWS
-} // end namespace benchmark
diff --git a/third-party/benchmark/src/sleep.h b/third-party/benchmark/src/sleep.h
deleted file mode 100644
index f98551a..0000000
--- a/third-party/benchmark/src/sleep.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef BENCHMARK_SLEEP_H_
-#define BENCHMARK_SLEEP_H_
-
-namespace benchmark {
-const int kNumMillisPerSecond = 1000;
-const int kNumMicrosPerMilli = 1000;
-const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
-const int kNumNanosPerMicro = 1000;
-const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
-
-void SleepForMilliseconds(int milliseconds);
-void SleepForSeconds(double seconds);
-} // end namespace benchmark
-
-#endif // BENCHMARK_SLEEP_H_
diff --git a/third-party/benchmark/src/statistics.cc b/third-party/benchmark/src/statistics.cc
index 3e5ef09..261dcb2 100644
--- a/third-party/benchmark/src/statistics.cc
+++ b/third-party/benchmark/src/statistics.cc
@@ -32,7 +32,7 @@
double StatisticsMean(const std::vector<double>& v) {
if (v.empty()) return 0.0;
- return StatisticsSum(v) * (1.0 / v.size());
+ return StatisticsSum(v) * (1.0 / static_cast<double>(v.size()));
}
double StatisticsMedian(const std::vector<double>& v) {
@@ -42,13 +42,13 @@
auto center = copy.begin() + v.size() / 2;
std::nth_element(copy.begin(), center, copy.end());
- // did we have an odd number of samples?
- // if yes, then center is the median
- // it no, then we are looking for the average between center and the value
- // before
+ // Did we have an odd number of samples? If yes, then center is the median.
+ // If not, then we are looking for the average between center and the value
+ // before. Instead of resorting, we just look for the max value before it,
+ // which is not necessarily the element immediately preceding `center`, since
+ // `copy` is only partially sorted by `nth_element`.
if (v.size() % 2 == 1) return *center;
- auto center2 = copy.begin() + v.size() / 2 - 1;
- std::nth_element(copy.begin(), center2, copy.end());
+ auto center2 = std::max_element(copy.begin(), center);
return (*center + *center2) / 2.0;
}
@@ -71,8 +71,11 @@
// Sample standard deviation is undefined for n = 1
if (v.size() == 1) return 0.0;
- const double avg_squares = SumSquares(v) * (1.0 / v.size());
- return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean)));
+ const double avg_squares =
+ SumSquares(v) * (1.0 / static_cast<double>(v.size()));
+ return Sqrt(static_cast<double>(v.size()) /
+ (static_cast<double>(v.size()) - 1.0) *
+ (avg_squares - Sqr(mean)));
}
double StatisticsCV(const std::vector<double>& v) {
@@ -81,6 +84,8 @@
const auto stddev = StatisticsStdDev(v);
const auto mean = StatisticsMean(v);
+ if (std::fpclassify(mean) == FP_ZERO) return 0.0;
+
return stddev / mean;
}
@@ -89,9 +94,8 @@
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
- auto error_count =
- std::count_if(reports.begin(), reports.end(),
- [](Run const& run) { return run.error_occurred; });
+ auto error_count = std::count_if(reports.begin(), reports.end(),
+ [](Run const& run) { return run.skipped; });
if (reports.size() - error_count < 2) {
// We don't report aggregated data if there was a single run.
@@ -118,11 +122,13 @@
for (auto const& cnt : r.counters) {
auto it = counter_stats.find(cnt.first);
if (it == counter_stats.end()) {
- counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}});
- it = counter_stats.find(cnt.first);
+ it = counter_stats
+ .emplace(cnt.first,
+ CounterStat{cnt.second, std::vector<double>{}})
+ .first;
it->second.s.reserve(reports.size());
} else {
- BM_CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
+ BM_CHECK_EQ(it->second.c.flags, cnt.second.flags);
}
}
}
@@ -131,7 +137,7 @@
for (Run const& run : reports) {
BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
BM_CHECK_EQ(run_iterations, run.iterations);
- if (run.error_occurred) continue;
+ if (run.skipped) continue;
real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
// user counters
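
The median hunk above replaces the second nth_element call with std::max_element over the lower half: after nth_element(copy.begin(), center, copy.end()), every element before `center` is <= *center, so for an even-sized sample the other middle value is just the maximum of that half. A standalone sketch of the same idea (illustrative, not the library's function; the name Median is hypothetical):

    #include <algorithm>
    #include <vector>

    // Median of an unsorted sample, using the partial-sort trick from the hunk.
    double Median(std::vector<double> v) {
      if (v.empty()) return 0.0;
      auto center = v.begin() + v.size() / 2;
      std::nth_element(v.begin(), center, v.end());
      if (v.size() % 2 == 1) return *center;  // odd count: middle element
      // Even count: everything left of `center` is <= *center, so the other
      // middle value is the maximum of that (only partially sorted) half.
      auto center2 = std::max_element(v.begin(), center);
      return (*center + *center2) / 2.0;
    }

The CV hunk applies the same defensive spirit, returning 0.0 when the mean classifies as FP_ZERO instead of dividing by it.
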
diff --git a/third-party/benchmark/src/statistics.h b/third-party/benchmark/src/statistics.h
index a9545a5..6e5560e 100644
--- a/third-party/benchmark/src/statistics.h
+++ b/third-party/benchmark/src/statistics.h
@@ -22,15 +22,21 @@
namespace benchmark {
-// Return a vector containing the mean, median and standard devation information
-// (and any user-specified info) for the specified list of reports. If 'reports'
-// contains less than two non-errored runs an empty vector is returned
+// Return a vector containing the mean, median and standard deviation
+// information (and any user-specified info) for the specified list of reports.
+// If 'reports' contains less than two non-errored runs an empty vector is
+// returned
+BENCHMARK_EXPORT
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports);
+BENCHMARK_EXPORT
double StatisticsMean(const std::vector<double>& v);
+BENCHMARK_EXPORT
double StatisticsMedian(const std::vector<double>& v);
+BENCHMARK_EXPORT
double StatisticsStdDev(const std::vector<double>& v);
+BENCHMARK_EXPORT
double StatisticsCV(const std::vector<double>& v);
} // end namespace benchmark
diff --git a/third-party/benchmark/src/string_util.cc b/third-party/benchmark/src/string_util.cc
index 401fa13..c69e40a 100644
--- a/third-party/benchmark/src/string_util.cc
+++ b/third-party/benchmark/src/string_util.cc
@@ -11,16 +11,17 @@
#include <sstream>
#include "arraysize.h"
+#include "benchmark/benchmark.h"
namespace benchmark {
namespace {
-
// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
-const char kBigSIUnits[] = "kMGTPEZY";
+const char* const kBigSIUnits[] = {"k", "M", "G", "T", "P", "E", "Z", "Y"};
// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
-const char kBigIECUnits[] = "KMGTPEZY";
+const char* const kBigIECUnits[] = {"Ki", "Mi", "Gi", "Ti",
+ "Pi", "Ei", "Zi", "Yi"};
// milli, micro, nano, pico, femto, atto, zepto, yocto.
-const char kSmallSIUnits[] = "munpfazy";
+const char* const kSmallSIUnits[] = {"m", "u", "n", "p", "f", "a", "z", "y"};
// We require that all three arrays have the same size.
static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
@@ -30,9 +31,8 @@
static const int64_t kUnitsSize = arraysize(kBigSIUnits);
-void ToExponentAndMantissa(double val, double thresh, int precision,
- double one_k, std::string* mantissa,
- int64_t* exponent) {
+void ToExponentAndMantissa(double val, int precision, double one_k,
+ std::string* mantissa, int64_t* exponent) {
std::stringstream mantissa_stream;
if (val < 0) {
@@ -43,8 +43,8 @@
// Adjust threshold so that it never excludes things which can't be rendered
// in 'precision' digits.
const double adjusted_threshold =
- std::max(thresh, 1.0 / std::pow(10.0, precision));
- const double big_threshold = adjusted_threshold * one_k;
+ std::max(1.0, 1.0 / std::pow(10.0, precision));
+ const double big_threshold = (adjusted_threshold * one_k) - 1;
const double small_threshold = adjusted_threshold;
// Values in ]simple_threshold,small_threshold[ will be printed as-is
const double simple_threshold = 0.01;
@@ -92,37 +92,20 @@
const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1);
if (index >= kUnitsSize) return "";
- const char* array =
+ const char* const* array =
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
- if (iec)
- return array[index] + std::string("i");
- else
- return std::string(1, array[index]);
+
+ return std::string(array[index]);
}
-std::string ToBinaryStringFullySpecified(double value, double threshold,
- int precision, double one_k = 1024.0) {
+std::string ToBinaryStringFullySpecified(double value, int precision,
+ Counter::OneK one_k) {
std::string mantissa;
int64_t exponent;
- ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa,
+ ToExponentAndMantissa(value, precision,
+ one_k == Counter::kIs1024 ? 1024.0 : 1000.0, &mantissa,
&exponent);
- return mantissa + ExponentToPrefix(exponent, false);
-}
-
-} // end namespace
-
-void AppendHumanReadable(int n, std::string* str) {
- std::stringstream ss;
- // Round down to the nearest SI prefix.
- ss << ToBinaryStringFullySpecified(n, 1.0, 0);
- *str += ss.str();
-}
-
-std::string HumanReadableNumber(double n, double one_k) {
- // 1.1 means that figures up to 1.1k should be shown with the next unit down;
- // this softens edge effects.
- // 1 means that we should show one decimal place of precision.
- return ToBinaryStringFullySpecified(n, 1.1, 1, one_k);
+ return mantissa + ExponentToPrefix(exponent, one_k == Counter::kIs1024);
}
std::string StrFormatImp(const char* msg, va_list args) {
@@ -133,21 +116,21 @@
// TODO(ericwf): use std::array for first attempt to avoid one memory
// allocation guess what the size might be
std::array<char, 256> local_buff;
- std::size_t size = local_buff.size();
+
// 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation
// in the android-ndk
- auto ret = vsnprintf(local_buff.data(), size, msg, args_cp);
+ auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp);
va_end(args_cp);
// handle empty expansion
if (ret == 0) return std::string{};
- if (static_cast<std::size_t>(ret) < size)
+ if (static_cast<std::size_t>(ret) < local_buff.size())
return std::string(local_buff.data());
// we did not provide a long enough buffer on our first attempt.
// add 1 to size to account for null-byte in size cast to prevent overflow
- size = static_cast<std::size_t>(ret) + 1;
+ std::size_t size = static_cast<std::size_t>(ret) + 1;
auto buff_ptr = std::unique_ptr<char[]>(new char[size]);
// 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation
// in the android-ndk
@@ -155,6 +138,12 @@
return std::string(buff_ptr.get());
}
+} // end namespace
+
+std::string HumanReadableNumber(double n, Counter::OneK one_k) {
+ return ToBinaryStringFullySpecified(n, 1, one_k);
+}
+
std::string StrFormat(const char* format, ...) {
va_list args;
va_start(args, format);
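
The string_util hunks replace the single-character prefix arrays with string arrays, so IEC prefixes ("Ki", "Mi", ...) no longer need the `+ "i"` special case, and HumanReadableNumber now picks its base from Counter::kIs1024 versus kIs1000. The following is a loose sketch of that exponent/prefix selection only, with a hypothetical name and without the library's mantissa/threshold handling; it assumes a non-negative input:

    #include <cstdio>
    #include <string>

    // Illustrative only: scale `value` and append an SI or IEC prefix,
    // using base 1024 or 1000 in the spirit of Counter::kIs1024 / kIs1000.
    std::string Humanize(double value, bool is_1024) {
      static const char* const kSI[]  = {"",  "k",  "M",  "G",  "T"};
      static const char* const kIEC[] = {"",  "Ki", "Mi", "Gi", "Ti"};
      const double one_k = is_1024 ? 1024.0 : 1000.0;
      int exp = 0;
      while (value >= one_k && exp < 4) {  // assumes value >= 0
        value /= one_k;
        ++exp;
      }
      char buf[64];
      std::snprintf(buf, sizeof(buf), "%.1f%s", value,
                    (is_1024 ? kIEC : kSI)[exp]);
      return buf;
    }
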
diff --git a/third-party/benchmark/src/string_util.h b/third-party/benchmark/src/string_util.h
index ff3b7da..731aa2c 100644
--- a/third-party/benchmark/src/string_util.h
+++ b/third-party/benchmark/src/string_util.h
@@ -4,15 +4,19 @@
#include <sstream>
#include <string>
#include <utility>
+#include <vector>
+#include "benchmark/benchmark.h"
+#include "benchmark/export.h"
+#include "check.h"
#include "internal_macros.h"
namespace benchmark {
-void AppendHumanReadable(int n, std::string* str);
+BENCHMARK_EXPORT
+std::string HumanReadableNumber(double n, Counter::OneK one_k);
-std::string HumanReadableNumber(double n, double one_k = 1024.0);
-
+BENCHMARK_EXPORT
#if defined(__MINGW32__)
__attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2)))
#elif defined(__GNUC__)
@@ -38,6 +42,7 @@
return ss.str();
}
+BENCHMARK_EXPORT
std::vector<std::string> StrSplit(const std::string& str, char delim);
// Disable lint checking for this block since it re-implements C functions.
diff --git a/third-party/benchmark/src/sysinfo.cc b/third-party/benchmark/src/sysinfo.cc
index 3a56e8c..46df973 100644
--- a/third-party/benchmark/src/sysinfo.cc
+++ b/third-party/benchmark/src/sysinfo.cc
@@ -22,6 +22,10 @@
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
+#if !defined(WINVER) || WINVER < 0x0600
+#undef WINVER
+#define WINVER 0x0600
+#endif // WINVER handling
#include <shlwapi.h>
#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA
#include <versionhelpers.h>
@@ -30,7 +34,7 @@
#include <codecvt>
#else
#include <fcntl.h>
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -45,10 +49,17 @@
#endif
#if defined(BENCHMARK_OS_SOLARIS)
#include <kstat.h>
+#include <netdb.h>
#endif
#if defined(BENCHMARK_OS_QNX)
#include <sys/syspage.h>
#endif
+#if defined(BENCHMARK_OS_QURT)
+#include <qurt.h>
+#endif
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+#include <pthread.h>
+#endif
#include <algorithm>
#include <array>
@@ -65,15 +76,17 @@
#include <limits>
#include <locale>
#include <memory>
+#include <random>
#include <sstream>
#include <utility>
+#include "benchmark/benchmark.h"
#include "check.h"
#include "cycleclock.h"
#include "internal_macros.h"
#include "log.h"
-#include "sleep.h"
#include "string_util.h"
+#include "timers.h"
namespace benchmark {
namespace {
@@ -98,67 +111,59 @@
/// `sysctl` with the result type it's to be interpreted as.
struct ValueUnion {
union DataT {
- uint32_t uint32_value;
- uint64_t uint64_value;
+ int32_t int32_value;
+ int64_t int64_value;
// For correct aliasing of union members from bytes.
char bytes[8];
};
using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
// The size of the data union member + its trailing array size.
- size_t Size;
- DataPtr Buff;
+ std::size_t size;
+ DataPtr buff;
public:
- ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
+ ValueUnion() : size(0), buff(nullptr, &std::free) {}
- explicit ValueUnion(size_t BuffSize)
- : Size(sizeof(DataT) + BuffSize),
- Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
+ explicit ValueUnion(std::size_t buff_size)
+ : size(sizeof(DataT) + buff_size),
+ buff(::new (std::malloc(size)) DataT(), &std::free) {}
ValueUnion(ValueUnion&& other) = default;
- explicit operator bool() const { return bool(Buff); }
+ explicit operator bool() const { return bool(buff); }
- char* data() const { return Buff->bytes; }
+ char* data() const { return buff->bytes; }
std::string GetAsString() const { return std::string(data()); }
int64_t GetAsInteger() const {
- if (Size == sizeof(Buff->uint32_value))
- return static_cast<int32_t>(Buff->uint32_value);
- else if (Size == sizeof(Buff->uint64_value))
- return static_cast<int64_t>(Buff->uint64_value);
- BENCHMARK_UNREACHABLE();
- }
-
- uint64_t GetAsUnsigned() const {
- if (Size == sizeof(Buff->uint32_value))
- return Buff->uint32_value;
- else if (Size == sizeof(Buff->uint64_value))
- return Buff->uint64_value;
+ if (size == sizeof(buff->int32_value))
+ return buff->int32_value;
+ else if (size == sizeof(buff->int64_value))
+ return buff->int64_value;
BENCHMARK_UNREACHABLE();
}
template <class T, int N>
std::array<T, N> GetAsArray() {
- const int ArrSize = sizeof(T) * N;
- BM_CHECK_LE(ArrSize, Size);
- std::array<T, N> Arr;
- std::memcpy(Arr.data(), data(), ArrSize);
- return Arr;
+ const int arr_size = sizeof(T) * N;
+ BM_CHECK_LE(arr_size, size);
+ std::array<T, N> arr;
+ std::memcpy(arr.data(), data(), arr_size);
+ return arr;
}
};
-ValueUnion GetSysctlImp(std::string const& Name) {
+ValueUnion GetSysctlImp(std::string const& name) {
#if defined BENCHMARK_OS_OPENBSD
int mib[2];
mib[0] = CTL_HW;
- if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")) {
+ if ((name == "hw.ncpu") || (name == "hw.cpuspeed")) {
ValueUnion buff(sizeof(int));
- if (Name == "hw.ncpu") {
+ if (name == "hw.ncpu") {
mib[1] = HW_NCPU;
} else {
mib[1] = HW_CPUSPEED;
@@ -171,41 +176,41 @@
}
return ValueUnion();
#else
- size_t CurBuffSize = 0;
- if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
+ std::size_t cur_buff_size = 0;
+ if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1)
return ValueUnion();
- ValueUnion buff(CurBuffSize);
- if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
+ ValueUnion buff(cur_buff_size);
+ if (sysctlbyname(name.c_str(), buff.data(), &buff.size, nullptr, 0) == 0)
return buff;
return ValueUnion();
#endif
}
BENCHMARK_MAYBE_UNUSED
-bool GetSysctl(std::string const& Name, std::string* Out) {
- Out->clear();
- auto Buff = GetSysctlImp(Name);
- if (!Buff) return false;
- Out->assign(Buff.data());
+bool GetSysctl(std::string const& name, std::string* out) {
+ out->clear();
+ auto buff = GetSysctlImp(name);
+ if (!buff) return false;
+ out->assign(buff.data());
return true;
}
template <class Tp,
class = typename std::enable_if<std::is_integral<Tp>::value>::type>
-bool GetSysctl(std::string const& Name, Tp* Out) {
- *Out = 0;
- auto Buff = GetSysctlImp(Name);
- if (!Buff) return false;
- *Out = static_cast<Tp>(Buff.GetAsUnsigned());
+bool GetSysctl(std::string const& name, Tp* out) {
+ *out = 0;
+ auto buff = GetSysctlImp(name);
+ if (!buff) return false;
+ *out = static_cast<Tp>(buff.GetAsInteger());
return true;
}
template <class Tp, size_t N>
-bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
- auto Buff = GetSysctlImp(Name);
- if (!Buff) return false;
- *Out = Buff.GetAsArray<Tp, N>();
+bool GetSysctl(std::string const& name, std::array<Tp, N>* out) {
+ auto buff = GetSysctlImp(name);
+ if (!buff) return false;
+ *out = buff.GetAsArray<Tp, N>();
return true;
}
#endif
@@ -241,21 +246,21 @@
#endif
}
-int CountSetBitsInCPUMap(std::string Val) {
- auto CountBits = [](std::string Part) {
+int CountSetBitsInCPUMap(std::string val) {
+ auto CountBits = [](std::string part) {
using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
- Part = "0x" + Part;
- CPUMask Mask(benchmark::stoul(Part, nullptr, 16));
- return static_cast<int>(Mask.count());
+ part = "0x" + part;
+ CPUMask mask(benchmark::stoul(part, nullptr, 16));
+ return static_cast<int>(mask.count());
};
- size_t Pos;
+ std::size_t pos;
int total = 0;
- while ((Pos = Val.find(',')) != std::string::npos) {
- total += CountBits(Val.substr(0, Pos));
- Val = Val.substr(Pos + 1);
+ while ((pos = val.find(',')) != std::string::npos) {
+ total += CountBits(val.substr(0, pos));
+ val = val.substr(pos + 1);
}
- if (!Val.empty()) {
- total += CountBits(Val);
+ if (!val.empty()) {
+ total += CountBits(val);
}
return total;
}
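
The renamed helper above parses shared_cpu_map from /sys, which is a comma-separated list of 32-bit hex words, and counts the set bits across all words to obtain num_sharing. A compact standalone sketch of the same parsing (illustrative only; CountBitsInCpuMask is a made-up name):

    #include <bitset>
    #include <string>

    // Count set bits in a sysfs cpumask such as "ff,00000003".
    int CountBitsInCpuMask(std::string mask) {
      int total = 0;
      while (!mask.empty()) {
        const std::string::size_type pos = mask.find(',');
        const std::string word = mask.substr(0, pos);
        total += static_cast<int>(
            std::bitset<32>(std::stoul(word, nullptr, 16)).count());
        mask = (pos == std::string::npos) ? std::string() : mask.substr(pos + 1);
      }
      return total;
    }
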
@@ -264,16 +269,16 @@
std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
std::vector<CPUInfo::CacheInfo> res;
std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
- int Idx = 0;
+ int idx = 0;
while (true) {
CPUInfo::CacheInfo info;
- std::string FPath = StrCat(dir, "index", Idx++, "/");
- std::ifstream f(StrCat(FPath, "size").c_str());
+ std::string fpath = StrCat(dir, "index", idx++, "/");
+ std::ifstream f(StrCat(fpath, "size").c_str());
if (!f.is_open()) break;
std::string suffix;
f >> info.size;
if (f.fail())
- PrintErrorAndDie("Failed while reading file '", FPath, "size'");
+ PrintErrorAndDie("Failed while reading file '", fpath, "size'");
if (f.good()) {
f >> suffix;
if (f.bad())
@@ -284,13 +289,13 @@
else if (suffix == "K")
info.size *= 1024;
}
- if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
- PrintErrorAndDie("Failed to read from file ", FPath, "type");
- if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
- PrintErrorAndDie("Failed to read from file ", FPath, "level");
+ if (!ReadFromFile(StrCat(fpath, "type"), &info.type))
+ PrintErrorAndDie("Failed to read from file ", fpath, "type");
+ if (!ReadFromFile(StrCat(fpath, "level"), &info.level))
+ PrintErrorAndDie("Failed to read from file ", fpath, "level");
std::string map_str;
- if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
- PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
+ if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str))
+ PrintErrorAndDie("Failed to read from file ", fpath, "shared_cpu_map");
info.num_sharing = CountSetBitsInCPUMap(map_str);
res.push_back(info);
}
@@ -301,26 +306,26 @@
#ifdef BENCHMARK_OS_MACOSX
std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
std::vector<CPUInfo::CacheInfo> res;
- std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
- GetSysctl("hw.cacheconfig", &CacheCounts);
+ std::array<int, 4> cache_counts{{0, 0, 0, 0}};
+ GetSysctl("hw.cacheconfig", &cache_counts);
struct {
std::string name;
std::string type;
int level;
- uint64_t num_sharing;
- } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
- {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
- {"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
- {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
- for (auto& C : Cases) {
+ int num_sharing;
+ } cases[] = {{"hw.l1dcachesize", "Data", 1, cache_counts[1]},
+ {"hw.l1icachesize", "Instruction", 1, cache_counts[1]},
+ {"hw.l2cachesize", "Unified", 2, cache_counts[2]},
+ {"hw.l3cachesize", "Unified", 3, cache_counts[3]}};
+ for (auto& c : cases) {
int val;
- if (!GetSysctl(C.name, &val)) continue;
+ if (!GetSysctl(c.name, &val)) continue;
CPUInfo::CacheInfo info;
- info.type = C.type;
- info.level = C.level;
+ info.type = c.type;
+ info.level = c.level;
info.size = val;
- info.num_sharing = static_cast<int>(C.num_sharing);
+ info.num_sharing = c.num_sharing;
res.push_back(std::move(info));
}
return res;
@@ -334,7 +339,7 @@
using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
GetLogicalProcessorInformation(nullptr, &buffer_size);
- UPtr buff((PInfo*)malloc(buffer_size), &std::free);
+ UPtr buff(static_cast<PInfo*>(std::malloc(buffer_size)), &std::free);
if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
GetLastError());
@@ -345,16 +350,16 @@
for (; it != end; ++it) {
if (it->Relationship != RelationCache) continue;
using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
- BitSet B(it->ProcessorMask);
+ BitSet b(it->ProcessorMask);
// To prevent duplicates, only consider caches where CPU 0 is specified
- if (!B.test(0)) continue;
- CInfo* Cache = &it->Cache;
+ if (!b.test(0)) continue;
+ const CInfo& cache = it->Cache;
CPUInfo::CacheInfo C;
- C.num_sharing = static_cast<int>(B.count());
- C.level = Cache->Level;
- C.size = Cache->Size;
+ C.num_sharing = static_cast<int>(b.count());
+ C.level = cache.Level;
+ C.size = cache.Size;
C.type = "Unknown";
- switch (Cache->Type) {
+ switch (cache.Type) {
case CacheUnified:
C.type = "Unified";
break;
@@ -417,6 +422,8 @@
return GetCacheSizesWindows();
#elif defined(BENCHMARK_OS_QNX)
return GetCacheSizesQNX();
+#elif defined(BENCHMARK_OS_QURT)
+ return std::vector<CPUInfo::CacheInfo>();
#else
return GetCacheSizesFromKVFS();
#endif
@@ -425,23 +432,32 @@
std::string GetSystemName() {
#if defined(BENCHMARK_OS_WINDOWS)
std::string str;
- const unsigned COUNT = MAX_COMPUTERNAME_LENGTH + 1;
+ static constexpr int COUNT = MAX_COMPUTERNAME_LENGTH + 1;
TCHAR hostname[COUNT] = {'\0'};
DWORD DWCOUNT = COUNT;
if (!GetComputerName(hostname, &DWCOUNT)) return std::string("");
#ifndef UNICODE
str = std::string(hostname, DWCOUNT);
#else
- // Using wstring_convert, Is deprecated in C++17
- using convert_type = std::codecvt_utf8<wchar_t>;
- std::wstring_convert<convert_type, wchar_t> converter;
- std::wstring wStr(hostname, DWCOUNT);
- str = converter.to_bytes(wStr);
+ // `WideCharToMultiByte` returns `0` when conversion fails.
+ int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname,
+ DWCOUNT, NULL, 0, NULL, NULL);
+ str.resize(len);
+ WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0],
+ str.size(), NULL, NULL);
#endif
return str;
-#else // defined(BENCHMARK_OS_WINDOWS)
+#elif defined(BENCHMARK_OS_QURT)
+ std::string str = "Hexagon DSP";
+ qurt_arch_version_t arch_version_struct;
+ if (qurt_sysenv_get_arch_version(&arch_version_struct) == QURT_EOK) {
+ str += " v";
+ str += std::to_string(arch_version_struct.arch_version);
+ }
+ return str;
+#else
#ifndef HOST_NAME_MAX
-#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined
+#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined
#define HOST_NAME_MAX 64
#elif defined(BENCHMARK_OS_NACL)
#define HOST_NAME_MAX 64
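
The hostname hunk above drops the deprecated std::wstring_convert in favour of the usual two-call WideCharToMultiByte pattern: call once with a null output buffer to size the UTF-8 string, then convert into it. A Windows-only sketch of that pattern in isolation (WideToUtf8 is a hypothetical helper, not the library's code):

    #include <windows.h>
    #include <string>

    // Convert UTF-16 to UTF-8 (Windows only). Returns "" on failure.
    std::string WideToUtf8(const std::wstring& in) {
      if (in.empty()) return std::string();
      const int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS,
                                          in.data(), static_cast<int>(in.size()),
                                          nullptr, 0, nullptr, nullptr);
      if (len <= 0) return std::string();  // 0 means the conversion failed
      std::string out(static_cast<size_t>(len), '\0');
      WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, in.data(),
                          static_cast<int>(in.size()), &out[0], len,
                          nullptr, nullptr);
      return out;
    }
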
@@ -449,6 +465,8 @@
#define HOST_NAME_MAX 154
#elif defined(BENCHMARK_OS_RTEMS)
#define HOST_NAME_MAX 256
+#elif defined(BENCHMARK_OS_SOLARIS)
+#define HOST_NAME_MAX MAXHOSTNAMELEN
#elif defined(BENCHMARK_OS_ZOS)
#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
#else
@@ -463,12 +481,11 @@
#endif // Catch-all POSIX block.
}
-int GetNumCPUs() {
+int GetNumCPUsImpl() {
#ifdef BENCHMARK_HAS_SYSCTL
- int NumCPU = -1;
- if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
- fprintf(stderr, "Err: %s\n", strerror(errno));
- std::exit(EXIT_FAILURE);
+ int num_cpu = -1;
+ if (GetSysctl("hw.ncpu", &num_cpu)) return num_cpu;
+ PrintErrorAndDie("Err: ", strerror(errno));
#elif defined(BENCHMARK_OS_WINDOWS)
SYSTEM_INFO sysinfo;
// Use memset as opposed to = {} to avoid GCC missing initializer false
@@ -480,64 +497,155 @@
// group
#elif defined(BENCHMARK_OS_SOLARIS)
// Returns -1 in case of a failure.
- int NumCPU = sysconf(_SC_NPROCESSORS_ONLN);
- if (NumCPU < 0) {
- fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n",
- strerror(errno));
+ long num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
+ if (num_cpu < 0) {
+ PrintErrorAndDie("sysconf(_SC_NPROCESSORS_ONLN) failed with error: ",
+ strerror(errno));
}
- return NumCPU;
+ return (int)num_cpu;
#elif defined(BENCHMARK_OS_QNX)
return static_cast<int>(_syspage_ptr->num_cpu);
+#elif defined(BENCHMARK_OS_QURT)
+ qurt_sysenv_max_hthreads_t hardware_threads;
+ if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) {
+ hardware_threads.max_hthreads = 1;
+ }
+ return hardware_threads.max_hthreads;
#else
- int NumCPUs = 0;
- int MaxID = -1;
+ int num_cpus = 0;
+ int max_id = -1;
std::ifstream f("/proc/cpuinfo");
if (!f.is_open()) {
- std::cerr << "failed to open /proc/cpuinfo\n";
- return -1;
+ PrintErrorAndDie("Failed to open /proc/cpuinfo");
}
+#if defined(__alpha__)
+ const std::string Key = "cpus detected";
+#else
const std::string Key = "processor";
+#endif
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) continue;
- size_t SplitIdx = ln.find(':');
+ std::size_t split_idx = ln.find(':');
std::string value;
#if defined(__s390__)
// s390 has another format in /proc/cpuinfo
// it needs to be parsed differently
- if (SplitIdx != std::string::npos)
- value = ln.substr(Key.size() + 1, SplitIdx - Key.size() - 1);
+ if (split_idx != std::string::npos)
+ value = ln.substr(Key.size() + 1, split_idx - Key.size() - 1);
#else
- if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
+ if (split_idx != std::string::npos) value = ln.substr(split_idx + 1);
#endif
if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
- NumCPUs++;
+ num_cpus++;
if (!value.empty()) {
- int CurID = benchmark::stoi(value);
- MaxID = std::max(CurID, MaxID);
+ const int cur_id = benchmark::stoi(value);
+ max_id = std::max(cur_id, max_id);
}
}
}
if (f.bad()) {
- std::cerr << "Failure reading /proc/cpuinfo\n";
- return -1;
+ PrintErrorAndDie("Failure reading /proc/cpuinfo");
}
if (!f.eof()) {
- std::cerr << "Failed to read to end of /proc/cpuinfo\n";
- return -1;
+ PrintErrorAndDie("Failed to read to end of /proc/cpuinfo");
}
f.close();
- if ((MaxID + 1) != NumCPUs) {
+ if ((max_id + 1) != num_cpus) {
fprintf(stderr,
"CPU ID assignments in /proc/cpuinfo seem messed up."
" This is usually caused by a bad BIOS.\n");
}
- return NumCPUs;
+ return num_cpus;
#endif
BENCHMARK_UNREACHABLE();
}
+int GetNumCPUs() {
+ const int num_cpus = GetNumCPUsImpl();
+ if (num_cpus < 1) {
+ PrintErrorAndDie(
+ "Unable to extract number of CPUs. If your platform uses "
+ "/proc/cpuinfo, custom support may need to be added.");
+ }
+ return num_cpus;
+}
+
+class ThreadAffinityGuard final {
+ public:
+ ThreadAffinityGuard() : reset_affinity(SetAffinity()) {
+ if (!reset_affinity)
+ std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU "
+ "frequency may be incorrect."
+ << std::endl;
+ }
+
+ ~ThreadAffinityGuard() {
+ if (!reset_affinity) return;
+
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ int ret = pthread_setaffinity_np(self, sizeof(previous_affinity),
+ &previous_affinity);
+ if (ret == 0) return;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity);
+ if (ret != 0) return;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ PrintErrorAndDie("Failed to reset thread affinity");
+ }
+
+ ThreadAffinityGuard(ThreadAffinityGuard&&) = delete;
+ ThreadAffinityGuard(const ThreadAffinityGuard&) = delete;
+ ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete;
+ ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete;
+
+ private:
+ bool SetAffinity() {
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ int ret;
+ self = pthread_self();
+ ret = pthread_getaffinity_np(self, sizeof(previous_affinity),
+ &previous_affinity);
+ if (ret != 0) return false;
+
+ cpu_set_t affinity;
+ memcpy(&affinity, &previous_affinity, sizeof(affinity));
+
+ bool is_first_cpu = true;
+
+ for (int i = 0; i < CPU_SETSIZE; ++i)
+ if (CPU_ISSET(i, &affinity)) {
+ if (is_first_cpu)
+ is_first_cpu = false;
+ else
+ CPU_CLR(i, &affinity);
+ }
+
+ if (is_first_cpu) return false;
+
+ ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity);
+ return ret == 0;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ self = GetCurrentThread();
+ DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber();
+ previous_affinity = SetThreadAffinityMask(self, mask);
+ return previous_affinity != 0;
+#else
+ return false;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ }
+
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ pthread_t self;
+ cpu_set_t previous_affinity;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ HANDLE self;
+ DWORD_PTR previous_affinity;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ bool reset_affinity;
+};
+
double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
// Currently, scaling is only used on linux path here,
// suppress diagnostics about it being unused on other paths.
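
The new ThreadAffinityGuard above pins the calling thread to a single CPU from its current affinity mask for the duration of the frequency estimate, then restores the original mask on destruction. A minimal Linux-only sketch of the same pin/restore idea with the raw pthread calls (error handling trimmed; PinToOneCpu is a made-up helper, not part of the patch):

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <sched.h>

    // Pin the calling thread to the first CPU in its current affinity set,
    // handing back the previous mask so the caller can restore it later.
    static int PinToOneCpu(cpu_set_t* previous) {
      pthread_t self = pthread_self();
      if (pthread_getaffinity_np(self, sizeof(*previous), previous) != 0)
        return -1;
      cpu_set_t pinned;
      CPU_ZERO(&pinned);
      for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) {
        if (CPU_ISSET(cpu, previous)) {  // keep only the first allowed CPU
          CPU_SET(cpu, &pinned);
          break;
        }
      }
      return pthread_setaffinity_np(self, sizeof(pinned), &pinned);
    }

    // ... run the measurement, then restore:
    // pthread_setaffinity_np(pthread_self(), sizeof(previous), &previous);
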
@@ -566,7 +674,7 @@
&freq)) {
// The value is in kHz (as the file name suggests). For example, on a
// 2GHz warpstation, the file contains the value "2000000".
- return freq * 1000.0;
+ return static_cast<double>(freq) * 1000.0;
}
const double error_value = -1;
@@ -578,7 +686,7 @@
return error_value;
}
- auto startsWithKey = [](std::string const& Value, std::string const& Key) {
+ auto StartsWithKey = [](std::string const& Value, std::string const& Key) {
if (Key.size() > Value.size()) return false;
auto Cmp = [&](char X, char Y) {
return std::tolower(X) == std::tolower(Y);
@@ -589,18 +697,18 @@
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) continue;
- size_t SplitIdx = ln.find(':');
+ std::size_t split_idx = ln.find(':');
std::string value;
- if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
+ if (split_idx != std::string::npos) value = ln.substr(split_idx + 1);
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept positive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init.
- if (startsWithKey(ln, "cpu MHz")) {
+ if (StartsWithKey(ln, "cpu MHz")) {
if (!value.empty()) {
double cycles_per_second = benchmark::stod(value) * 1000000.0;
if (cycles_per_second > 0) return cycles_per_second;
}
- } else if (startsWithKey(ln, "bogomips")) {
+ } else if (StartsWithKey(ln, "bogomips")) {
if (!value.empty()) {
bogo_clock = benchmark::stod(value) * 1000000.0;
if (bogo_clock < 0.0) bogo_clock = error_value;
@@ -622,7 +730,7 @@
if (bogo_clock >= 0.0) return bogo_clock;
#elif defined BENCHMARK_HAS_SYSCTL
- constexpr auto* FreqStr =
+ constexpr auto* freqStr =
#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
"machdep.tsc_freq";
#elif defined BENCHMARK_OS_OPENBSD
@@ -634,14 +742,17 @@
#endif
unsigned long long hz = 0;
#if defined BENCHMARK_OS_OPENBSD
- if (GetSysctl(FreqStr, &hz)) return hz * 1000000;
+ if (GetSysctl(freqStr, &hz)) return hz * 1000000;
#else
- if (GetSysctl(FreqStr, &hz)) return hz;
+ if (GetSysctl(freqStr, &hz)) return hz;
#endif
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
- FreqStr, strerror(errno));
+ freqStr, strerror(errno));
+ fprintf(stderr,
+ "This does not affect benchmark measurements, only the "
+ "metadata output.\n");
-#elif defined BENCHMARK_OS_WINDOWS
+#elif defined BENCHMARK_OS_WINDOWS_WIN32
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
DWORD data, data_size = sizeof(data);
@@ -650,15 +761,16 @@
SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", nullptr, &data, &data_size)))
- return static_cast<double>((int64_t)data *
- (int64_t)(1000 * 1000)); // was mhz
+ return static_cast<double>(static_cast<int64_t>(data) *
+ static_cast<int64_t>(1000 * 1000)); // was mhz
#elif defined(BENCHMARK_OS_SOLARIS)
kstat_ctl_t* kc = kstat_open();
if (!kc) {
std::cerr << "failed to open /dev/kstat\n";
return -1;
}
- kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0");
+ kstat_t* ksp = kstat_lookup(kc, const_cast<char*>("cpu_info"), -1,
+ const_cast<char*>("cpu_info0"));
if (!ksp) {
std::cerr << "failed to lookup in /dev/kstat\n";
return -1;
@@ -667,8 +779,8 @@
std::cerr << "failed to read from /dev/kstat\n";
return -1;
}
- kstat_named_t* knp =
- (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz");
+ kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup(
+ ksp, const_cast<char*>("current_clock_Hz"));
if (!knp) {
std::cerr << "failed to lookup data in /dev/kstat\n";
return -1;
@@ -682,22 +794,55 @@
kstat_close(kc);
return clock_hz;
#elif defined(BENCHMARK_OS_QNX)
- return static_cast<double>((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) *
- (int64_t)(1000 * 1000));
+ return static_cast<double>(
+ static_cast<int64_t>(SYSPAGE_ENTRY(cpuinfo)->speed) *
+ static_cast<int64_t>(1000 * 1000));
+#elif defined(BENCHMARK_OS_QURT)
+ // QuRT doesn't provide any API to query Hexagon frequency.
+ return 1000000000;
#endif
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
- const int estimate_time_ms = 1000;
+
+ // Make sure to use the same cycle counter when starting and stopping the
+ // cycle timer. We just pin the current thread to a cpu in the previous
+ // affinity set.
+ ThreadAffinityGuard affinity_guard;
+
+ static constexpr double estimate_time_s = 1.0;
+ const double start_time = ChronoClockNow();
const auto start_ticks = cycleclock::Now();
- SleepForMilliseconds(estimate_time_ms);
- return static_cast<double>(cycleclock::Now() - start_ticks);
+
+ // Impose load instead of calling sleep() to make sure the cycle counter
+ // works.
+ using PRNG = std::minstd_rand;
+ using Result = PRNG::result_type;
+ PRNG rng(static_cast<Result>(start_ticks));
+
+ Result state = 0;
+
+ do {
+ static constexpr size_t batch_size = 10000;
+ rng.discard(batch_size);
+ state += rng();
+
+ } while (ChronoClockNow() - start_time < estimate_time_s);
+
+ DoNotOptimize(state);
+
+ const auto end_ticks = cycleclock::Now();
+ const double end_time = ChronoClockNow();
+
+ return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time);
+ // Reset the affinity of current thread when the lifetime of affinity_guard
+ // ends.
}
std::vector<double> GetLoadAvg() {
#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \
defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \
defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \
- !defined(__ANDROID__)
- constexpr int kMaxSamples = 3;
+ !(defined(__ANDROID__) && __ANDROID_API__ < 29)
+ static constexpr int kMaxSamples = 3;
std::vector<double> res(kMaxSamples, 0.0);
const int nelem = getloadavg(res.data(), kMaxSamples);
if (nelem < 1) {
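
The fallback at the end of GetCPUCyclesPerSecond now busy-loops a small PRNG for roughly a second instead of sleeping, so the cycle counter keeps advancing, and divides the elapsed ticks by the elapsed wall time. A rough x86-only sketch of that estimate, substituting __rdtsc() and std::chrono for the library's internal cycleclock/ChronoClockNow helpers (EstimateTicksPerSecond is a hypothetical name):

    #include <chrono>
    #include <random>
    #include <x86intrin.h>  // __rdtsc, x86 only

    // Estimate TSC ticks per second by imposing ~1s of PRNG load.
    double EstimateTicksPerSecond() {
      using clock = std::chrono::steady_clock;
      const auto start_time = clock::now();
      const unsigned long long start_ticks = __rdtsc();

      std::minstd_rand rng(static_cast<unsigned>(start_ticks));
      volatile unsigned sink = 0;  // keeps the load from being optimized away
      while (clock::now() - start_time < std::chrono::seconds(1)) {
        rng.discard(10000);
        sink += rng();
      }

      const unsigned long long end_ticks = __rdtsc();
      const std::chrono::duration<double> elapsed = clock::now() - start_time;
      return static_cast<double>(end_ticks - start_ticks) / elapsed.count();
    }
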
diff --git a/third-party/benchmark/src/thread_manager.h b/third-party/benchmark/src/thread_manager.h
index 4680285..819b3c4 100644
--- a/third-party/benchmark/src/thread_manager.h
+++ b/third-party/benchmark/src/thread_manager.h
@@ -43,8 +43,8 @@
double manual_time_used = 0;
int64_t complexity_n = 0;
std::string report_label_;
- std::string error_message_;
- bool has_error_ = false;
+ std::string skip_message_;
+ internal::Skipped skipped_ = internal::NotSkipped;
UserCounters counters;
};
GUARDED_BY(GetBenchmarkMutex()) Result results;
diff --git a/third-party/benchmark/src/timers.cc b/third-party/benchmark/src/timers.cc
index ed35c01..667e7b2 100644
--- a/third-party/benchmark/src/timers.cc
+++ b/third-party/benchmark/src/timers.cc
@@ -23,7 +23,7 @@
#include <windows.h>
#else
#include <fcntl.h>
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -38,6 +38,9 @@
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#endif
+#if defined(BENCHMARK_OS_QURT)
+#include <qurt.h>
+#endif
#endif
#ifdef BENCHMARK_OS_EMSCRIPTEN
@@ -56,7 +59,6 @@
#include "check.h"
#include "log.h"
-#include "sleep.h"
#include "string_util.h"
namespace benchmark {
@@ -65,6 +67,9 @@
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
+#if defined(__NVCOMPILER)
+#pragma diag_suppress declared_but_not_referenced
+#endif
namespace {
#if defined(BENCHMARK_OS_WINDOWS)
@@ -79,7 +84,7 @@
static_cast<double>(user.QuadPart)) *
1e-7;
}
-#elif !defined(BENCHMARK_OS_FUCHSIA)
+#elif !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
double MakeTime(struct rusage const& ru) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
@@ -97,7 +102,8 @@
#endif
#if defined(CLOCK_PROCESS_CPUTIME_ID) || defined(CLOCK_THREAD_CPUTIME_ID)
double MakeTime(struct timespec const& ts) {
- return ts.tv_sec + (static_cast<double>(ts.tv_nsec) * 1e-9);
+ return static_cast<double>(ts.tv_sec) +
+ (static_cast<double>(ts.tv_nsec) * 1e-9);
}
#endif
@@ -119,11 +125,15 @@
&user_time))
return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed");
+#elif defined(BENCHMARK_OS_QURT)
+ return static_cast<double>(
+ qurt_timer_timetick_to_us(qurt_timer_get_ticks())) *
+ 1.0e-6;
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
// Use Emscripten-specific API. Reported CPU time would be exactly the
// same as total time, but this is ok because there aren't long-latency
- // syncronous system calls in Emscripten.
+ // synchronous system calls in Emscripten.
return emscripten_get_now() * 1e-3;
#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
@@ -149,6 +159,10 @@
GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time,
&user_time);
return MakeTime(kernel_time, user_time);
+#elif defined(BENCHMARK_OS_QURT)
+ return static_cast<double>(
+ qurt_timer_timetick_to_us(qurt_timer_get_ticks())) *
+ 1.0e-6;
#elif defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
// See https://github.com/google/benchmark/pull/292
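
The timers hunks mostly add QuRT branches and explicit casts; on the POSIX path, process CPU time is still assembled from getrusage by summing user and system time in seconds. A small POSIX-only sketch of that conversion (ProcessCpuSeconds is an illustrative name, not the library's helper):

    #include <sys/resource.h>

    // Process CPU time (user + system) in seconds via getrusage, POSIX only.
    double ProcessCpuSeconds() {
      struct rusage ru;
      if (getrusage(RUSAGE_SELF, &ru) != 0) return 0.0;
      return static_cast<double>(ru.ru_utime.tv_sec) +
             static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
             static_cast<double>(ru.ru_stime.tv_sec) +
             static_cast<double>(ru.ru_stime.tv_usec) * 1e-6;
    }
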
diff --git a/third-party/benchmark/test/AssemblyTests.cmake b/third-party/benchmark/test/AssemblyTests.cmake
index 3d07858..c43c711 100644
--- a/third-party/benchmark/test/AssemblyTests.cmake
+++ b/third-party/benchmark/test/AssemblyTests.cmake
@@ -1,3 +1,23 @@
+set(CLANG_SUPPORTED_VERSION "5.0.0")
+set(GCC_SUPPORTED_VERSION "5.5.0")
+
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${CLANG_SUPPORTED_VERSION})
+ message (WARNING
+ "Unsupported Clang version " ${CMAKE_CXX_COMPILER_VERSION}
+ ". Expected is " ${CLANG_SUPPORTED_VERSION}
+ ". Assembly tests may be broken.")
+ endif()
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+ if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${GCC_SUPPORTED_VERSION})
+ message (WARNING
+ "Unsupported GCC version " ${CMAKE_CXX_COMPILER_VERSION}
+ ". Expected is " ${GCC_SUPPORTED_VERSION}
+ ". Assembly tests may be broken.")
+ endif()
+else()
+ message (WARNING "Unsupported compiler. Assembly tests may be broken.")
+endif()
include(split_list)
@@ -23,6 +43,7 @@
macro(add_filecheck_test name)
cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV})
add_library(${name} OBJECT ${name}.cc)
+ target_link_libraries(${name} PRIVATE benchmark::benchmark)
set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}")
set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s")
add_custom_target(copy_${name} ALL
diff --git a/third-party/benchmark/test/CMakeLists.txt b/third-party/benchmark/test/CMakeLists.txt
index 162af53..1de175f 100644
--- a/third-party/benchmark/test/CMakeLists.txt
+++ b/third-party/benchmark/test/CMakeLists.txt
@@ -1,8 +1,12 @@
# Enable the tests
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+
find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag)
+add_cxx_compiler_flag(-Wno-unused-variable)
+
# NOTE: Some tests use `<cassert>` to perform the test. Therefore we must
# strip -DNDEBUG from the default CMake flags in DEBUG mode.
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
@@ -22,6 +26,10 @@
endforeach()
endif()
+if (NOT BUILD_SHARED_LIBS)
+ add_definitions(-DBENCHMARK_STATIC_DEFINE)
+endif()
+
check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG)
set(BENCHMARK_O3_FLAG "")
if (BENCHMARK_HAS_O3_FLAG)
@@ -35,10 +43,14 @@
endif()
add_library(output_test_helper STATIC output_test_helper.cc output_test.h)
+target_link_libraries(output_test_helper PRIVATE benchmark::benchmark)
macro(compile_benchmark_test name)
add_executable(${name} "${name}.cc")
target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT})
+ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
+ target_compile_options( ${name} PRIVATE --diag_suppress partial_override )
+ endif()
endmacro(compile_benchmark_test)
macro(compile_benchmark_test_with_main name)
@@ -48,26 +60,43 @@
macro(compile_output_test name)
add_executable(${name} "${name}.cc" output_test.h)
- target_link_libraries(${name} output_test_helper benchmark::benchmark
+ target_link_libraries(${name} output_test_helper benchmark::benchmark_main
${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
endmacro(compile_output_test)
+macro(benchmark_add_test)
+ add_test(${ARGV})
+ if(WIN32 AND BUILD_SHARED_LIBS)
+ cmake_parse_arguments(TEST "" "NAME" "" ${ARGN})
+ set_tests_properties(${TEST_NAME} PROPERTIES ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$<TARGET_FILE_DIR:benchmark::benchmark>")
+ endif()
+endmacro(benchmark_add_test)
+
# Demonstration executable
compile_benchmark_test(benchmark_test)
-add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s)
compile_benchmark_test(spec_arg_test)
-add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen)
+benchmark_add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen)
+
+compile_benchmark_test(spec_arg_verbosity_test)
+benchmark_add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42)
compile_benchmark_test(benchmark_setup_teardown_test)
-add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test)
+benchmark_add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test)
compile_benchmark_test(filter_test)
macro(add_filter_test name filter expect)
- add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect})
- add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect})
+ benchmark_add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01s --benchmark_filter=${filter} ${expect})
+ benchmark_add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect})
endmacro(add_filter_test)
+compile_benchmark_test(benchmark_min_time_flag_time_test)
+benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_test)
+
+compile_benchmark_test(benchmark_min_time_flag_iters_test)
+benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test)
+
add_filter_test(filter_simple "Foo" 3)
add_filter_test(filter_simple_negative "-Foo" 2)
add_filter_test(filter_suffix "BM_.*" 4)
@@ -88,78 +117,83 @@
add_filter_test(filter_regex_end_negative "-.*Ba$" 4)
compile_benchmark_test(options_test)
-add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s)
compile_benchmark_test(basic_test)
-add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01s)
compile_output_test(repetitions_test)
-add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01 --benchmark_repetitions=3)
+benchmark_add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01s --benchmark_repetitions=3)
compile_benchmark_test(diagnostics_test)
-add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01s)
compile_benchmark_test(skip_with_error_test)
-add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01s)
compile_benchmark_test(donotoptimize_test)
+# Enable errors for deprecated declarations (DoNotOptimize(Tp const& value)).
+check_cxx_compiler_flag(-Werror=deprecated-declarations BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG)
+if (BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG)
+ target_compile_options (donotoptimize_test PRIVATE "-Werror=deprecated-declarations")
+endif()
# Some of the issues with DoNotOptimize only occur when optimization is enabled
check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG)
if (BENCHMARK_HAS_O3_FLAG)
set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3")
endif()
-add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01s)
compile_benchmark_test(fixture_test)
-add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01s)
compile_benchmark_test(register_benchmark_test)
-add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01s)
compile_benchmark_test(map_test)
-add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01s)
compile_benchmark_test(multiple_ranges_test)
-add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01s)
compile_benchmark_test(args_product_test)
-add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01s)
compile_benchmark_test_with_main(link_main_test)
-add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01s)
compile_output_test(reporter_output_test)
-add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01s)
compile_output_test(templated_fixture_test)
-add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s)
compile_output_test(user_counters_test)
-add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01s)
compile_output_test(perf_counters_test)
-add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES)
+benchmark_add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01s --benchmark_perf_counters=CYCLES,INSTRUCTIONS)
compile_output_test(internal_threading_test)
-add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s)
compile_output_test(report_aggregates_only_test)
-add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s)
compile_output_test(display_aggregates_only_test)
-add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01s)
compile_output_test(user_counters_tabular_test)
-add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01)
+benchmark_add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01s)
compile_output_test(user_counters_thousands_test)
-add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01s)
compile_output_test(memory_manager_test)
-add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01)
+benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)
-check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG)
-if (BENCHMARK_HAS_CXX03_FLAG)
+# MSVC does not allow setting the language standard to C++98/03.
+if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
compile_benchmark_test(cxx03_test)
set_target_properties(cxx03_test
PROPERTIES
@@ -170,22 +204,22 @@
# causing the test to fail to compile. To prevent this we explicitly disable
# the warning.
check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR)
- if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR)
- set_target_properties(cxx03_test
- PROPERTIES
- LINK_FLAGS "-Wno-odr")
+ check_cxx_compiler_flag(-Wno-lto-type-mismatch BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
+ # Cannot set_target_properties multiple times here because the warnings will
+ # be overwritten on each call
+ set (DISABLE_LTO_WARNINGS "")
+ if (BENCHMARK_HAS_WNO_ODR)
+ set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-odr")
endif()
- add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01)
+ if (BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
+ set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-lto-type-mismatch")
+ endif()
+ set_target_properties(cxx03_test PROPERTIES LINK_FLAGS "${DISABLE_LTO_WARNINGS}")
+ benchmark_add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01s)
endif()
-# Attempt to work around flaky test failures when running on Appveyor servers.
-if (DEFINED ENV{APPVEYOR})
- set(COMPLEXITY_MIN_TIME "0.5")
-else()
- set(COMPLEXITY_MIN_TIME "0.01")
-endif()
compile_output_test(complexity_test)
-add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME})
+benchmark_add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=1000000x)
###############################################################################
# GoogleTest Unit Tests
@@ -200,7 +234,12 @@
macro(add_gtest name)
compile_gtest(${name})
- add_test(NAME ${name} COMMAND ${name})
+ benchmark_add_test(NAME ${name} COMMAND ${name})
+ if(WIN32 AND BUILD_SHARED_LIBS)
+ set_tests_properties(${name} PROPERTIES
+ ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$<TARGET_FILE_DIR:benchmark::benchmark>;PATH=path_list_prepend:$<TARGET_FILE_DIR:gmock_main>"
+ )
+ endif()
endmacro()
add_gtest(benchmark_gtest)
@@ -210,6 +249,8 @@
add_gtest(statistics_gtest)
add_gtest(string_util_gtest)
add_gtest(perf_counters_gtest)
+ add_gtest(time_unit_gtest)
+ add_gtest(min_time_parse_gtest)
endif(BENCHMARK_ENABLE_GTEST_TESTS)
###############################################################################
diff --git a/third-party/benchmark/test/args_product_test.cc b/third-party/benchmark/test/args_product_test.cc
index d44f391..63b8b71 100644
--- a/third-party/benchmark/test/args_product_test.cc
+++ b/third-party/benchmark/test/args_product_test.cc
@@ -23,7 +23,7 @@
{2, 15, 10, 9},
{4, 5, 6, 11}}) {}
- void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE {
+ void SetUp(const ::benchmark::State& state) override {
std::vector<int64_t> ranges = {state.range(0), state.range(1),
state.range(2), state.range(3)};
@@ -34,7 +34,7 @@
// NOTE: This is not TearDown as we want to check after _all_ runs are
// complete.
- virtual ~ArgsProductFixture() {
+ ~ArgsProductFixture() override {
if (actualValues != expectedValues) {
std::cout << "EXPECTED\n";
for (const auto& v : expectedValues) {
diff --git a/third-party/benchmark/test/basic_test.cc b/third-party/benchmark/test/basic_test.cc
index 3a8fd42..c25bec7 100644
--- a/third-party/benchmark/test/basic_test.cc
+++ b/third-party/benchmark/test/basic_test.cc
@@ -5,7 +5,8 @@
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
@@ -147,7 +148,7 @@
auto arg = state.range(0);
T sum = 0;
for (auto _ : state) {
- sum += arg;
+ sum += static_cast<T>(arg);
}
}
BENCHMARK(BM_OneTemplateFunc<int>)->Arg(1);
@@ -159,8 +160,8 @@
A sum = 0;
B prod = 1;
for (auto _ : state) {
- sum += arg;
- prod *= arg;
+ sum += static_cast<A>(arg);
+ prod *= static_cast<B>(arg);
}
}
BENCHMARK(BM_TwoTemplateFunc<int, double>)->Arg(1);
diff --git a/third-party/benchmark/test/benchmark_gtest.cc b/third-party/benchmark/test/benchmark_gtest.cc
index 14a885b..2c9e555 100644
--- a/third-party/benchmark/test/benchmark_gtest.cc
+++ b/third-party/benchmark/test/benchmark_gtest.cc
@@ -3,12 +3,12 @@
#include <vector>
#include "../src/benchmark_register.h"
+#include "benchmark/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace benchmark {
namespace internal {
-extern std::map<std::string, std::string>* global_context;
namespace {
@@ -38,8 +38,9 @@
TEST(AddRangeTest, FullRange8) {
std::vector<int8_t> dst;
- AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), 8);
- EXPECT_THAT(dst, testing::ElementsAre(1, 8, 64, 127));
+ AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), int8_t{8});
+ EXPECT_THAT(
+ dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127}));
}
TEST(AddRangeTest, FullRange64) {
@@ -129,11 +130,13 @@
TEST(AddRangeTest, Simple8) {
std::vector<int8_t> dst;
- AddRange<int8_t>(&dst, 1, 8, 2);
- EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8));
+ AddRange<int8_t>(&dst, int8_t{1}, int8_t{8}, int8_t{2});
+ EXPECT_THAT(dst,
+ testing::ElementsAre(int8_t{1}, int8_t{2}, int8_t{4}, int8_t{8}));
}
TEST(AddCustomContext, Simple) {
+ std::map<std::string, std::string> *&global_context = GetGlobalContext();
EXPECT_THAT(global_context, nullptr);
AddCustomContext("foo", "bar");
@@ -148,6 +151,7 @@
}
TEST(AddCustomContext, DuplicateKey) {
+ std::map<std::string, std::string> *&global_context = GetGlobalContext();
EXPECT_THAT(global_context, nullptr);
AddCustomContext("foo", "bar");
diff --git a/third-party/benchmark/test/benchmark_min_time_flag_iters_test.cc b/third-party/benchmark/test/benchmark_min_time_flag_iters_test.cc
new file mode 100644
index 0000000..3de93a7
--- /dev/null
+++ b/third-party/benchmark/test/benchmark_min_time_flag_iters_test.cc
@@ -0,0 +1,66 @@
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+// Tests that we can specify the number of iterations with
+// --benchmark_min_time=<NUM>x.
+namespace {
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+ virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+ return ConsoleReporter::ReportContext(context);
+ };
+
+ virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ assert(report.size() == 1);
+ iter_nums_.push_back(report[0].iterations);
+ ConsoleReporter::ReportRuns(report);
+ };
+
+ TestReporter() {}
+
+ virtual ~TestReporter() {}
+
+ const std::vector<benchmark::IterationCount>& GetIters() const {
+ return iter_nums_;
+ }
+
+ private:
+ std::vector<benchmark::IterationCount> iter_nums_;
+};
+
+} // end namespace
+
+static void BM_MyBench(benchmark::State& state) {
+ for (auto s : state) {
+ }
+}
+BENCHMARK(BM_MyBench);
+
+int main(int argc, char** argv) {
+ // Make a fake argv and append the new --benchmark_min_time=<foo> to it.
+ int fake_argc = argc + 1;
+ const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)];
+ for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i];
+ fake_argv[argc] = "--benchmark_min_time=4x";
+
+ benchmark::Initialize(&fake_argc, const_cast<char**>(fake_argv));
+
+ TestReporter test_reporter;
+ const size_t returned_count =
+ benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench");
+ assert(returned_count == 1);
+
+ // Check the executed iters.
+ const std::vector<benchmark::IterationCount> iters = test_reporter.GetIters();
+ assert(!iters.empty() && iters[0] == 4);
+
+ delete[] fake_argv;
+ return 0;
+}
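
The test above drives the iteration-count form of the flag through a fake argv; the same effect is available per benchmark through the Iterations() builder. A minimal sketch (benchmark name hypothetical), assuming the usual benchmark main:

#include "benchmark/benchmark.h"

// Sketch: pin a benchmark to a fixed iteration count in code, the
// per-benchmark counterpart of passing --benchmark_min_time=4x.
static void BM_FixedIterations(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_FixedIterations)->Iterations(4);

BENCHMARK_MAIN();
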
diff --git a/third-party/benchmark/test/benchmark_min_time_flag_time_test.cc b/third-party/benchmark/test/benchmark_min_time_flag_time_test.cc
new file mode 100644
index 0000000..04a82eb9
--- /dev/null
+++ b/third-party/benchmark/test/benchmark_min_time_flag_time_test.cc
@@ -0,0 +1,90 @@
+#include <cassert>
+#include <climits>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+// Tests that we can specify the min time with
+// --benchmark_min_time=<NUM> (no suffix needed) OR
+// --benchmark_min_time=<NUM>s
+namespace {
+
+// This is from benchmark.h
+typedef int64_t IterationCount;
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+ virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+ return ConsoleReporter::ReportContext(context);
+ };
+
+ virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ assert(report.size() == 1);
+ ConsoleReporter::ReportRuns(report);
+ };
+
+ virtual void ReportRunsConfig(double min_time, bool /* has_explicit_iters */,
+ IterationCount /* iters */) BENCHMARK_OVERRIDE {
+ min_times_.push_back(min_time);
+ }
+
+ TestReporter() {}
+
+ virtual ~TestReporter() {}
+
+ const std::vector<double>& GetMinTimes() const { return min_times_; }
+
+ private:
+ std::vector<double> min_times_;
+};
+
+bool AlmostEqual(double a, double b) {
+ return std::fabs(a - b) < std::numeric_limits<double>::epsilon();
+}
+
+void DoTestHelper(int* argc, const char** argv, double expected) {
+ benchmark::Initialize(argc, const_cast<char**>(argv));
+
+ TestReporter test_reporter;
+ const size_t returned_count =
+ benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench");
+ assert(returned_count == 1);
+
+ // Check the min_time
+ const std::vector<double>& min_times = test_reporter.GetMinTimes();
+ assert(!min_times.empty() && AlmostEqual(min_times[0], expected));
+}
+
+} // end namespace
+
+static void BM_MyBench(benchmark::State& state) {
+ for (auto s : state) {
+ }
+}
+BENCHMARK(BM_MyBench);
+
+int main(int argc, char** argv) {
+ // Make a fake argv and append the new --benchmark_min_time=<foo> to it.
+ int fake_argc = argc + 1;
+ const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)];
+
+ for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i];
+
+ const char* no_suffix = "--benchmark_min_time=4";
+ const char* with_suffix = "--benchmark_min_time=4.0s";
+ double expected = 4.0;
+
+ fake_argv[argc] = no_suffix;
+ DoTestHelper(&fake_argc, fake_argv, expected);
+
+ fake_argv[argc] = with_suffix;
+ DoTestHelper(&fake_argc, fake_argv, expected);
+
+ delete[] fake_argv;
+ return 0;
+}
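
The seconds form tested here likewise has per-benchmark builders, MinTime() and the newly added MinWarmUpTime(), both exercised again in options_test.cc below. A minimal sketch with a hypothetical benchmark:

#include "benchmark/benchmark.h"

// Sketch: ask for at least 4 seconds of measurement after 1 second of
// warmup, mirroring --benchmark_min_time=4 / --benchmark_min_time=4.0s.
static void BM_MinTimeInCode(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_MinTimeInCode)->MinTime(4.0)->MinWarmUpTime(1.0);

BENCHMARK_MAIN();
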
diff --git a/third-party/benchmark/test/benchmark_name_gtest.cc b/third-party/benchmark/test/benchmark_name_gtest.cc
index afb401c..0a6746d 100644
--- a/third-party/benchmark/test/benchmark_name_gtest.cc
+++ b/third-party/benchmark/test/benchmark_name_gtest.cc
@@ -32,6 +32,14 @@
EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s");
}
+TEST(BenchmarkNameTest, MinWarmUpTime) {
+ auto name = BenchmarkName();
+ name.function_name = "function_name";
+ name.args = "some_args:3/4";
+ name.min_warmup_time = "min_warmup_time:3.5s";
+ EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_warmup_time:3.5s");
+}
+
TEST(BenchmarkNameTest, Iterations) {
auto name = BenchmarkName();
name.function_name = "function_name";
diff --git a/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc b/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc
index d04befa..7f20867 100644
--- a/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc
+++ b/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc
@@ -51,10 +51,9 @@
void Execute(const std::string& pattern) {
queue->Clear();
- BenchmarkReporter* reporter = new NullReporter;
+ std::unique_ptr<BenchmarkReporter> reporter(new NullReporter());
FLAGS_benchmark_filter = pattern;
- RunSpecifiedBenchmarks(reporter);
- delete reporter;
+ RunSpecifiedBenchmarks(reporter.get());
queue->Put("DONE"); // End marker
}
diff --git a/third-party/benchmark/test/benchmark_setup_teardown_test.cc b/third-party/benchmark/test/benchmark_setup_teardown_test.cc
index efa34e1..6c3cc2e 100644
--- a/third-party/benchmark/test/benchmark_setup_teardown_test.cc
+++ b/third-party/benchmark/test/benchmark_setup_teardown_test.cc
@@ -10,19 +10,19 @@
// Test that Setup() and Teardown() are called exactly once
// for each benchmark run (single-threaded).
-namespace single {
+namespace singlethreaded {
static int setup_call = 0;
static int teardown_call = 0;
-} // namespace single
+} // namespace singlethreaded
static void DoSetup1(const benchmark::State& state) {
- ++single::setup_call;
+ ++singlethreaded::setup_call;
// Setup/Teardown should never be called with any thread_idx != 0.
assert(state.thread_index() == 0);
}
static void DoTeardown1(const benchmark::State& state) {
- ++single::teardown_call;
+ ++singlethreaded::teardown_call;
assert(state.thread_index() == 0);
}
@@ -80,11 +80,11 @@
class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture {
public:
- void SetUp(const ::benchmark::State&) BENCHMARK_OVERRIDE {
+ void SetUp(const ::benchmark::State&) override {
fixture_interaction::fixture_setup++;
}
- ~FIXTURE_BECHMARK_NAME() {}
+ ~FIXTURE_BECHMARK_NAME() override {}
};
BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) {
@@ -134,8 +134,8 @@
assert(ret > 0);
// Setup/Teardown is called once for each arg group (1,3,5,7).
- assert(single::setup_call == 4);
- assert(single::teardown_call == 4);
+ assert(singlethreaded::setup_call == 4);
+ assert(singlethreaded::teardown_call == 4);
// 3 group of threads calling this function (3,5,10).
assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3);
@@ -145,7 +145,7 @@
// Setup is called 4 times, once for each arg group (1,3,5,7)
assert(fixture_interaction::setup == 4);
- // Fixture::Setup is called everytime the bm routine is run.
+ // Fixture::Setup is called every time the bm routine is run.
// The exact number is indeterministic, so we just assert that
// it's more than setup.
assert(fixture_interaction::fixture_setup > fixture_interaction::setup);
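
The counters above compare the per-run Setup()/Teardown() hooks against Fixture::SetUp, which runs every time the benchmark routine is invoked. A minimal sketch of attaching such hooks to a benchmark (all names hypothetical):

#include "benchmark/benchmark.h"

static int g_setup_calls = 0;
static int g_teardown_calls = 0;

static void DoSetup(const benchmark::State&) { ++g_setup_calls; }
static void DoTeardown(const benchmark::State&) { ++g_teardown_calls; }

static void BM_WithHooks(benchmark::State& state) {
  for (auto _ : state) {
  }
}
// Setup/Teardown fire once per benchmark run (per arg/thread group), not
// once per invocation of the benchmark routine.
BENCHMARK(BM_WithHooks)->Arg(1)->Arg(3)->Setup(DoSetup)->Teardown(DoTeardown);

BENCHMARK_MAIN();
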
diff --git a/third-party/benchmark/test/benchmark_test.cc b/third-party/benchmark/test/benchmark_test.cc
index 2906cdc..8b14017 100644
--- a/third-party/benchmark/test/benchmark_test.cc
+++ b/third-party/benchmark/test/benchmark_test.cc
@@ -5,6 +5,7 @@
#include <stdint.h>
#include <chrono>
+#include <complex>
#include <cstdlib>
#include <iostream>
#include <limits>
@@ -15,6 +16,7 @@
#include <sstream>
#include <string>
#include <thread>
+#include <type_traits>
#include <utility>
#include <vector>
@@ -26,7 +28,7 @@
namespace {
-int BENCHMARK_NOINLINE Factorial(uint32_t n) {
+int BENCHMARK_NOINLINE Factorial(int n) {
return (n == 1) ? 1 : n * Factorial(n - 1);
}
@@ -74,7 +76,8 @@
static void BM_CalculatePi(benchmark::State& state) {
static const int depth = 1024;
for (auto _ : state) {
- benchmark::DoNotOptimize(CalculatePi(static_cast<int>(depth)));
+ double pi = CalculatePi(static_cast<int>(depth));
+ benchmark::DoNotOptimize(pi);
}
}
BENCHMARK(BM_CalculatePi)->Threads(8);
@@ -90,7 +93,8 @@
for (int j = 0; j < state.range(1); ++j) data.insert(rand());
}
state.SetItemsProcessed(state.iterations() * state.range(1));
- state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int));
+ state.SetBytesProcessed(state.iterations() * state.range(1) *
+ static_cast<int64_t>(sizeof(int)));
}
// Test many inserts at once to reduce the total iterations needed. Otherwise,
@@ -108,7 +112,7 @@
}
const int64_t items_processed = state.iterations() * state.range(0);
state.SetItemsProcessed(items_processed);
- state.SetBytesProcessed(items_processed * sizeof(v));
+ state.SetBytesProcessed(items_processed * static_cast<int64_t>(sizeof(v)));
}
BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)
->Range(1 << 0, 1 << 10);
@@ -122,7 +126,10 @@
size_t len = static_cast<size_t>(state.range(0));
std::string s1(len, '-');
std::string s2(len, '-');
- for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2));
+ for (auto _ : state) {
+ auto comp = s1.compare(s2);
+ benchmark::DoNotOptimize(comp);
+ }
}
BENCHMARK(BM_StringCompare)->Range(1, 1 << 20);
@@ -169,7 +176,7 @@
for (int i = from; i < to; i++) {
// No need to lock test_vector_mu as ranges
// do not overlap between threads.
- benchmark::DoNotOptimize(test_vector->at(i) = 1);
+ benchmark::DoNotOptimize(test_vector->at(static_cast<size_t>(i)) = 1);
}
}
@@ -220,6 +227,31 @@
}
BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
+template <class T, class U, class... ExtraArgs>
+void BM_template2_capture(benchmark::State& state, ExtraArgs&&... extra_args) {
+ static_assert(std::is_same<T, void>::value, "");
+ static_assert(std::is_same<U, char*>::value, "");
+ static_assert(std::is_same<ExtraArgs..., unsigned int>::value, "");
+ unsigned int dummy[sizeof...(ExtraArgs)] = {extra_args...};
+ assert(dummy[0] == 42);
+ for (auto _ : state) {
+ }
+}
+BENCHMARK_TEMPLATE2_CAPTURE(BM_template2_capture, void, char*, foo, 42U);
+BENCHMARK_CAPTURE((BM_template2_capture<void, char*>), foo, 42U);
+
+template <class T, class... ExtraArgs>
+void BM_template1_capture(benchmark::State& state, ExtraArgs&&... extra_args) {
+ static_assert(std::is_same<T, void>::value, "");
+ static_assert(std::is_same<ExtraArgs..., unsigned long>::value, "");
+ unsigned long dummy[sizeof...(ExtraArgs)] = {extra_args...};
+ assert(dummy[0] == 24);
+ for (auto _ : state) {
+ }
+}
+BENCHMARK_TEMPLATE1_CAPTURE(BM_template1_capture, void, foo, 24UL);
+BENCHMARK_CAPTURE(BM_template1_capture<void>, foo, 24UL);
+
#endif // BENCHMARK_HAS_CXX11
static void BM_DenseThreadRanges(benchmark::State& st) {
@@ -244,4 +276,25 @@
BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2);
BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3);
+static void BM_BenchmarkName(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+
+ // Check that the benchmark name is passed correctly to `state`.
+ assert("BM_BenchmarkName" == state.name());
+}
+BENCHMARK(BM_BenchmarkName);
+
+// regression test for #1446
+template <typename type>
+static void BM_templated_test(benchmark::State& state) {
+ for (auto _ : state) {
+ type created_string;
+ benchmark::DoNotOptimize(created_string);
+ }
+}
+
+static auto BM_templated_test_double = BM_templated_test<std::complex<double>>;
+BENCHMARK(BM_templated_test_double);
+
BENCHMARK_MAIN();
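
The BENCHMARK_TEMPLATE1_CAPTURE / BENCHMARK_TEMPLATE2_CAPTURE cases added above combine a template parameter with captured runtime arguments; a minimal usage sketch with a hypothetical fill benchmark:

#include <vector>

#include "benchmark/benchmark.h"

template <class T, class... ExtraArgs>
static void BM_Fill(benchmark::State& state, ExtraArgs&&... extra_args) {
  // Single captured argument: the number of elements to construct.
  const size_t count[] = {static_cast<size_t>(extra_args)...};
  for (auto _ : state) {
    std::vector<T> v(count[0], T{});
    auto* p = v.data();
    benchmark::DoNotOptimize(p);
  }
}
// Arguments: function, template argument, name suffix, captured values.
BENCHMARK_TEMPLATE1_CAPTURE(BM_Fill, int, small, 64U);

BENCHMARK_MAIN();
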
diff --git a/third-party/benchmark/test/clobber_memory_assembly_test.cc b/third-party/benchmark/test/clobber_memory_assembly_test.cc
index ab26913..54e26cc 100644
--- a/third-party/benchmark/test/clobber_memory_assembly_test.cc
+++ b/third-party/benchmark/test/clobber_memory_assembly_test.cc
@@ -3,6 +3,7 @@
#ifdef __clang__
#pragma clang diagnostic ignored "-Wreturn-type"
#endif
+BENCHMARK_DISABLE_DEPRECATED_WARNING
extern "C" {
diff --git a/third-party/benchmark/test/complexity_test.cc b/third-party/benchmark/test/complexity_test.cc
index ea268b5..fb4ad1a 100644
--- a/third-party/benchmark/test/complexity_test.cc
+++ b/third-party/benchmark/test/complexity_test.cc
@@ -26,7 +26,7 @@
AddCases(
TC_ConsoleOut,
{{"^%bigo_name %bigo_str %bigo_str[ ]*$"},
- {"^%bigo_name", MR_Not}, // Assert we didn't only matched a name.
+ {"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name.
{"^%rms_name %rms %rms[ ]*$", MR_Next}});
AddCases(
TC_JSONOut,
@@ -69,35 +69,44 @@
void BM_Complexity_O1(benchmark::State &state) {
for (auto _ : state) {
- for (int i = 0; i < 1024; ++i) {
- benchmark::DoNotOptimize(&i);
+ // This test requires a non-zero CPU time to avoid divide-by-zero
+ benchmark::DoNotOptimize(state.iterations());
+ long tmp = state.iterations();
+ benchmark::DoNotOptimize(tmp);
+ for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
+ benchmark::DoNotOptimize(state.iterations());
+ tmp *= state.iterations();
+ benchmark::DoNotOptimize(tmp);
}
+
+ // always 1ns per iteration
+ state.SetIterationTime(42 * 1e-9);
}
state.SetComplexityN(state.range(0));
}
-BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1);
-BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity();
BENCHMARK(BM_Complexity_O1)
->Range(1, 1 << 18)
+ ->UseManualTime()
+ ->Complexity(benchmark::o1);
+BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->UseManualTime()->Complexity();
+BENCHMARK(BM_Complexity_O1)
+ ->Range(1, 1 << 18)
+ ->UseManualTime()
->Complexity([](benchmark::IterationCount) { return 1.0; });
-const char *one_test_name = "BM_Complexity_O1";
-const char *big_o_1_test_name = "BM_Complexity_O1_BigO";
-const char *rms_o_1_test_name = "BM_Complexity_O1_RMS";
-const char *enum_big_o_1 = "\\([0-9]+\\)";
-// FIXME: Tolerate both '(1)' and 'lgN' as output when the complexity is auto
-// deduced.
-// See https://github.com/google/benchmark/issues/272
-const char *auto_big_o_1 = "(\\([0-9]+\\))|(lgN)";
+const char *one_test_name = "BM_Complexity_O1/manual_time";
+const char *big_o_1_test_name = "BM_Complexity_O1/manual_time_BigO";
+const char *rms_o_1_test_name = "BM_Complexity_O1/manual_time_RMS";
+const char *enum_auto_big_o_1 = "\\([0-9]+\\)";
const char *lambda_big_o_1 = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
- enum_big_o_1, /*family_index=*/0);
+ enum_auto_big_o_1, /*family_index=*/0);
-// Add auto enum tests
+// Add auto tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
- auto_big_o_1, /*family_index=*/1);
+ enum_auto_big_o_1, /*family_index=*/1);
// Add lambda tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
@@ -107,42 +116,44 @@
// --------------------------- Testing BigO O(N) --------------------------- //
// ========================================================================= //
-std::vector<int> ConstructRandomVector(int64_t size) {
- std::vector<int> v;
- v.reserve(static_cast<int>(size));
- for (int i = 0; i < size; ++i) {
- v.push_back(static_cast<int>(std::rand() % size));
- }
- return v;
-}
-
void BM_Complexity_O_N(benchmark::State &state) {
- auto v = ConstructRandomVector(state.range(0));
- // Test worst case scenario (item not in vector)
- const int64_t item_not_in_vector = state.range(0) * 2;
for (auto _ : state) {
- benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
+ // This test requires a non-zero CPU time to avoid divide-by-zero
+ benchmark::DoNotOptimize(state.iterations());
+ long tmp = state.iterations();
+ benchmark::DoNotOptimize(tmp);
+ for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
+ benchmark::DoNotOptimize(state.iterations());
+ tmp *= state.iterations();
+ benchmark::DoNotOptimize(tmp);
+ }
+
+ // 42ns per iteration per entry
+ state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O_N)
->RangeMultiplier(2)
- ->Range(1 << 10, 1 << 16)
+ ->Range(1 << 10, 1 << 20)
+ ->UseManualTime()
->Complexity(benchmark::oN);
BENCHMARK(BM_Complexity_O_N)
->RangeMultiplier(2)
- ->Range(1 << 10, 1 << 16)
+ ->Range(1 << 10, 1 << 20)
+ ->UseManualTime()
+ ->Complexity();
+BENCHMARK(BM_Complexity_O_N)
+ ->RangeMultiplier(2)
+ ->Range(1 << 10, 1 << 20)
+ ->UseManualTime()
->Complexity([](benchmark::IterationCount n) -> double {
return static_cast<double>(n);
});
-BENCHMARK(BM_Complexity_O_N)
- ->RangeMultiplier(2)
- ->Range(1 << 10, 1 << 16)
- ->Complexity();
-const char *n_test_name = "BM_Complexity_O_N";
-const char *big_o_n_test_name = "BM_Complexity_O_N_BigO";
-const char *rms_o_n_test_name = "BM_Complexity_O_N_RMS";
+const char *n_test_name = "BM_Complexity_O_N/manual_time";
+const char *big_o_n_test_name = "BM_Complexity_O_N/manual_time_BigO";
+const char *rms_o_n_test_name = "BM_Complexity_O_N/manual_time_RMS";
const char *enum_auto_big_o_n = "N";
const char *lambda_big_o_n = "f\\(N\\)";
@@ -150,40 +161,57 @@
ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
enum_auto_big_o_n, /*family_index=*/3);
+// Add auto tests
+ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
+ enum_auto_big_o_n, /*family_index=*/4);
+
// Add lambda tests
ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
- lambda_big_o_n, /*family_index=*/4);
+ lambda_big_o_n, /*family_index=*/5);
// ========================================================================= //
-// ------------------------- Testing BigO O(N*lgN) ------------------------- //
+// ------------------------- Testing BigO O(NlgN) ------------------------- //
// ========================================================================= //
+static const double kLog2E = 1.44269504088896340736;
static void BM_Complexity_O_N_log_N(benchmark::State &state) {
- auto v = ConstructRandomVector(state.range(0));
for (auto _ : state) {
- std::sort(v.begin(), v.end());
+ // This test requires a non-zero CPU time to avoid divide-by-zero
+ benchmark::DoNotOptimize(state.iterations());
+ long tmp = state.iterations();
+ benchmark::DoNotOptimize(tmp);
+ for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
+ benchmark::DoNotOptimize(state.iterations());
+ tmp *= state.iterations();
+ benchmark::DoNotOptimize(tmp);
+ }
+
+ state.SetIterationTime(static_cast<double>(state.range(0)) * kLog2E *
+ std::log(state.range(0)) * 42.0 * 1e-9);
}
state.SetComplexityN(state.range(0));
}
-static const double kLog2E = 1.44269504088896340736;
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
- ->Range(1 << 10, 1 << 16)
+ ->Range(1 << 10, 1U << 24)
+ ->UseManualTime()
->Complexity(benchmark::oNLogN);
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
- ->Range(1 << 10, 1 << 16)
- ->Complexity([](benchmark::IterationCount n) {
- return kLog2E * n * log(static_cast<double>(n));
- });
+ ->Range(1 << 10, 1U << 24)
+ ->UseManualTime()
+ ->Complexity();
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
- ->Range(1 << 10, 1 << 16)
- ->Complexity();
+ ->Range(1 << 10, 1U << 24)
+ ->UseManualTime()
+ ->Complexity([](benchmark::IterationCount n) {
+ return kLog2E * static_cast<double>(n) * std::log(static_cast<double>(n));
+ });
-const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N";
-const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO";
-const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS";
+const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time";
+const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time_BigO";
+const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time_RMS";
const char *enum_auto_big_o_n_lg_n = "NlgN";
const char *lambda_big_o_n_lg_n = "f\\(N\\)";
@@ -192,10 +220,15 @@
rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n,
/*family_index=*/6);
-// Add lambda tests
+// NOTE: auto big-o is wrong.
+ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
+ rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n,
+ /*family_index=*/7);
+
+// Add lambda tests
ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n,
- /*family_index=*/7);
+ /*family_index=*/8);
// ========================================================================= //
// -------- Testing formatting of Complexity with captured args ------------ //
@@ -205,19 +238,30 @@
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
benchmark::DoNotOptimize(state.iterations());
+ long tmp = state.iterations();
+ benchmark::DoNotOptimize(tmp);
+ for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
+ benchmark::DoNotOptimize(state.iterations());
+ tmp *= state.iterations();
+ benchmark::DoNotOptimize(tmp);
+ }
+
+ state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
}
state.SetComplexityN(n);
}
BENCHMARK_CAPTURE(BM_ComplexityCaptureArgs, capture_test, 100)
+ ->UseManualTime()
->Complexity(benchmark::oN)
->Ranges({{1, 2}, {3, 4}});
const std::string complexity_capture_name =
- "BM_ComplexityCaptureArgs/capture_test";
+ "BM_ComplexityCaptureArgs/capture_test/manual_time";
ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO",
- complexity_capture_name + "_RMS", "N", /*family_index=*/9);
+ complexity_capture_name + "_RMS", "N",
+ /*family_index=*/9);
// ========================================================================= //
// --------------------------- TEST CASES END ------------------------------ //
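
The complexity tests above were rewritten to report a synthetic manual time so the fitted Big-O no longer depends on real CPU time; the same pattern in a user benchmark would look roughly like this (the 42ns-per-element cost is an assumption of the sketch):

#include "benchmark/benchmark.h"

static void BM_LinearWork(benchmark::State &state) {
  for (auto _ : state) {
    // ... real work would go here ...
    // Report a deterministic time: 42ns per element (sketch assumption).
    state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
  }
  state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_LinearWork)
    ->RangeMultiplier(2)
    ->Range(1 << 10, 1 << 18)
    ->UseManualTime()
    ->Complexity(benchmark::oN);

BENCHMARK_MAIN();
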
diff --git a/third-party/benchmark/test/diagnostics_test.cc b/third-party/benchmark/test/diagnostics_test.cc
index c54d5b0..7c68a98 100644
--- a/third-party/benchmark/test/diagnostics_test.cc
+++ b/third-party/benchmark/test/diagnostics_test.cc
@@ -49,7 +49,8 @@
if (called_once == false) try_invalid_pause_resume(state);
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
if (called_once == false) try_invalid_pause_resume(state);
@@ -64,7 +65,8 @@
if (called_once == false) try_invalid_pause_resume(state);
while (state.KeepRunning()) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
if (called_once == false) try_invalid_pause_resume(state);
@@ -74,7 +76,16 @@
BENCHMARK(BM_diagnostic_test_keep_running);
int main(int argc, char* argv[]) {
+#ifdef NDEBUG
+ // This test is exercising functionality for debug builds, which are not
+ // available in release builds. Skip the test if we are in that environment
+ // to avoid a test failure.
+ std::cout << "Diagnostic test disabled in release build" << std::endl;
+ (void)argc;
+ (void)argv;
+#else
benchmark::internal::GetAbortHandler() = &TestHandler;
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
+#endif
}
diff --git a/third-party/benchmark/test/donotoptimize_assembly_test.cc b/third-party/benchmark/test/donotoptimize_assembly_test.cc
index 2e86a51..dc286f5 100644
--- a/third-party/benchmark/test/donotoptimize_assembly_test.cc
+++ b/third-party/benchmark/test/donotoptimize_assembly_test.cc
@@ -3,12 +3,16 @@
#ifdef __clang__
#pragma clang diagnostic ignored "-Wreturn-type"
#endif
+BENCHMARK_DISABLE_DEPRECATED_WARNING
extern "C" {
extern int ExternInt;
extern int ExternInt2;
extern int ExternInt3;
+extern int BigArray[2049];
+
+const int ConstBigArray[2049]{};
inline int Add42(int x) { return x + 42; }
@@ -23,7 +27,15 @@
int value;
int data[2];
};
+
+struct ExtraLarge {
+ int arr[2049];
+};
}
+
+extern ExtraLarge ExtraLargeObj;
+const ExtraLarge ConstExtraLargeObj{};
+
// CHECK-LABEL: test_with_rvalue:
extern "C" void test_with_rvalue() {
benchmark::DoNotOptimize(Add42(0));
@@ -68,6 +80,22 @@
// CHECK: ret
}
+// CHECK-LABEL: test_with_extra_large_lvalue_with_op:
+extern "C" void test_with_extra_large_lvalue_with_op() {
+ ExtraLargeObj.arr[16] = 42;
+ benchmark::DoNotOptimize(ExtraLargeObj);
+ // CHECK: movl $42, ExtraLargeObj+64(%rip)
+ // CHECK: ret
+}
+
+// CHECK-LABEL: test_with_big_array_with_op
+extern "C" void test_with_big_array_with_op() {
+ BigArray[16] = 42;
+ benchmark::DoNotOptimize(BigArray);
+ // CHECK: movl $42, BigArray+64(%rip)
+ // CHECK: ret
+}
+
// CHECK-LABEL: test_with_non_trivial_lvalue:
extern "C" void test_with_non_trivial_lvalue() {
NotTriviallyCopyable NTC(ExternInt);
@@ -96,6 +124,18 @@
// CHECK: ret
}
+// CHECK-LABEL: test_with_const_extra_large_obj:
+extern "C" void test_with_const_extra_large_obj() {
+ benchmark::DoNotOptimize(ConstExtraLargeObj);
+ // CHECK: ret
+}
+
+// CHECK-LABEL: test_with_const_big_array
+extern "C" void test_with_const_big_array() {
+ benchmark::DoNotOptimize(ConstBigArray);
+ // CHECK: ret
+}
+
// CHECK-LABEL: test_with_non_trivial_const_lvalue:
extern "C" void test_with_non_trivial_const_lvalue() {
const NotTriviallyCopyable Obj(ExternInt);
diff --git a/third-party/benchmark/test/donotoptimize_test.cc b/third-party/benchmark/test/donotoptimize_test.cc
index c321f15..04ec938 100644
--- a/third-party/benchmark/test/donotoptimize_test.cc
+++ b/third-party/benchmark/test/donotoptimize_test.cc
@@ -4,9 +4,9 @@
namespace {
#if defined(__GNUC__)
-std::uint64_t double_up(const std::uint64_t x) __attribute__((const));
+std::int64_t double_up(const std::int64_t x) __attribute__((const));
#endif
-std::uint64_t double_up(const std::uint64_t x) { return x * 2; }
+std::int64_t double_up(const std::int64_t x) { return x * 2; }
} // namespace
// Using DoNotOptimize on types like BitRef seem to cause a lot of problems
@@ -29,6 +29,15 @@
int main(int, char*[]) {
// this test verifies compilation of DoNotOptimize() for some types
+ char buffer1[1] = "";
+ benchmark::DoNotOptimize(buffer1);
+
+ char buffer2[2] = "";
+ benchmark::DoNotOptimize(buffer2);
+
+ char buffer3[3] = "";
+ benchmark::DoNotOptimize(buffer3);
+
char buffer8[8] = "";
benchmark::DoNotOptimize(buffer8);
@@ -37,17 +46,24 @@
char buffer1024[1024] = "";
benchmark::DoNotOptimize(buffer1024);
- benchmark::DoNotOptimize(&buffer1024[0]);
+ char* bptr = &buffer1024[0];
+ benchmark::DoNotOptimize(bptr);
int x = 123;
benchmark::DoNotOptimize(x);
- benchmark::DoNotOptimize(&x);
+ int* xp = &x;
+ benchmark::DoNotOptimize(xp);
benchmark::DoNotOptimize(x += 42);
- benchmark::DoNotOptimize(double_up(x));
+ std::int64_t y = double_up(x);
+ benchmark::DoNotOptimize(y);
// These tests are to e
- benchmark::DoNotOptimize(BitRef::Make());
BitRef lval = BitRef::Make();
benchmark::DoNotOptimize(lval);
+
+#ifdef BENCHMARK_HAS_CXX11
+ // Check that rvalues are accepted.
+ benchmark::DoNotOptimize(BitRef::Make());
+#endif
}
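
The changes above route results through named lvalues before DoNotOptimize() and keep a single rvalue call behind BENCHMARK_HAS_CXX11; the two call styles side by side, as a sketch:

#include <cstdint>

#include "benchmark/benchmark.h"

static void BM_DoNotOptimizeStyles(benchmark::State& state) {
  for (auto _ : state) {
    // Preferred in these tests: pass a named lvalue.
    std::int64_t x = 42;
    benchmark::DoNotOptimize(x);

    // Still accepted (C++11 and later): an rvalue such as an expression.
    benchmark::DoNotOptimize(x * 2);
  }
}
BENCHMARK(BM_DoNotOptimizeStyles);

BENCHMARK_MAIN();
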
diff --git a/third-party/benchmark/test/filter_test.cc b/third-party/benchmark/test/filter_test.cc
index a567de2..4c8b8ea 100644
--- a/third-party/benchmark/test/filter_test.cc
+++ b/third-party/benchmark/test/filter_test.cc
@@ -14,28 +14,27 @@
class TestReporter : public benchmark::ConsoleReporter {
public:
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+ bool ReportContext(const Context& context) override {
return ConsoleReporter::ReportContext(context);
};
- virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ void ReportRuns(const std::vector<Run>& report) override {
++count_;
- max_family_index_ =
- std::max<size_t>(max_family_index_, report[0].family_index);
+ max_family_index_ = std::max(max_family_index_, report[0].family_index);
ConsoleReporter::ReportRuns(report);
};
TestReporter() : count_(0), max_family_index_(0) {}
- virtual ~TestReporter() {}
+ ~TestReporter() override {}
- size_t GetCount() const { return count_; }
+ int GetCount() const { return count_; }
- size_t GetMaxFamilyIndex() const { return max_family_index_; }
+ int64_t GetMaxFamilyIndex() const { return max_family_index_; }
private:
- mutable size_t count_;
- mutable size_t max_family_index_;
+ mutable int count_;
+ mutable int64_t max_family_index_;
};
} // end namespace
@@ -79,13 +78,13 @@
benchmark::Initialize(&argc, argv);
TestReporter test_reporter;
- const size_t returned_count =
- benchmark::RunSpecifiedBenchmarks(&test_reporter);
+ const int64_t returned_count =
+ static_cast<int64_t>(benchmark::RunSpecifiedBenchmarks(&test_reporter));
if (argc == 2) {
// Make sure we ran all of the tests
std::stringstream ss(argv[1]);
- size_t expected_return;
+ int64_t expected_return;
ss >> expected_return;
if (returned_count != expected_return) {
@@ -95,8 +94,8 @@
return -1;
}
- const size_t expected_reports = list_only ? 0 : expected_return;
- const size_t reports_count = test_reporter.GetCount();
+ const int64_t expected_reports = list_only ? 0 : expected_return;
+ const int64_t reports_count = test_reporter.GetCount();
if (reports_count != expected_reports) {
std::cerr << "ERROR: Expected " << expected_reports
<< " tests to be run but reported_count = " << reports_count
@@ -104,8 +103,8 @@
return -1;
}
- const size_t max_family_index = test_reporter.GetMaxFamilyIndex();
- const size_t num_families = reports_count == 0 ? 0 : 1 + max_family_index;
+ const int64_t max_family_index = test_reporter.GetMaxFamilyIndex();
+ const int64_t num_families = reports_count == 0 ? 0 : 1 + max_family_index;
if (num_families != expected_reports) {
std::cerr << "ERROR: Expected " << expected_reports
<< " test families to be run but num_families = "
diff --git a/third-party/benchmark/test/fixture_test.cc b/third-party/benchmark/test/fixture_test.cc
index af650db..d1093eb 100644
--- a/third-party/benchmark/test/fixture_test.cc
+++ b/third-party/benchmark/test/fixture_test.cc
@@ -8,21 +8,21 @@
class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture {
public:
- void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE {
+ void SetUp(const ::benchmark::State& state) override {
if (state.thread_index() == 0) {
assert(data.get() == nullptr);
data.reset(new int(42));
}
}
- void TearDown(const ::benchmark::State& state) BENCHMARK_OVERRIDE {
+ void TearDown(const ::benchmark::State& state) override {
if (state.thread_index() == 0) {
assert(data.get() != nullptr);
data.reset();
}
}
- ~FIXTURE_BECHMARK_NAME() { assert(data == nullptr); }
+ ~FIXTURE_BECHMARK_NAME() override { assert(data == nullptr); }
std::unique_ptr<int> data;
};
diff --git a/third-party/benchmark/test/link_main_test.cc b/third-party/benchmark/test/link_main_test.cc
index 241ad5c..131937e 100644
--- a/third-party/benchmark/test/link_main_test.cc
+++ b/third-party/benchmark/test/link_main_test.cc
@@ -2,7 +2,8 @@
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
diff --git a/third-party/benchmark/test/map_test.cc b/third-party/benchmark/test/map_test.cc
index 5096134..0fdba7c 100644
--- a/third-party/benchmark/test/map_test.cc
+++ b/third-party/benchmark/test/map_test.cc
@@ -24,7 +24,8 @@
m = ConstructRandomMap(size);
state.ResumeTiming();
for (int i = 0; i < size; ++i) {
- benchmark::DoNotOptimize(m.find(std::rand() % size));
+ auto it = m.find(std::rand() % size);
+ benchmark::DoNotOptimize(it);
}
}
state.SetItemsProcessed(state.iterations() * size);
@@ -34,11 +35,11 @@
// Using fixtures.
class MapFixture : public ::benchmark::Fixture {
public:
- void SetUp(const ::benchmark::State& st) BENCHMARK_OVERRIDE {
+ void SetUp(const ::benchmark::State& st) override {
m = ConstructRandomMap(static_cast<int>(st.range(0)));
}
- void TearDown(const ::benchmark::State&) BENCHMARK_OVERRIDE { m.clear(); }
+ void TearDown(const ::benchmark::State&) override { m.clear(); }
std::map<int, int> m;
};
@@ -47,7 +48,8 @@
const int size = static_cast<int>(state.range(0));
for (auto _ : state) {
for (int i = 0; i < size; ++i) {
- benchmark::DoNotOptimize(m.find(std::rand() % size));
+ auto it = m.find(std::rand() % size);
+ benchmark::DoNotOptimize(it);
}
}
state.SetItemsProcessed(state.iterations() * size);
diff --git a/third-party/benchmark/test/memory_manager_test.cc b/third-party/benchmark/test/memory_manager_test.cc
index f0c192f..4df674d 100644
--- a/third-party/benchmark/test/memory_manager_test.cc
+++ b/third-party/benchmark/test/memory_manager_test.cc
@@ -5,16 +5,17 @@
#include "output_test.h"
class TestMemoryManager : public benchmark::MemoryManager {
- void Start() BENCHMARK_OVERRIDE {}
- void Stop(Result* result) BENCHMARK_OVERRIDE {
- result->num_allocs = 42;
- result->max_bytes_used = 42000;
+ void Start() override {}
+ void Stop(Result& result) override {
+ result.num_allocs = 42;
+ result.max_bytes_used = 42000;
}
};
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
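
MemoryManager::Stop() now takes the Result by reference rather than by pointer; a minimal sketch of wiring a custom manager into a benchmark binary (the allocation bookkeeping itself is left as a placeholder):

#include "benchmark/benchmark.h"

class TrivialMemoryManager : public benchmark::MemoryManager {
 public:
  void Start() override { /* reset allocation hooks here */ }
  void Stop(Result& result) override {
    // Placeholder values; a real manager would report what its hooks saw.
    result.num_allocs = 0;
    result.max_bytes_used = 0;
  }
};

static void BM_Empty(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_Empty);

int main(int argc, char** argv) {
  TrivialMemoryManager mm;
  benchmark::RegisterMemoryManager(&mm);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::RegisterMemoryManager(nullptr);
  return 0;
}
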
diff --git a/third-party/benchmark/test/min_time_parse_gtest.cc b/third-party/benchmark/test/min_time_parse_gtest.cc
new file mode 100644
index 0000000..e2bdf67
--- /dev/null
+++ b/third-party/benchmark/test/min_time_parse_gtest.cc
@@ -0,0 +1,30 @@
+#include "../src/benchmark_runner.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(ParseMinTimeTest, InvalidInput) {
+#if GTEST_HAS_DEATH_TEST
+ // Tests only runnable in debug mode (when BM_CHECK is enabled).
+#ifndef NDEBUG
+#ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS
+ ASSERT_DEATH_IF_SUPPORTED(
+ { benchmark::internal::ParseBenchMinTime("abc"); },
+ "Malformed seconds value passed to --benchmark_min_time: `abc`");
+
+ ASSERT_DEATH_IF_SUPPORTED(
+ { benchmark::internal::ParseBenchMinTime("123ms"); },
+ "Malformed seconds value passed to --benchmark_min_time: `123ms`");
+
+ ASSERT_DEATH_IF_SUPPORTED(
+ { benchmark::internal::ParseBenchMinTime("1z"); },
+ "Malformed seconds value passed to --benchmark_min_time: `1z`");
+
+ ASSERT_DEATH_IF_SUPPORTED(
+ { benchmark::internal::ParseBenchMinTime("1hs"); },
+ "Malformed seconds value passed to --benchmark_min_time: `1hs`");
+#endif
+#endif
+#endif
+}
+} // namespace
diff --git a/third-party/benchmark/test/multiple_ranges_test.cc b/third-party/benchmark/test/multiple_ranges_test.cc
index 7618c4d..5300a96 100644
--- a/third-party/benchmark/test/multiple_ranges_test.cc
+++ b/third-party/benchmark/test/multiple_ranges_test.cc
@@ -28,7 +28,7 @@
{2, 7, 15},
{7, 6, 3}}) {}
- void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE {
+ void SetUp(const ::benchmark::State& state) override {
std::vector<int64_t> ranges = {state.range(0), state.range(1),
state.range(2)};
@@ -39,7 +39,7 @@
// NOTE: This is not TearDown as we want to check after _all_ runs are
// complete.
- virtual ~MultipleRangesFixture() {
+ ~MultipleRangesFixture() override {
if (actualValues != expectedValues) {
std::cout << "EXPECTED\n";
for (const auto& v : expectedValues) {
diff --git a/third-party/benchmark/test/options_test.cc b/third-party/benchmark/test/options_test.cc
index d424d40..a1b209f 100644
--- a/third-party/benchmark/test/options_test.cc
+++ b/third-party/benchmark/test/options_test.cc
@@ -33,6 +33,8 @@
BENCHMARK(BM_basic)->Args({42, 42});
BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}});
BENCHMARK(BM_basic)->MinTime(0.7);
+BENCHMARK(BM_basic)->MinWarmUpTime(0.8);
+BENCHMARK(BM_basic)->MinTime(0.1)->MinWarmUpTime(0.2);
BENCHMARK(BM_basic)->UseRealTime();
BENCHMARK(BM_basic)->ThreadRange(2, 4);
BENCHMARK(BM_basic)->ThreadPerCpu();
@@ -65,8 +67,8 @@
// Test that the requested iteration count is respected.
assert(state.max_iterations == 42);
- size_t actual_iterations = 0;
- for (auto _ : state) ++actual_iterations;
+ for (auto _ : state) {
+ }
assert(state.iterations() == state.max_iterations);
assert(state.iterations() == 42);
}
diff --git a/third-party/benchmark/test/output_test.h b/third-party/benchmark/test/output_test.h
index c6ff8ef..c08fe1d 100644
--- a/third-party/benchmark/test/output_test.h
+++ b/third-party/benchmark/test/output_test.h
@@ -85,7 +85,7 @@
struct Results;
typedef std::function<void(Results const&)> ResultsCheckFn;
-size_t AddChecker(const char* bm_name_pattern, const ResultsCheckFn& fn);
+size_t AddChecker(const std::string& bm_name_pattern, const ResultsCheckFn& fn);
// Class holding the results of a benchmark.
// It is passed in calls to checker functions.
@@ -117,7 +117,7 @@
// get the string for a result by name, or nullptr if the name
// is not found
- const std::string* Get(const char* entry_name) const {
+ const std::string* Get(const std::string& entry_name) const {
auto it = values.find(entry_name);
if (it == values.end()) return nullptr;
return &it->second;
@@ -126,12 +126,12 @@
// get a result by name, parsed as a specific type.
// NOTE: for counters, use GetCounterAs instead.
template <class T>
- T GetAs(const char* entry_name) const;
+ T GetAs(const std::string& entry_name) const;
// counters are written as doubles, so they have to be read first
// as a double, and only then converted to the asked type.
template <class T>
- T GetCounterAs(const char* entry_name) const {
+ T GetCounterAs(const std::string& entry_name) const {
double dval = GetAs<double>(entry_name);
T tval = static_cast<T>(dval);
return tval;
@@ -139,7 +139,7 @@
};
template <class T>
-T Results::GetAs(const char* entry_name) const {
+T Results::GetAs(const std::string& entry_name) const {
auto* sv = Get(entry_name);
BM_CHECK(sv != nullptr && !sv->empty());
std::stringstream ss;
diff --git a/third-party/benchmark/test/output_test_helper.cc b/third-party/benchmark/test/output_test_helper.cc
index 81584cb..265f28a 100644
--- a/third-party/benchmark/test/output_test_helper.cc
+++ b/third-party/benchmark/test/output_test_helper.cc
@@ -45,7 +45,7 @@
static SubMap map = {
{"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"},
// human-readable float
- {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"},
+ {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kKMGTPEZYmunpfazy]?i?"},
{"%percentage", percentage_re},
{"%int", "[ ]*[0-9]+"},
{" %s ", "[ ]+"},
@@ -65,6 +65,7 @@
{"%csv_us_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",us,,,,,"},
{"%csv_ms_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ms,,,,,"},
{"%csv_s_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",s,,,,,"},
+ {"%csv_cv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",,,,,,"},
{"%csv_bytes_report",
"[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + ",,,,"},
{"%csv_items_report",
@@ -143,7 +144,7 @@
TestReporter(std::vector<benchmark::BenchmarkReporter*> reps)
: reporters_(std::move(reps)) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+ bool ReportContext(const Context& context) override {
bool last_ret = false;
bool first = true;
for (auto rep : reporters_) {
@@ -157,10 +158,10 @@
return last_ret;
}
- void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ void ReportRuns(const std::vector<Run>& report) override {
for (auto rep : reporters_) rep->ReportRuns(report);
}
- void Finalize() BENCHMARK_OVERRIDE {
+ void Finalize() override {
for (auto rep : reporters_) rep->Finalize();
}
@@ -248,9 +249,8 @@
if (!p.regex->Match(r.name)) {
BM_VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n";
continue;
- } else {
- BM_VLOG(2) << p.regex_str << " is matched by " << r.name << "\n";
}
+ BM_VLOG(2) << p.regex_str << " is matched by " << r.name << "\n";
BM_VLOG(1) << "Checking results of " << r.name << ": ... \n";
p.fn(r);
BM_VLOG(1) << "Checking results of " << r.name << ": OK.\n";
@@ -300,7 +300,7 @@
} // end namespace internal
-size_t AddChecker(const char* bm_name, const ResultsCheckFn& fn) {
+size_t AddChecker(const std::string& bm_name, const ResultsCheckFn& fn) {
auto& rc = internal::GetResultsChecker();
rc.Add(bm_name, fn);
return rc.results.size();
@@ -328,16 +328,18 @@
BM_CHECK(unit);
if (*unit == "ns") {
return val * 1.e-9;
- } else if (*unit == "us") {
- return val * 1.e-6;
- } else if (*unit == "ms") {
- return val * 1.e-3;
- } else if (*unit == "s") {
- return val;
- } else {
- BM_CHECK(1 == 0) << "unknown time unit: " << *unit;
- return 0;
}
+ if (*unit == "us") {
+ return val * 1.e-6;
+ }
+ if (*unit == "ms") {
+ return val * 1.e-3;
+ }
+ if (*unit == "s") {
+ return val;
+ }
+ BM_CHECK(1 == 0) << "unknown time unit: " << *unit;
+ return 0;
}
// ========================================================================= //
@@ -393,14 +395,14 @@
benchmark::JSONReporter JR;
benchmark::CSVReporter CSVR;
struct ReporterTest {
- const char* name;
+ std::string name;
std::vector<TestCase>& output_cases;
std::vector<TestCase>& error_cases;
benchmark::BenchmarkReporter& reporter;
std::stringstream out_stream;
std::stringstream err_stream;
- ReporterTest(const char* n, std::vector<TestCase>& out_tc,
+ ReporterTest(const std::string& n, std::vector<TestCase>& out_tc,
std::vector<TestCase>& err_tc,
benchmark::BenchmarkReporter& br)
: name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) {
@@ -408,12 +410,12 @@
reporter.SetErrorStream(&err_stream);
}
} TestCases[] = {
- {"ConsoleReporter", GetTestCaseList(TC_ConsoleOut),
+ {std::string("ConsoleReporter"), GetTestCaseList(TC_ConsoleOut),
GetTestCaseList(TC_ConsoleErr), CR},
- {"JSONReporter", GetTestCaseList(TC_JSONOut), GetTestCaseList(TC_JSONErr),
- JR},
- {"CSVReporter", GetTestCaseList(TC_CSVOut), GetTestCaseList(TC_CSVErr),
- CSVR},
+ {std::string("JSONReporter"), GetTestCaseList(TC_JSONOut),
+ GetTestCaseList(TC_JSONErr), JR},
+ {std::string("CSVReporter"), GetTestCaseList(TC_CSVOut),
+ GetTestCaseList(TC_CSVErr), CSVR},
};
// Create the test reporter and run the benchmarks.
@@ -422,7 +424,8 @@
benchmark::RunSpecifiedBenchmarks(&test_rep);
for (auto& rep_test : TestCases) {
- std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n";
+ std::string msg =
+ std::string("\nTesting ") + rep_test.name + std::string(" Output\n");
std::string banner(msg.size() - 1, '-');
std::cout << banner << msg << banner << "\n";
@@ -439,7 +442,7 @@
// the checks to subscribees.
auto& csv = TestCases[2];
// would use == but gcc spits a warning
- BM_CHECK(std::strcmp(csv.name, "CSVReporter") == 0);
+ BM_CHECK(csv.name == std::string("CSVReporter"));
internal::GetResultsChecker().CheckResults(csv.out_stream);
}
diff --git a/third-party/benchmark/test/perf_counters_gtest.cc b/third-party/benchmark/test/perf_counters_gtest.cc
index 3eac624..2e63049 100644
--- a/third-party/benchmark/test/perf_counters_gtest.cc
+++ b/third-party/benchmark/test/perf_counters_gtest.cc
@@ -1,6 +1,8 @@
+#include <random>
#include <thread>
#include "../src/perf_counters.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
#ifndef GTEST_SKIP
@@ -11,12 +13,15 @@
#endif
using benchmark::internal::PerfCounters;
+using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
+using ::testing::AllOf;
+using ::testing::Gt;
+using ::testing::Lt;
namespace {
const char kGenericPerfEvent1[] = "CYCLES";
-const char kGenericPerfEvent2[] = "BRANCHES";
-const char kGenericPerfEvent3[] = "INSTRUCTIONS";
+const char kGenericPerfEvent2[] = "INSTRUCTIONS";
TEST(PerfCountersTest, Init) {
EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
@@ -27,7 +32,7 @@
GTEST_SKIP() << "Performance counters not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
- EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid());
+ EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1);
}
TEST(PerfCountersTest, NegativeTest) {
@@ -36,29 +41,44 @@
return;
}
EXPECT_TRUE(PerfCounters::Initialize());
- EXPECT_FALSE(PerfCounters::Create({}).IsValid());
- EXPECT_FALSE(PerfCounters::Create({""}).IsValid());
- EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid());
+ // Safety checks
+ // Create() will always create a valid object, even if passed no or
+ // wrong arguments, as the new behavior is to warn and drop unsupported
+ // counters.
+ EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
+ EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
+ EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
{
- EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
- kGenericPerfEvent3})
- .IsValid());
+ // Try sneaking in a bad egg to see if it is filtered out. The
+ // number of counters has to be two, not zero
+ auto counter =
+ PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent2, kGenericPerfEvent1}));
}
- EXPECT_FALSE(
- PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1})
- .IsValid());
- EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name",
- kGenericPerfEvent1})
- .IsValid());
{
- EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
- kGenericPerfEvent3})
- .IsValid());
+ // Try sneaking in an outrageous counter, like a fat finger mistake
+ auto counter = PerfCounters::Create(
+ {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent2, kGenericPerfEvent1}));
}
- EXPECT_FALSE(
- PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
- kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"})
- .IsValid());
+ {
+ // Finally try a golden input - it should accept both of them
+ EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2})
+ .num_counters(),
+ 2);
+ }
+ {
+ // Add a bad apple in the end of the chain to check the edges
+ auto counter = PerfCounters::Create(
+ {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent1, kGenericPerfEvent2}));
+ }
}
TEST(PerfCountersTest, Read1Counter) {
@@ -67,7 +87,7 @@
}
EXPECT_TRUE(PerfCounters::Initialize());
auto counters = PerfCounters::Create({kGenericPerfEvent1});
- EXPECT_TRUE(counters.IsValid());
+ EXPECT_EQ(counters.num_counters(), 1);
PerfCounterValues values1(1);
EXPECT_TRUE(counters.Snapshot(&values1));
EXPECT_GT(values1[0], 0);
@@ -84,7 +104,7 @@
EXPECT_TRUE(PerfCounters::Initialize());
auto counters =
PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
- EXPECT_TRUE(counters.IsValid());
+ EXPECT_EQ(counters.num_counters(), 2);
PerfCounterValues values1(2);
EXPECT_TRUE(counters.Snapshot(&values1));
EXPECT_GT(values1[0], 0);
@@ -95,30 +115,121 @@
EXPECT_GT(values2[1], 0);
}
-size_t do_work() {
- size_t res = 0;
- for (size_t i = 0; i < 100000000; ++i) res += i * i;
- return res;
+TEST(PerfCountersTest, ReopenExistingCounters) {
+ // This test works in recent and old Intel hardware, Pixel 3, and Pixel 6.
+ // However we cannot make assumptions beyond 2 HW counters due to Pixel 6.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ std::vector<std::string> kMetrics({kGenericPerfEvent1});
+ std::vector<PerfCounters> counters(2);
+ for (auto& counter : counters) {
+ counter = PerfCounters::Create(kMetrics);
+ }
+ PerfCounterValues values(1);
+ EXPECT_TRUE(counters[0].Snapshot(&values));
+ EXPECT_TRUE(counters[1].Snapshot(&values));
}
-void measure(size_t threadcount, PerfCounterValues* values1,
- PerfCounterValues* values2) {
- BM_CHECK_NE(values1, nullptr);
- BM_CHECK_NE(values2, nullptr);
+TEST(PerfCountersTest, CreateExistingMeasurements) {
+ // As of this writing, the test works (i.e. causes reads to fail) under
+ // the same assumptions about hardware capabilities (i.e. a small number
+ // (2) of hardware counters) as the previous test,
+ // ReopenExistingCounters.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+
+ // This means we will try 10 counters, but at this time we can only
+ // guarantee that 3 will work. Perhaps in the future
+ // we could use libpfm to query for the hardware limits on this
+ // particular platform.
+ const int kMaxCounters = 10;
+ const int kMinValidCounters = 2;
+
+ // Let's use a ubiquitous counter that is guaranteed to work
+ // on all platforms
+ const std::vector<std::string> kMetrics{"cycles"};
+
+ // Cannot create a vector of actual objects because the
+ // copy constructor of PerfCounters is deleted - and so is
+ // implicitly deleted on PerfCountersMeasurement too
+ std::vector<std::unique_ptr<PerfCountersMeasurement>>
+ perf_counter_measurements;
+
+ perf_counter_measurements.reserve(kMaxCounters);
+ for (int j = 0; j < kMaxCounters; ++j) {
+ perf_counter_measurements.emplace_back(
+ new PerfCountersMeasurement(kMetrics));
+ }
+
+ std::vector<std::pair<std::string, double>> measurements;
+
+ // Start all counters together to see if they hold
+ size_t max_counters = kMaxCounters;
+ for (size_t i = 0; i < kMaxCounters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ EXPECT_EQ(counter.num_counters(), 1);
+ if (!counter.Start()) {
+ max_counters = i;
+ break;
+ };
+ }
+
+ ASSERT_GE(max_counters, kMinValidCounters);
+
+ // Start all together
+ for (size_t i = 0; i < max_counters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+ }
+
+ // Start/stop individually
+ for (size_t i = 0; i < max_counters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ measurements.clear();
+ counter.Start();
+ EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+ }
+}
+
+// We try to do some meaningful work here but the compiler
+// insists in optimizing away our loop so we had to add a
+// no-optimize macro. In case it fails, we added some entropy
+// to this pool as well.
+
+BENCHMARK_DONT_OPTIMIZE size_t do_work() {
+ static std::mt19937 rd{std::random_device{}()};
+ static std::uniform_int_distribution<size_t> mrand(0, 10);
+ const size_t kNumLoops = 1000000;
+ size_t sum = 0;
+ for (size_t j = 0; j < kNumLoops; ++j) {
+ sum += mrand(rd);
+ }
+ benchmark::DoNotOptimize(sum);
+ return sum;
+}
+
+void measure(size_t threadcount, PerfCounterValues* before,
+ PerfCounterValues* after) {
+ BM_CHECK_NE(before, nullptr);
+ BM_CHECK_NE(after, nullptr);
std::vector<std::thread> threads(threadcount);
auto work = [&]() { BM_CHECK(do_work() > 1000); };
// We need to first set up the counters, then start the threads, so the
- // threads would inherit the counters. But later, we need to first destroy the
- // thread pool (so all the work finishes), then measure the counters. So the
- // scopes overlap, and we need to explicitly control the scope of the
+ // threads would inherit the counters. But later, we need to first destroy
+ // the thread pool (so all the work finishes), then measure the counters. So
+ // the scopes overlap, and we need to explicitly control the scope of the
// threadpool.
auto counters =
- PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3});
+ PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
for (auto& t : threads) t = std::thread(work);
- counters.Snapshot(values1);
+ counters.Snapshot(before);
for (auto& t : threads) t.join();
- counters.Snapshot(values2);
+ counters.Snapshot(after);
}
TEST(PerfCountersTest, MultiThreaded) {
@@ -126,20 +237,71 @@
GTEST_SKIP() << "Test skipped because libpfm is not supported.";
}
EXPECT_TRUE(PerfCounters::Initialize());
- PerfCounterValues values1(2);
- PerfCounterValues values2(2);
+ PerfCounterValues before(2);
+ PerfCounterValues after(2);
- measure(2, &values1, &values2);
- std::vector<double> D1{static_cast<double>(values2[0] - values1[0]),
- static_cast<double>(values2[1] - values1[1])};
+ // Notice that this test will work even if we taskset it to a single CPU
+ // In this case the threads will run sequentially
+ // Start two threads and measure the number of combined cycles and
+ // instructions
+ measure(2, &before, &after);
+ std::vector<double> Elapsed2Threads{
+ static_cast<double>(after[0] - before[0]),
+ static_cast<double>(after[1] - before[1])};
- measure(4, &values1, &values2);
- std::vector<double> D2{static_cast<double>(values2[0] - values1[0]),
- static_cast<double>(values2[1] - values1[1])};
+ // Start four threads and measure the number of combined cycles and
+ // instructions
+ measure(4, &before, &after);
+ std::vector<double> Elapsed4Threads{
+ static_cast<double>(after[0] - before[0]),
+ static_cast<double>(after[1] - before[1])};
- // Some extra work will happen on the main thread - like joining the threads
- // - so the ratio won't be quite 2.0, but very close.
- EXPECT_GE(D2[0], 1.9 * D1[0]);
- EXPECT_GE(D2[1], 1.9 * D1[1]);
+ // The following expectations fail (at least on a beefy workstation with lots
+ // of cpus) - it seems that in some circumstances the runtime of 4 threads
+ // can even be better than with 2.
+ // So instead of expecting 4 threads to be slower, let's just make sure they
+ // do not differ too much in general (one is not more than 10x than the
+ // other).
+ EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
+ EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
}
+
+TEST(PerfCountersTest, HardwareLimits) {
+ // As of this writing, the test works (i.e. causes reads to fail) under
+ // the same assumptions about hardware capabilities (i.e. a small number
+ // (3-4) of hardware counters) as the previous test,
+ // ReopenExistingCounters.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+
+ // Taken from `perf list`, but focusses only on those HW events that actually
+ // were reported when running `sudo perf stat -a sleep 10`, intersected over
+ // several platforms. All HW events listed in the first command not reported
+ // in the second seem to not work. This is sad as we don't really get to test
+ // the grouping here (groups can contain up to 6 members)...
+ std::vector<std::string> counter_names{
+ "cycles", // leader
+ "instructions", //
+ "branch-misses", //
+ };
+
+ // In the off-chance that some of these values are not supported,
+ // we filter them out so the test will complete without failure
+ // albeit it might not actually test the grouping on that platform
+ std::vector<std::string> valid_names;
+ for (const std::string& name : counter_names) {
+ if (PerfCounters::IsCounterSupported(name)) {
+ valid_names.push_back(name);
+ }
+ }
+ PerfCountersMeasurement counter(valid_names);
+
+ std::vector<std::pair<std::string, double>> measurements;
+
+ counter.Start();
+ EXPECT_TRUE(counter.Stop(measurements));
+}
+
} // namespace
diff --git a/third-party/benchmark/test/perf_counters_test.cc b/third-party/benchmark/test/perf_counters_test.cc
index 3017a45..3cc593e 100644
--- a/third-party/benchmark/test/perf_counters_test.cc
+++ b/third-party/benchmark/test/perf_counters_test.cc
@@ -1,27 +1,92 @@
+#include <cstdarg>
#undef NDEBUG
+#include "../src/commandlineflags.h"
#include "../src/perf_counters.h"
-
#include "benchmark/benchmark.h"
#include "output_test.h"
+namespace benchmark {
+
+BM_DECLARE_string(benchmark_perf_counters);
+
+} // namespace benchmark
+
static void BM_Simple(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_Simple);
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}});
+const int kIters = 1000000;
+
+void BM_WithoutPauseResume(benchmark::State& state) {
+ int n = 0;
+
+ for (auto _ : state) {
+ for (auto i = 0; i < kIters; ++i) {
+ n = 1 - n;
+ benchmark::DoNotOptimize(n);
+ }
+ }
+}
+
+BENCHMARK(BM_WithoutPauseResume);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithoutPauseResume\",$"}});
+
+void BM_WithPauseResume(benchmark::State& state) {
+ int m = 0, n = 0;
+
+ for (auto _ : state) {
+ for (auto i = 0; i < kIters; ++i) {
+ n = 1 - n;
+ benchmark::DoNotOptimize(n);
+ }
+
+ state.PauseTiming();
+ for (auto j = 0; j < kIters; ++j) {
+ m = 1 - m;
+ benchmark::DoNotOptimize(m);
+ }
+ state.ResumeTiming();
+ }
+}
+
+BENCHMARK(BM_WithPauseResume);
+
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithPauseResume\",$"}});
+
static void CheckSimple(Results const& e) {
CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0);
- CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0);
}
+
+double withoutPauseResumeInstrCount = 0.0;
+double withPauseResumeInstrCount = 0.0;
+
+static void SaveInstrCountWithoutResume(Results const& e) {
+ withoutPauseResumeInstrCount = e.GetAs<double>("INSTRUCTIONS");
+}
+
+static void SaveInstrCountWithResume(Results const& e) {
+ withPauseResumeInstrCount = e.GetAs<double>("INSTRUCTIONS");
+}
+
CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple);
+CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &SaveInstrCountWithoutResume);
+CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &SaveInstrCountWithResume);
int main(int argc, char* argv[]) {
if (!benchmark::internal::PerfCounters::kSupported) {
return 0;
}
+ benchmark::FLAGS_benchmark_perf_counters = "CYCLES,INSTRUCTIONS";
+ benchmark::internal::PerfCounters::Initialize();
RunOutputTests(argc, argv);
+
+ BM_CHECK_GT(withPauseResumeInstrCount, kIters);
+ BM_CHECK_GT(withoutPauseResumeInstrCount, kIters);
+ BM_CHECK_LT(withPauseResumeInstrCount, 1.5 * withoutPauseResumeInstrCount);
}
diff --git a/third-party/benchmark/test/register_benchmark_test.cc b/third-party/benchmark/test/register_benchmark_test.cc
index 602405b..d69d144 100644
--- a/third-party/benchmark/test/register_benchmark_test.cc
+++ b/third-party/benchmark/test/register_benchmark_test.cc
@@ -10,7 +10,7 @@
class TestReporter : public benchmark::ConsoleReporter {
public:
- virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ void ReportRuns(const std::vector<Run>& report) override {
all_runs_.insert(all_runs_.end(), begin(report), end(report));
ConsoleReporter::ReportRuns(report);
}
@@ -19,11 +19,11 @@
};
struct TestCase {
- std::string name;
- const char* label;
+ const std::string name;
+ const std::string label;
// Note: not explicit as we rely on it being converted through ADD_CASES.
- TestCase(const char* xname) : TestCase(xname, nullptr) {}
- TestCase(const char* xname, const char* xlabel)
+ TestCase(const std::string& xname) : TestCase(xname, "") {}
+ TestCase(const std::string& xname, const std::string& xlabel)
: name(xname), label(xlabel) {}
typedef benchmark::BenchmarkReporter::Run Run;
@@ -32,7 +32,7 @@
// clang-format off
BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got "
<< run.benchmark_name();
- if (label) {
+ if (!label.empty()) {
BM_CHECK(run.report_label == label) << "expected " << label << " got "
<< run.report_label;
} else {
@@ -96,6 +96,18 @@
#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
//----------------------------------------------------------------------------//
+// Test RegisterBenchmark with DISABLED_ benchmark
+//----------------------------------------------------------------------------//
+void DISABLED_BM_function(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(DISABLED_BM_function);
+ReturnVal dummy3 = benchmark::RegisterBenchmark("DISABLED_BM_function_manual",
+ DISABLED_BM_function);
+// No need to add cases because we don't expect them to run.
+
+//----------------------------------------------------------------------------//
// Test RegisterBenchmark with different callable types
//----------------------------------------------------------------------------//
@@ -111,7 +123,7 @@
{
CustomFixture fx;
benchmark::RegisterBenchmark("custom_fixture", fx);
- AddCases({"custom_fixture"});
+ AddCases({std::string("custom_fixture")});
}
#endif
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
diff --git a/third-party/benchmark/test/reporter_output_test.cc b/third-party/benchmark/test/reporter_output_test.cc
index 2b6e654..7867165 100644
--- a/third-party/benchmark/test/reporter_output_test.cc
+++ b/third-party/benchmark/test/reporter_output_test.cc
@@ -17,7 +17,7 @@
AddCases(TC_ConsoleErr,
{
{"^%int-%int-%intT%int:%int:%int[-+]%int:%int$", MR_Default},
- {"Running .*/reporter_output_test(\\.exe)?$", MR_Next},
+ {"Running .*(/|\\\\)reporter_output_test(\\.exe)?$", MR_Next},
{"Run on \\(%int X %float MHz CPU s?\\)", MR_Next},
});
AddCases(TC_JSONOut,
@@ -55,6 +55,9 @@
{{"Load Average: (%float, ){0,2}%float$", MR_Next}});
}
AddCases(TC_JSONOut, {{"\"load_avg\": \\[(%float,?){0,3}],$", MR_Next}});
+ AddCases(TC_JSONOut, {{"\"library_version\": \".*\",$", MR_Next}});
+ AddCases(TC_JSONOut, {{"\"library_build_type\": \".*\",$", MR_Next}});
+ AddCases(TC_JSONOut, {{"\"json_schema_version\": 1$", MR_Next}});
return 0;
}
int dummy_register = AddContextCases();
@@ -93,7 +96,8 @@
void BM_bytes_per_second(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
state.SetBytesProcessed(1);
}
@@ -124,7 +128,8 @@
void BM_items_per_second(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
state.SetItemsProcessed(1);
}
@@ -318,7 +323,7 @@
ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}});
// ========================================================================= //
-// ------------------------ Testing Arg Name Output ----------------------- //
+// ------------------------ Testing Arg Name Output ------------------------ //
// ========================================================================= //
void BM_arg_name(benchmark::State& state) {
@@ -404,7 +409,8 @@
void BM_Complexity_O1(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
state.SetComplexityN(state.range(0));
}
@@ -1085,7 +1091,7 @@
{"^\"BM_UserPercentStats/iterations:5/repeats:3/"
"manual_time_stddev\",%csv_report$"},
{"^\"BM_UserPercentStats/iterations:5/repeats:3/"
- "manual_time_\",%csv_report$"}});
+ "manual_time_\",%csv_cv_report$"}});
// ========================================================================= //
// ------------------------- Testing StrEscape JSON ------------------------ //
diff --git a/third-party/benchmark/test/skip_with_error_test.cc b/third-party/benchmark/test/skip_with_error_test.cc
index 026d479..2139a19 100644
--- a/third-party/benchmark/test/skip_with_error_test.cc
+++ b/third-party/benchmark/test/skip_with_error_test.cc
@@ -10,17 +10,17 @@
class TestReporter : public benchmark::ConsoleReporter {
public:
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+ bool ReportContext(const Context& context) override {
return ConsoleReporter::ReportContext(context);
};
- virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ void ReportRuns(const std::vector<Run>& report) override {
all_runs_.insert(all_runs_.end(), begin(report), end(report));
ConsoleReporter::ReportRuns(report);
}
TestReporter() {}
- virtual ~TestReporter() {}
+ ~TestReporter() override {}
mutable std::vector<Run> all_runs_;
};
@@ -35,8 +35,9 @@
void CheckRun(Run const& run) const {
BM_CHECK(name == run.benchmark_name())
<< "expected " << name << " got " << run.benchmark_name();
- BM_CHECK(error_occurred == run.error_occurred);
- BM_CHECK(error_message == run.error_message);
+ BM_CHECK_EQ(error_occurred,
+ benchmark::internal::SkippedWithError == run.skipped);
+ BM_CHECK(error_message == run.skip_message);
if (error_occurred) {
// BM_CHECK(run.iterations == 0);
} else {
@@ -47,7 +48,8 @@
std::vector<TestCase> ExpectedResults;
-int AddCases(const char* base_name, std::initializer_list<TestCase> const& v) {
+int AddCases(const std::string& base_name,
+ std::initializer_list<TestCase> const& v) {
for (auto TC : v) {
TC.name = base_name + TC.name;
ExpectedResults.push_back(std::move(TC));
@@ -141,7 +143,8 @@
void BM_error_after_running(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
if (state.thread_index() <= (state.threads() / 2))
state.SkipWithError("error message");
diff --git a/third-party/benchmark/test/spec_arg_test.cc b/third-party/benchmark/test/spec_arg_test.cc
index 043db1b..06aafbe 100644
--- a/third-party/benchmark/test/spec_arg_test.cc
+++ b/third-party/benchmark/test/spec_arg_test.cc
@@ -17,11 +17,11 @@
class TestReporter : public benchmark::ConsoleReporter {
public:
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+ bool ReportContext(const Context& context) override {
return ConsoleReporter::ReportContext(context);
};
- virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ void ReportRuns(const std::vector<Run>& report) override {
assert(report.size() == 1);
matched_functions.push_back(report[0].run_name.function_name);
ConsoleReporter::ReportRuns(report);
@@ -29,7 +29,7 @@
TestReporter() {}
- virtual ~TestReporter() {}
+ ~TestReporter() override {}
const std::vector<std::string>& GetMatchedFunctions() const {
return matched_functions;
@@ -91,5 +91,15 @@
<< matched_functions.front() << "]\n";
return 2;
}
+
+ // Test that SetBenchmarkFilter works.
+ const std::string golden_value = "golden_value";
+ benchmark::SetBenchmarkFilter(golden_value);
+ std::string current_value = benchmark::GetBenchmarkFilter();
+ if (golden_value != current_value) {
+ std::cerr << "Expected [" << golden_value
+ << "] for --benchmark_filter but got [" << current_value << "]\n";
+ return 3;
+ }
return 0;
}
diff --git a/third-party/benchmark/test/spec_arg_verbosity_test.cc b/third-party/benchmark/test/spec_arg_verbosity_test.cc
new file mode 100644
index 0000000..8f8eb6d
--- /dev/null
+++ b/third-party/benchmark/test/spec_arg_verbosity_test.cc
@@ -0,0 +1,43 @@
+#include <string.h>
+
+#include <iostream>
+
+#include "benchmark/benchmark.h"
+
+// Tests that the user-specified verbosity level can be retrieved.
+static void BM_Verbosity(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_Verbosity);
+
+int main(int argc, char** argv) {
+ const int32_t flagv = 42;
+
+ // Verify that argv specify --v=42.
+ bool found = false;
+ for (int i = 0; i < argc; ++i) {
+ if (strcmp("--v=42", argv[i]) == 0) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ std::cerr << "This test requires '--v=42' to be passed as a command-line "
+ << "argument.\n";
+ return 1;
+ }
+
+ benchmark::Initialize(&argc, argv);
+
+ // Check that the current flag value is reported accurately via the
+ // GetBenchmarkVerbosity() function.
+ if (flagv != benchmark::GetBenchmarkVerbosity()) {
+ std::cerr
+ << "Seeing different value for flags. GetBenchmarkVerbosity() returns ["
+ << benchmark::GetBenchmarkVerbosity() << "] expected flag=[" << flagv
+ << "]\n";
+ return 1;
+ }
+ return 0;
+}
diff --git a/third-party/benchmark/test/statistics_gtest.cc b/third-party/benchmark/test/statistics_gtest.cc
index 1de2d87..48c7726 100644
--- a/third-party/benchmark/test/statistics_gtest.cc
+++ b/third-party/benchmark/test/statistics_gtest.cc
@@ -28,8 +28,8 @@
TEST(StatisticsTest, CV) {
EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({101, 101, 101, 101}), 0.0);
EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({1, 2, 3}), 1. / 2.);
- EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}),
- 0.32888184094918121);
+ ASSERT_NEAR(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}),
+ 0.32888184094918121, 1e-15);
}
} // end namespace
diff --git a/third-party/benchmark/test/string_util_gtest.cc b/third-party/benchmark/test/string_util_gtest.cc
index 698f2d4..67b4bc0 100644
--- a/third-party/benchmark/test/string_util_gtest.cc
+++ b/third-party/benchmark/test/string_util_gtest.cc
@@ -1,9 +1,12 @@
//===---------------------------------------------------------------------===//
-// statistics_test - Unit tests for src/statistics.cc
+// string_util_test - Unit tests for src/string_util.cc
//===---------------------------------------------------------------------===//
+#include <tuple>
+
#include "../src/internal_macros.h"
#include "../src/string_util.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
@@ -63,7 +66,10 @@
EXPECT_EQ(4ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
- { ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); }
+ {
+ ASSERT_THROW(std::ignore = benchmark::stoul("this is a test"),
+ std::invalid_argument);
+ }
#endif
}
@@ -107,7 +113,10 @@
EXPECT_EQ(4ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
-{ ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); }
+{
+ ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"),
+ std::invalid_argument);
+}
#endif
}
@@ -137,7 +146,10 @@
EXPECT_EQ(8ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
-{ ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); }
+{
+ ASSERT_THROW(std::ignore = benchmark::stod("this is a test"),
+ std::invalid_argument);
+}
#endif
}
@@ -149,4 +161,39 @@
std::vector<std::string>({"hello", "there", "is", "more"}));
}
+using HumanReadableFixture = ::testing::TestWithParam<
+ std::tuple<double, benchmark::Counter::OneK, std::string>>;
+
+INSTANTIATE_TEST_SUITE_P(
+ HumanReadableTests, HumanReadableFixture,
+ ::testing::Values(
+ std::make_tuple(0.0, benchmark::Counter::kIs1024, "0"),
+ std::make_tuple(999.0, benchmark::Counter::kIs1024, "999"),
+ std::make_tuple(1000.0, benchmark::Counter::kIs1024, "1000"),
+ std::make_tuple(1024.0, benchmark::Counter::kIs1024, "1Ki"),
+ std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1024,
+ "976\\.56.Ki"),
+ std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1024, "1Mi"),
+ std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1024,
+ "953\\.674Mi"),
+ std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1024,
+ "1Gi"),
+ std::make_tuple(0.0, benchmark::Counter::kIs1000, "0"),
+ std::make_tuple(999.0, benchmark::Counter::kIs1000, "999"),
+ std::make_tuple(1000.0, benchmark::Counter::kIs1000, "1k"),
+ std::make_tuple(1024.0, benchmark::Counter::kIs1000, "1.024k"),
+ std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1000, "1M"),
+ std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1000,
+ "1\\.04858M"),
+ std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1000,
+ "1G"),
+ std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1000,
+ "1\\.07374G")));
+
+TEST_P(HumanReadableFixture, HumanReadableNumber) {
+ std::string str = benchmark::HumanReadableNumber(std::get<0>(GetParam()),
+ std::get<1>(GetParam()));
+ ASSERT_THAT(str, ::testing::MatchesRegex(std::get<2>(GetParam())));
+}
+
} // end namespace
diff --git a/third-party/benchmark/test/time_unit_gtest.cc b/third-party/benchmark/test/time_unit_gtest.cc
new file mode 100644
index 0000000..484ecbc
--- /dev/null
+++ b/third-party/benchmark/test/time_unit_gtest.cc
@@ -0,0 +1,37 @@
+#include "../include/benchmark/benchmark.h"
+#include "gtest/gtest.h"
+
+namespace benchmark {
+namespace internal {
+
+namespace {
+
+class DummyBenchmark : public Benchmark {
+ public:
+ DummyBenchmark() : Benchmark("dummy") {}
+ void Run(State&) override {}
+};
+
+TEST(DefaultTimeUnitTest, TimeUnitIsNotSet) {
+ DummyBenchmark benchmark;
+ EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond);
+}
+
+TEST(DefaultTimeUnitTest, DefaultIsSet) {
+ DummyBenchmark benchmark;
+ EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond);
+ SetDefaultTimeUnit(kMillisecond);
+ EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond);
+}
+
+TEST(DefaultTimeUnitTest, DefaultAndExplicitUnitIsSet) {
+ DummyBenchmark benchmark;
+ benchmark.Unit(kMillisecond);
+ SetDefaultTimeUnit(kMicrosecond);
+
+ EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond);
+}
+
+} // namespace
+} // namespace internal
+} // namespace benchmark
diff --git a/third-party/benchmark/test/user_counters_tabular_test.cc b/third-party/benchmark/test/user_counters_tabular_test.cc
index 45ac043..cfc1ab0 100644
--- a/third-party/benchmark/test/user_counters_tabular_test.cc
+++ b/third-party/benchmark/test/user_counters_tabular_test.cc
@@ -63,6 +63,9 @@
void BM_Counters_Tabular(benchmark::State& state) {
for (auto _ : state) {
+ // This test requires a non-zero CPU time to avoid divide-by-zero
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters.insert({
@@ -330,7 +333,7 @@
{{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report,"
"%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
- {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_report,"
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_cv_report,"
"%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
{{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report,"
@@ -348,7 +351,7 @@
{{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report,"
"%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
- {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_report,"
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_cv_report,"
"%float,%float,%float,%float,%float,%float$"}});
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
@@ -372,7 +375,8 @@
void BM_CounterRates_Tabular(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters.insert({
diff --git a/third-party/benchmark/test/user_counters_test.cc b/third-party/benchmark/test/user_counters_test.cc
index 1cc7455..22252ac 100644
--- a/third-party/benchmark/test/user_counters_test.cc
+++ b/third-party/benchmark/test/user_counters_test.cc
@@ -67,7 +67,8 @@
void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
state.counters["foo"] = 1;
state.counters["bar"] = ++num_calls1;
@@ -118,7 +119,8 @@
void BM_Counters_Rate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate};
@@ -161,7 +163,8 @@
void BM_Invert(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] = bm::Counter{0.0001, bm::Counter::kInvert};
@@ -195,14 +198,14 @@
CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert);
// ========================================================================= //
-// ------------------------- InvertedRate Counters Output
-// -------------------------- //
+// --------------------- InvertedRate Counters Output ---------------------- //
// ========================================================================= //
void BM_Counters_InvertedRate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] =
@@ -330,7 +333,8 @@
void BM_Counters_AvgThreadsRate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate};
@@ -417,7 +421,8 @@
void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] =
@@ -460,7 +465,7 @@
&CheckIsIterationInvariantRate);
// ========================================================================= //
-// ------------------- AvgIterations Counters Output ------------------ //
+// --------------------- AvgIterations Counters Output --------------------- //
// ========================================================================= //
void BM_Counters_AvgIterations(benchmark::State& state) {
@@ -502,13 +507,14 @@
CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations);
// ========================================================================= //
-// ----------------- AvgIterationsRate Counters Output ---------------- //
+// ------------------- AvgIterationsRate Counters Output ------------------- //
// ========================================================================= //
void BM_Counters_kAvgIterationsRate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = double(state.iterations()) * double(state.iterations());
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterationsRate};
diff --git a/third-party/benchmark/test/user_counters_thousands_test.cc b/third-party/benchmark/test/user_counters_thousands_test.cc
index a42683b..fc15383 100644
--- a/third-party/benchmark/test/user_counters_thousands_test.cc
+++ b/third-party/benchmark/test/user_counters_thousands_test.cc
@@ -16,13 +16,13 @@
{"t0_1000000DefaultBase",
bm::Counter(1000 * 1000, bm::Counter::kDefaults)},
{"t1_1000000Base1000", bm::Counter(1000 * 1000, bm::Counter::kDefaults,
- benchmark::Counter::OneK::kIs1000)},
+ bm::Counter::OneK::kIs1000)},
{"t2_1000000Base1024", bm::Counter(1000 * 1000, bm::Counter::kDefaults,
- benchmark::Counter::OneK::kIs1024)},
+ bm::Counter::OneK::kIs1024)},
{"t3_1048576Base1000", bm::Counter(1024 * 1024, bm::Counter::kDefaults,
- benchmark::Counter::OneK::kIs1000)},
+ bm::Counter::OneK::kIs1000)},
{"t4_1048576Base1024", bm::Counter(1024 * 1024, bm::Counter::kDefaults,
- benchmark::Counter::OneK::kIs1024)},
+ bm::Counter::OneK::kIs1024)},
});
}
BENCHMARK(BM_Counters_Thousands)->Repetitions(2);
@@ -30,21 +30,21 @@
TC_ConsoleOut,
{
{"^BM_Counters_Thousands/repeats:2 %console_report "
- "t0_1000000DefaultBase=1000k "
- "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k "
- "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"},
+ "t0_1000000DefaultBase=1M "
+ "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki "
+ "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"},
{"^BM_Counters_Thousands/repeats:2 %console_report "
- "t0_1000000DefaultBase=1000k "
- "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k "
- "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"},
+ "t0_1000000DefaultBase=1M "
+ "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki "
+ "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"},
{"^BM_Counters_Thousands/repeats:2_mean %console_report "
- "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k "
- "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k "
- "t4_1048576Base1024=1024k$"},
+ "t0_1000000DefaultBase=1M t1_1000000Base1000=1M "
+ "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M "
+ "t4_1048576Base1024=1Mi$"},
{"^BM_Counters_Thousands/repeats:2_median %console_report "
- "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k "
- "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k "
- "t4_1048576Base1024=1024k$"},
+ "t0_1000000DefaultBase=1M t1_1000000Base1000=1M "
+ "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M "
+ "t4_1048576Base1024=1Mi$"},
{"^BM_Counters_Thousands/repeats:2_stddev %console_time_only_report [ "
"]*2 t0_1000000DefaultBase=0 t1_1000000Base1000=0 "
"t2_1000000Base1024=0 t3_1048576Base1000=0 t4_1048576Base1024=0$"},
diff --git a/third-party/benchmark/tools/compare.py b/third-party/benchmark/tools/compare.py
index f1504c9..7572520 100755
--- a/third-party/benchmark/tools/compare.py
+++ b/third-party/benchmark/tools/compare.py
@@ -1,29 +1,35 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
-import unittest
+# type: ignore
"""
compare.py - versatile benchmark output compare tool
"""
import argparse
-from argparse import ArgumentParser
import json
+import os
import sys
+import unittest
+from argparse import ArgumentParser
+
import gbench
-from gbench import util, report
-from gbench.util import *
+from gbench import report, util
def check_inputs(in1, in2, flags):
"""
Perform checking on the user provided inputs and diagnose any abnormalities
"""
- in1_kind, in1_err = classify_input_file(in1)
- in2_kind, in2_err = classify_input_file(in2)
- output_file = find_benchmark_flag("--benchmark_out=", flags)
- output_type = find_benchmark_flag("--benchmark_out_format=", flags)
- if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
+ in1_kind, in1_err = util.classify_input_file(in1)
+ in2_kind, in2_err = util.classify_input_file(in2)
+ output_file = util.find_benchmark_flag("--benchmark_out=", flags)
+ output_type = util.find_benchmark_flag("--benchmark_out_format=", flags)
+ if (
+ in1_kind == util.IT_Executable
+ and in2_kind == util.IT_Executable
+ and output_file
+ ):
print(
(
"WARNING: '--benchmark_out=%s' will be passed to both "
@@ -31,11 +37,14 @@
)
% output_file
)
- if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
- print(
- "WARNING: passing optional flags has no effect since both "
- "inputs are JSON"
- )
+ if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON:
+ # When both sides are JSON the only supported flag is
+ # --benchmark_filter=
+ for flag in util.remove_benchmark_flags("--benchmark_filter=", flags):
+ print(
+ "WARNING: passing %s has no effect since both "
+ "inputs are JSON" % flag
+ )
if output_type is not None and output_type != "json":
print(
(
@@ -48,7 +57,9 @@
def create_parser():
- parser = ArgumentParser(description="versatile benchmark output compare tool")
+ parser = ArgumentParser(
+ description="versatile benchmark output compare tool"
+ )
parser.add_argument(
"-a",
@@ -294,7 +305,9 @@
# Now, filter the benchmarks so that the difference report can work
if filter_baseline and filter_contender:
replacement = "[%s vs. %s]" % (filter_baseline, filter_contender)
- json1 = gbench.report.filter_benchmark(json1_orig, filter_baseline, replacement)
+ json1 = gbench.report.filter_benchmark(
+ json1_orig, filter_baseline, replacement
+ )
json2 = gbench.report.filter_benchmark(
json2_orig, filter_contender, replacement
)
@@ -314,7 +327,7 @@
# Optionally, diff and output to JSON
if args.dump_to_json is not None:
with open(args.dump_to_json, "w") as f_json:
- json.dump(diff_report, f_json)
+ json.dump(diff_report, f_json, indent=1)
class TestParser(unittest.TestCase):
@@ -423,7 +436,9 @@
self.assertFalse(parsed.benchmark_options)
def test_filters_with_remainder(self):
- parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d", "e"])
+ parsed = self.parser.parse_args(
+ ["filters", self.testInput0, "c", "d", "e"]
+ )
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, "filters")
@@ -459,7 +474,14 @@
def test_benchmarksfiltered_with_remainder(self):
parsed = self.parser.parse_args(
- ["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e", "f"]
+ [
+ "benchmarksfiltered",
+ self.testInput0,
+ "c",
+ self.testInput1,
+ "e",
+ "f",
+ ]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
diff --git a/third-party/benchmark/tools/gbench/Inputs/test1_run1.json b/third-party/benchmark/tools/gbench/Inputs/test1_run1.json
index 601e327..9daed0b 100644
--- a/third-party/benchmark/tools/gbench/Inputs/test1_run1.json
+++ b/third-party/benchmark/tools/gbench/Inputs/test1_run1.json
@@ -114,6 +114,14 @@
"real_time": 1,
"cpu_time": 1,
"time_unit": "s"
+ },
+ {
+ "name": "BM_hasLabel",
+ "label": "a label",
+ "iterations": 1,
+ "real_time": 1,
+ "cpu_time": 1,
+ "time_unit": "s"
}
]
}
diff --git a/third-party/benchmark/tools/gbench/Inputs/test1_run2.json b/third-party/benchmark/tools/gbench/Inputs/test1_run2.json
index 3cbcf39..dc52970 100644
--- a/third-party/benchmark/tools/gbench/Inputs/test1_run2.json
+++ b/third-party/benchmark/tools/gbench/Inputs/test1_run2.json
@@ -114,6 +114,14 @@
"real_time": 1,
"cpu_time": 1,
"time_unit": "ns"
+ },
+ {
+ "name": "BM_hasLabel",
+ "label": "a label",
+ "iterations": 1,
+ "real_time": 1,
+ "cpu_time": 1,
+ "time_unit": "s"
}
]
}
diff --git a/third-party/benchmark/tools/gbench/Inputs/test5_run0.json b/third-party/benchmark/tools/gbench/Inputs/test5_run0.json
new file mode 100644
index 0000000..074103b
--- /dev/null
+++ b/third-party/benchmark/tools/gbench/Inputs/test5_run0.json
@@ -0,0 +1,18 @@
+{
+ "context": {
+ "date": "2016-08-02 17:44:46",
+ "num_cpus": 4,
+ "mhz_per_cpu": 4228,
+ "cpu_scaling_enabled": false,
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_ManyRepetitions",
+ "iterations": 1000,
+ "real_time": 1,
+ "cpu_time": 1000,
+ "time_unit": "s"
+ }
+ ]
+}
diff --git a/third-party/benchmark/tools/gbench/Inputs/test5_run1.json b/third-party/benchmark/tools/gbench/Inputs/test5_run1.json
new file mode 100644
index 0000000..430df9f
--- /dev/null
+++ b/third-party/benchmark/tools/gbench/Inputs/test5_run1.json
@@ -0,0 +1,18 @@
+{
+ "context": {
+ "date": "2016-08-02 17:44:46",
+ "num_cpus": 4,
+ "mhz_per_cpu": 4228,
+ "cpu_scaling_enabled": false,
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_ManyRepetitions",
+ "iterations": 1000,
+ "real_time": 1000,
+ "cpu_time": 1,
+ "time_unit": "s"
+ }
+ ]
+}
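
The two test5 inputs above are mirror images (real_time 1 s vs 1000 s, cpu_time 1000 s vs 1 s), which is what produces the expected diff values of +999.0 and -0.999 in the new report.py test further down. A sketch of that relative-change arithmetic (the real gbench.report.calculate_change may also guard edge cases such as zero baselines):

def calculate_change(old_val, new_val):
    # Relative change, (new - old) / |old|; matches the expected columns
    # in the report.py tests below.
    return float(new_val - old_val) / abs(old_val)

print(calculate_change(1.0, 1000.0))   # real_time: +999.0
print(calculate_change(1000.0, 1.0))   # cpu_time:  -0.999

The same pair of runs also exercises the change from `if cpu_pvalue and time_pvalue` to `is not None` checks later in report.py: both U-test p-values come out exactly 0.0, which a plain truthiness test would have dropped.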
diff --git a/third-party/benchmark/tools/gbench/__init__.py b/third-party/benchmark/tools/gbench/__init__.py
index ffca396..9212568 100644
--- a/third-party/benchmark/tools/gbench/__init__.py
+++ b/third-party/benchmark/tools/gbench/__init__.py
@@ -5,4 +5,4 @@
__versioninfo__ = (0, 5, 0)
__version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
-__all__ = []
+__all__ = [] # type: ignore
diff --git a/third-party/benchmark/tools/gbench/report.py b/third-party/benchmark/tools/gbench/report.py
index 5092b0b..7158fd1 100644
--- a/third-party/benchmark/tools/gbench/report.py
+++ b/third-party/benchmark/tools/gbench/report.py
@@ -1,15 +1,17 @@
-"""report.py - Utilities for reporting statistics about benchmark results
+# type: ignore
+
+"""
+report.py - Utilities for reporting statistics about benchmark results
"""
-import unittest
-import os
-import re
import copy
+import os
import random
+import re
+import unittest
-from scipy.stats import mannwhitneyu, gmean
from numpy import array
-from pandas import Timedelta
+from scipy.stats import gmean, mannwhitneyu
class BenchmarkColor(object):
@@ -42,6 +44,13 @@
UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"
+_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
+ "s": 1.0,
+ "ms": 1e-3,
+ "us": 1e-6,
+ "ns": 1e-9,
+}
+
def color_format(use_color, fmt_str, *args, **kwargs):
"""
@@ -52,7 +61,10 @@
"""
assert use_color is True or use_color is False
if not use_color:
- args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE for arg in args]
+ args = [
+ arg if not isinstance(arg, BenchmarkColor) else BC_NONE
+ for arg in args
+ ]
kwargs = {
key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for key, arg in kwargs.items()
@@ -165,9 +177,9 @@
Get value of field_name field of benchmark, which is time with time unit
time_unit, as time in seconds.
"""
- time_unit = benchmark["time_unit"] if "time_unit" in benchmark else "s"
- dt = Timedelta(benchmark[field_name], time_unit)
- return dt / Timedelta(1, "s")
+ timedelta = benchmark[field_name]
+ time_unit = benchmark.get("time_unit", "s")
+ return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit)
def calculate_geomean(json):
@@ -273,6 +285,7 @@
partitions = partition_benchmarks(json1, json2)
for partition in partitions:
benchmark_name = partition[0][0]["name"]
+ label = partition[0][0]["label"] if "label" in partition[0][0] else ""
time_unit = partition[0][0]["time_unit"]
measurements = []
utest_results = {}
@@ -286,8 +299,12 @@
"cpu_time": bn["cpu_time"],
"real_time_other": other_bench["real_time"],
"cpu_time_other": other_bench["cpu_time"],
- "time": calculate_change(bn["real_time"], other_bench["real_time"]),
- "cpu": calculate_change(bn["cpu_time"], other_bench["cpu_time"]),
+ "time": calculate_change(
+ bn["real_time"], other_bench["real_time"]
+ ),
+ "cpu": calculate_change(
+ bn["cpu_time"], other_bench["cpu_time"]
+ ),
}
)
@@ -298,7 +315,7 @@
have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
timings_cpu, timings_time
)
- if cpu_pvalue and time_pvalue:
+ if cpu_pvalue is not None and time_pvalue is not None:
utest_results = {
"have_optimal_repetitions": have_optimal_repetitions,
"cpu_pvalue": cpu_pvalue,
@@ -313,16 +330,20 @@
# benchmark suite.
if measurements:
run_type = (
- partition[0][0]["run_type"] if "run_type" in partition[0][0] else ""
+ partition[0][0]["run_type"]
+ if "run_type" in partition[0][0]
+ else ""
)
aggregate_name = (
partition[0][0]["aggregate_name"]
- if run_type == "aggregate" and "aggregate_name" in partition[0][0]
+ if run_type == "aggregate"
+ and "aggregate_name" in partition[0][0]
else ""
)
diff_report.append(
{
"name": benchmark_name,
+ "label": label,
"measurements": measurements,
"time_unit": time_unit,
"run_type": run_type,
@@ -337,6 +358,7 @@
diff_report.append(
{
"name": "OVERALL_GEOMEAN",
+ "label": "",
"measurements": [
{
"real_time": lhs_gmean[0],
@@ -392,7 +414,7 @@
# and if it is non-aggregate, then don't print it.
if (
not include_aggregates_only
- or not "run_type" in benchmark
+ or "run_type" not in benchmark
or benchmark["run_type"] == "aggregate"
):
for measurement in benchmark["measurements"]:
@@ -438,7 +460,9 @@
def load_results(self):
import json
- testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), "Inputs")
+ testInputs = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "Inputs"
+ )
testOutput = os.path.join(testInputs, "test3_run0.json")
with open(testOutput, "r") as f:
json = json.load(f)
@@ -485,16 +509,73 @@
["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"],
["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"],
["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"],
- ["BM_1PercentFaster", "-0.0100", "-0.0100", "100", "99", "100", "99"],
- ["BM_1PercentSlower", "+0.0100", "+0.0100", "100", "101", "100", "101"],
- ["BM_10PercentFaster", "-0.1000", "-0.1000", "100", "90", "100", "90"],
- ["BM_10PercentSlower", "+0.1000", "+0.1000", "100", "110", "100", "110"],
- ["BM_100xSlower", "+99.0000", "+99.0000", "100", "10000", "100", "10000"],
- ["BM_100xFaster", "-0.9900", "-0.9900", "10000", "100", "10000", "100"],
- ["BM_10PercentCPUToTime", "+0.1000", "-0.1000", "100", "110", "100", "90"],
+ [
+ "BM_1PercentFaster",
+ "-0.0100",
+ "-0.0100",
+ "100",
+ "99",
+ "100",
+ "99",
+ ],
+ [
+ "BM_1PercentSlower",
+ "+0.0100",
+ "+0.0100",
+ "100",
+ "101",
+ "100",
+ "101",
+ ],
+ [
+ "BM_10PercentFaster",
+ "-0.1000",
+ "-0.1000",
+ "100",
+ "90",
+ "100",
+ "90",
+ ],
+ [
+ "BM_10PercentSlower",
+ "+0.1000",
+ "+0.1000",
+ "100",
+ "110",
+ "100",
+ "110",
+ ],
+ [
+ "BM_100xSlower",
+ "+99.0000",
+ "+99.0000",
+ "100",
+ "10000",
+ "100",
+ "10000",
+ ],
+ [
+ "BM_100xFaster",
+ "-0.9900",
+ "-0.9900",
+ "10000",
+ "100",
+ "10000",
+ "100",
+ ],
+ [
+ "BM_10PercentCPUToTime",
+ "+0.1000",
+ "-0.1000",
+ "100",
+ "110",
+ "100",
+ "90",
+ ],
["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"],
["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"],
- ["OVERALL_GEOMEAN", "-0.8344", "-0.8026", "0", "0", "0", "0"],
+ ["BM_hasLabel", "+0.0000", "+0.0000", "1", "1", "1", "1"],
+ ["OVERALL_GEOMEAN", "-0.8113", "-0.7779", "0", "0", "0", "0"],
]
output_lines_with_header = print_difference_report(
self.json_diff_report, use_color=False
@@ -512,6 +593,7 @@
expected_output = [
{
"name": "BM_SameTimes",
+ "label": "",
"measurements": [
{
"time": 0.0000,
@@ -527,6 +609,7 @@
},
{
"name": "BM_2xFaster",
+ "label": "",
"measurements": [
{
"time": -0.5000,
@@ -542,6 +625,7 @@
},
{
"name": "BM_2xSlower",
+ "label": "",
"measurements": [
{
"time": 1.0000,
@@ -557,6 +641,7 @@
},
{
"name": "BM_1PercentFaster",
+ "label": "",
"measurements": [
{
"time": -0.0100,
@@ -572,6 +657,7 @@
},
{
"name": "BM_1PercentSlower",
+ "label": "",
"measurements": [
{
"time": 0.0100,
@@ -587,6 +673,7 @@
},
{
"name": "BM_10PercentFaster",
+ "label": "",
"measurements": [
{
"time": -0.1000,
@@ -602,6 +689,7 @@
},
{
"name": "BM_10PercentSlower",
+ "label": "",
"measurements": [
{
"time": 0.1000,
@@ -617,6 +705,7 @@
},
{
"name": "BM_100xSlower",
+ "label": "",
"measurements": [
{
"time": 99.0000,
@@ -632,6 +721,7 @@
},
{
"name": "BM_100xFaster",
+ "label": "",
"measurements": [
{
"time": -0.9900,
@@ -647,6 +737,7 @@
},
{
"name": "BM_10PercentCPUToTime",
+ "label": "",
"measurements": [
{
"time": 0.1000,
@@ -662,6 +753,7 @@
},
{
"name": "BM_ThirdFaster",
+ "label": "",
"measurements": [
{
"time": -0.3333,
@@ -677,6 +769,7 @@
},
{
"name": "BM_NotBadTimeUnit",
+ "label": "",
"measurements": [
{
"time": -0.9000,
@@ -691,15 +784,32 @@
"utest": {},
},
{
- "name": "OVERALL_GEOMEAN",
+ "name": "BM_hasLabel",
+ "label": "a label",
"measurements": [
{
- "real_time": 1.193776641714438e-06,
- "cpu_time": 1.2144445585302297e-06,
+ "time": 0.0000,
+ "cpu": 0.0000,
+ "real_time": 1,
+ "real_time_other": 1,
+ "cpu_time": 1,
+ "cpu_time_other": 1,
+ }
+ ],
+ "time_unit": "s",
+ "utest": {},
+ },
+ {
+ "name": "OVERALL_GEOMEAN",
+ "label": "",
+ "measurements": [
+ {
+ "real_time": 3.1622776601683826e-06,
+ "cpu_time": 3.2130844755623912e-06,
"real_time_other": 1.9768988699420897e-07,
"cpu_time_other": 2.397447755209533e-07,
- "time": -0.834399601997324,
- "cpu": -0.8025889499549471,
+ "time": -0.8112976497120911,
+ "cpu": -0.7778551721181174,
}
],
"time_unit": "s",
@@ -711,6 +821,7 @@
self.assertEqual(len(self.json_diff_report), len(expected_output))
for out, expected in zip(self.json_diff_report, expected_output):
self.assertEqual(out["name"], expected["name"])
+ self.assertEqual(out["label"], expected["label"])
self.assertEqual(out["time_unit"], expected["time_unit"])
assert_utest(self, out, expected)
assert_measurements(self, out, expected)
@@ -1086,7 +1197,9 @@
assert_measurements(self, out, expected)
-class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(unittest.TestCase):
+class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
+ unittest.TestCase
+):
@classmethod
def setUpClass(cls):
def load_results():
@@ -1369,12 +1482,108 @@
for n in range(len(self.json["benchmarks"]) ** 2):
random.shuffle(self.json["benchmarks"])
- sorted_benchmarks = util.sort_benchmark_results(self.json)["benchmarks"]
+ sorted_benchmarks = util.sort_benchmark_results(self.json)[
+ "benchmarks"
+ ]
self.assertEqual(len(expected_names), len(sorted_benchmarks))
for out, expected in zip(sorted_benchmarks, expected_names):
self.assertEqual(out["name"], expected)
+class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly2(
+ unittest.TestCase
+):
+ @classmethod
+ def setUpClass(cls):
+ def load_results():
+ import json
+
+ testInputs = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "Inputs"
+ )
+ testOutput1 = os.path.join(testInputs, "test5_run0.json")
+ testOutput2 = os.path.join(testInputs, "test5_run1.json")
+ with open(testOutput1, "r") as f:
+ json1 = json.load(f)
+ json1["benchmarks"] = [
+ json1["benchmarks"][0] for i in range(1000)
+ ]
+ with open(testOutput2, "r") as f:
+ json2 = json.load(f)
+ json2["benchmarks"] = [
+ json2["benchmarks"][0] for i in range(1000)
+ ]
+ return json1, json2
+
+ json1, json2 = load_results()
+ cls.json_diff_report = get_difference_report(json1, json2, utest=True)
+
+ def test_json_diff_report_pretty_printing(self):
+ expect_line = [
+ "BM_ManyRepetitions_pvalue",
+ "0.0000",
+ "0.0000",
+ "U",
+ "Test,",
+ "Repetitions:",
+ "1000",
+ "vs",
+ "1000",
+ ]
+ output_lines_with_header = print_difference_report(
+ self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
+ )
+ output_lines = output_lines_with_header[2:]
+ found = False
+ for i in range(0, len(output_lines)):
+ parts = [x for x in output_lines[i].split(" ") if x]
+ found = expect_line == parts
+ if found:
+ break
+ self.assertTrue(found)
+
+ def test_json_diff_report(self):
+ expected_output = [
+ {
+ "name": "BM_ManyRepetitions",
+ "label": "",
+ "time_unit": "s",
+ "run_type": "",
+ "aggregate_name": "",
+ "utest": {
+ "have_optimal_repetitions": True,
+ "cpu_pvalue": 0.0,
+ "time_pvalue": 0.0,
+ "nr_of_repetitions": 1000,
+ "nr_of_repetitions_other": 1000,
+ },
+ },
+ {
+ "name": "OVERALL_GEOMEAN",
+ "label": "",
+ "measurements": [
+ {
+ "real_time": 1.0,
+ "cpu_time": 1000.000000000069,
+ "real_time_other": 1000.000000000069,
+ "cpu_time_other": 1.0,
+ "time": 999.000000000069,
+ "cpu": -0.9990000000000001,
+ }
+ ],
+ "time_unit": "s",
+ "run_type": "aggregate",
+ "aggregate_name": "geomean",
+ "utest": {},
+ },
+ ]
+ self.assertEqual(len(self.json_diff_report), len(expected_output))
+ for out, expected in zip(self.json_diff_report, expected_output):
+ self.assertEqual(out["name"], expected["name"])
+ self.assertEqual(out["time_unit"], expected["time_unit"])
+ assert_utest(self, out, expected)
+
+
def assert_utest(unittest_instance, lhs, rhs):
if lhs["utest"]:
unittest_instance.assertAlmostEqual(
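
The report.py hunks above replace the pandas Timedelta conversion with a plain unit table. A minimal sketch of the same conversion; the helper name here is assumed, since the enclosing function's signature is not shown in the hunk:

_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
    "s": 1.0,
    "ms": 1e-3,
    "us": 1e-6,
    "ns": 1e-9,
}

def field_as_seconds(benchmark, field_name):
    # A missing "time_unit" defaults to seconds, as in the hunk above.
    time_unit = benchmark.get("time_unit", "s")
    return benchmark[field_name] * _TIME_UNIT_TO_SECONDS_MULTIPLIER[time_unit]

print(field_as_seconds({"real_time": 250.0, "time_unit": "ms"}, "real_time"))  # 0.25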
diff --git a/third-party/benchmark/tools/gbench/util.py b/third-party/benchmark/tools/gbench/util.py
index a46903a..4d061a3 100644
--- a/third-party/benchmark/tools/gbench/util.py
+++ b/third-party/benchmark/tools/gbench/util.py
@@ -2,10 +2,10 @@
"""
import json
import os
-import tempfile
+import re
import subprocess
import sys
-import functools
+import tempfile
# Input file type enumeration
IT_Invalid = 0
@@ -58,7 +58,7 @@
"""
Return a tuple (type, msg) where 'type' specifies the classified type
of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
- string represeting the error.
+ string representing the error.
"""
ftype = IT_Invalid
err_msg = None
@@ -72,7 +72,8 @@
ftype = IT_JSON
else:
err_msg = (
- "'%s' does not name a valid benchmark executable or JSON file" % filename
+ "'%s' does not name a valid benchmark executable or JSON file"
+ % filename
)
return ftype, err_msg
@@ -113,13 +114,41 @@
return [f for f in benchmark_flags if not f.startswith(prefix)]
-def load_benchmark_results(fname):
+def load_benchmark_results(fname, benchmark_filter):
"""
Read benchmark output from a file and return the JSON object.
+
+    Apply benchmark_filter, a regular expression, with nearly the same
+    semantics as the --benchmark_filter argument. May be None.
+ Note: the Python regular expression engine is used instead of the
+ one used by the C++ code, which may produce different results
+ in complex cases.
+
REQUIRES: 'fname' names a file containing JSON benchmark output.
"""
+
+ def benchmark_wanted(benchmark):
+ if benchmark_filter is None:
+ return True
+ name = benchmark.get("run_name", None) or benchmark["name"]
+ return re.search(benchmark_filter, name) is not None
+
with open(fname, "r") as f:
- return json.load(f)
+ results = json.load(f)
+ if "context" in results:
+ if "json_schema_version" in results["context"]:
+ json_schema_version = results["context"]["json_schema_version"]
+ if json_schema_version != 1:
+ print(
+                    "In %s, got unsupported JSON schema version: %i, expected 1"
+ % (fname, json_schema_version)
+ )
+ sys.exit(1)
+ if "benchmarks" in results:
+ results["benchmarks"] = list(
+ filter(benchmark_wanted, results["benchmarks"])
+ )
+ return results
def sort_benchmark_results(result):
@@ -168,7 +197,9 @@
is_temp_output = True
thandle, output_name = tempfile.mkstemp()
os.close(thandle)
- benchmark_flags = list(benchmark_flags) + ["--benchmark_out=%s" % output_name]
+ benchmark_flags = list(benchmark_flags) + [
+ "--benchmark_out=%s" % output_name
+ ]
cmd = [exe_name] + benchmark_flags
print("RUNNING: %s" % " ".join(cmd))
@@ -176,7 +207,7 @@
if exitCode != 0:
print("TEST FAILED...")
sys.exit(exitCode)
- json_res = load_benchmark_results(output_name)
+ json_res = load_benchmark_results(output_name, None)
if is_temp_output:
os.unlink(output_name)
return json_res
@@ -191,7 +222,10 @@
"""
ftype = check_input_file(filename)
if ftype == IT_JSON:
- return load_benchmark_results(filename)
+ benchmark_filter = find_benchmark_flag(
+ "--benchmark_filter=", benchmark_flags
+ )
+ return load_benchmark_results(filename, benchmark_filter)
if ftype == IT_Executable:
return run_benchmark(filename, benchmark_flags)
raise ValueError("Unknown file type %s" % ftype)
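
The load_benchmark_results change above filters pre-recorded JSON with Python's re engine rather than the regex engine the C++ code uses for live runs. A small self-contained sketch of that selection step (the results dict here is made-up sample data):

import re

def benchmark_wanted(benchmark, benchmark_filter):
    # Same rule as above: prefer "run_name", fall back to "name",
    # and keep everything when no filter is given.
    if benchmark_filter is None:
        return True
    name = benchmark.get("run_name", None) or benchmark["name"]
    return re.search(benchmark_filter, name) is not None

results = {"benchmarks": [{"name": "BM_Simple"}, {"name": "BM_WithPauseResume"}]}
kept = [b for b in results["benchmarks"] if benchmark_wanted(b, "Pause")]
print([b["name"] for b in kept])  # ['BM_WithPauseResume']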
diff --git a/third-party/benchmark/tools/libpfm.BUILD.bazel b/third-party/benchmark/tools/libpfm.BUILD.bazel
new file mode 100644
index 0000000..6269534
--- /dev/null
+++ b/third-party/benchmark/tools/libpfm.BUILD.bazel
@@ -0,0 +1,22 @@
+# Build rule for libpfm, which is required to collect performance counters for
+# BENCHMARK_ENABLE_LIBPFM builds.
+
+load("@rules_foreign_cc//foreign_cc:defs.bzl", "make")
+
+filegroup(
+ name = "pfm_srcs",
+ srcs = glob(["**"]),
+)
+
+make(
+ name = "libpfm",
+ lib_source = ":pfm_srcs",
+ lib_name = "libpfm",
+ copts = [
+ "-Wno-format-truncation",
+ "-Wno-use-after-free",
+ ],
+ visibility = [
+ "//visibility:public",
+ ],
+)
diff --git a/third-party/benchmark/tools/requirements.txt b/third-party/benchmark/tools/requirements.txt
index 3b3331b..f32f35b 100644
--- a/third-party/benchmark/tools/requirements.txt
+++ b/third-party/benchmark/tools/requirements.txt
@@ -1 +1,2 @@
-scipy>=1.5.0
\ No newline at end of file
+numpy == 1.25
+scipy == 1.10.0
diff --git a/third-party/benchmark/tools/strip_asm.py b/third-party/benchmark/tools/strip_asm.py
index 086255d..bc3a774 100755
--- a/third-party/benchmark/tools/strip_asm.py
+++ b/third-party/benchmark/tools/strip_asm.py
@@ -1,20 +1,20 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
"""
strip_asm.py - Cleanup ASM output for the specified file
"""
-from argparse import ArgumentParser
-import sys
import os
import re
+import sys
+from argparse import ArgumentParser
def find_used_labels(asm):
found = set()
- label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
- for l in asm.splitlines():
- m = label_re.match(l)
+ label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
+ for line in asm.splitlines():
+ m = label_re.match(line)
if m:
found.add(".L%s" % m.group(1))
return found
@@ -23,8 +23,8 @@
def normalize_labels(asm):
decls = set()
label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
- for l in asm.splitlines():
- m = label_decl.match(l)
+ for line in asm.splitlines():
+ m = label_decl.match(line)
if m:
decls.add(m.group(0))
if len(decls) == 0:
@@ -33,7 +33,7 @@
if not needs_dot:
return asm
for ld in decls:
- asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", "\\1." + ld, asm)
+ asm = re.sub(r"(^|\s+)" + ld + r"(?=:|\s)", "\\1." + ld, asm)
return asm
@@ -41,11 +41,11 @@
asm = normalize_labels(asm)
used_decls = find_used_labels(asm)
new_asm = ""
- label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
- for l in asm.splitlines():
- m = label_decl.match(l)
+ label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
+ for line in asm.splitlines():
+ m = label_decl.match(line)
if not m or m.group(0) in used_decls:
- new_asm += l
+ new_asm += line
new_asm += "\n"
return new_asm
@@ -63,21 +63,24 @@
return True
-def process_identifiers(l):
+def process_identifiers(line):
"""
process_identifiers - process all identifiers and modify them to have
consistent names across all platforms; specifically across ELF and MachO.
For example, MachO inserts an additional understore at the beginning of
names. This function removes that.
"""
- parts = re.split(r"([a-zA-Z0-9_]+)", l)
+ parts = re.split(r"([a-zA-Z0-9_]+)", line)
new_line = ""
for tk in parts:
if is_identifier(tk):
if tk.startswith("__Z"):
tk = tk[1:]
elif (
- tk.startswith("_") and len(tk) > 1 and tk[1].isalpha() and tk[1] != "Z"
+ tk.startswith("_")
+ and len(tk) > 1
+ and tk[1].isalpha()
+ and tk[1] != "Z"
):
tk = tk[1:]
new_line += tk
@@ -93,33 +96,35 @@
# TODO: Add more things we want to remove
discard_regexes = [
- re.compile("\s+\..*$"), # directive
- re.compile("\s*#(NO_APP|APP)$"), # inline ASM
- re.compile("\s*#.*$"), # comment line
- re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), # global directive
+ re.compile(r"\s+\..*$"), # directive
+ re.compile(r"\s*#(NO_APP|APP)$"), # inline ASM
+ re.compile(r"\s*#.*$"), # comment line
re.compile(
- "\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
+ r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
+ ), # global directive
+ re.compile(
+ r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
),
]
- keep_regexes = []
+ keep_regexes: list[re.Pattern] = []
fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")
- for l in asm.splitlines():
+ for line in asm.splitlines():
# Remove Mach-O attribute
- l = l.replace("@GOTPCREL", "")
+ line = line.replace("@GOTPCREL", "")
add_line = True
for reg in discard_regexes:
- if reg.match(l) is not None:
+ if reg.match(line) is not None:
add_line = False
break
for reg in keep_regexes:
- if reg.match(l) is not None:
+ if reg.match(line) is not None:
add_line = True
break
if add_line:
- if fn_label_def.match(l) and len(new_contents) != 0:
+ if fn_label_def.match(line) and len(new_contents) != 0:
new_contents += "\n"
- l = process_identifiers(l)
- new_contents += l
+ line = process_identifiers(line)
+ new_contents += line
new_contents += "\n"
return new_contents
@@ -127,7 +132,11 @@
def main():
parser = ArgumentParser(description="generate a stripped assembly file")
parser.add_argument(
- "input", metavar="input", type=str, nargs=1, help="An input assembly file"
+ "input",
+ metavar="input",
+ type=str,
+ nargs=1,
+ help="An input assembly file",
)
parser.add_argument(
"out", metavar="output", type=str, nargs=1, help="The output file"
@@ -136,9 +145,9 @@
input = args.input[0]
output = args.out[0]
if not os.path.isfile(input):
- print(("ERROR: input file '%s' does not exist") % input)
+ print("ERROR: input file '%s' does not exist" % input)
sys.exit(1)
- contents = None
+
with open(input, "r") as f:
contents = f.read()
new_contents = process_asm(contents)