mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-11-25 15:11:35 +00:00
cpython: add enableNoSemanticInterposition flag
This flag enables the -fno-semantic-interposition compiler flag for all our cpython builds by default. It can be disabled by overriding the enableNoSemanticInterposition attribute. With this CFLAG our python code runs up to 23% faster than previously. See the pyperformance benchmark results below. The only downside is that we can no longer override libpython symbols via LD_PRELOAD when calling the python interpreter. If a user needs to do that, they can disable the newly introduced flag and keep using LD_PRELOAD. However, those situations should be rare. The Fedora Project made the equivalent change in their Fedora 32 release (and hasn't reverted it yet). They wrote a nice [wiki page] about the ups and downs of this change. I recommend reading that. Benchmark results of running Python 3.9 before and after this change: +-------------------------+---------------+--------------------+--------------+------------------------+ | Benchmark | py39.nix.json | py39-nsip.nix.json | Change | Significance | +=========================+===============+====================+==============+========================+ | 2to3 | 666 ms | 642 ms | 1.04x faster | Significant (t=12.73) | +-------------------------+---------------+--------------------+--------------+------------------------+ | chameleon | 15.0 ms | 14.6 ms | 1.03x faster | Significant (t=4.70) | +-------------------------+---------------+--------------------+--------------+------------------------+ | chaos | 198 ms | 182 ms | 1.09x faster | Significant (t=13.73) | +-------------------------+---------------+--------------------+--------------+------------------------+ | crypto_pyaes | 185 ms | 175 ms | 1.06x faster | Significant (t=9.70) | +-------------------------+---------------+--------------------+--------------+------------------------+ | deltablue | 12.3 ms | 11.2 ms | 1.10x faster | Significant (t=14.45) | 
+-------------------------+---------------+--------------------+--------------+------------------------+ | django_template | 85.1 ms | 82.0 ms | 1.04x faster | Significant (t=6.61) | +-------------------------+---------------+--------------------+--------------+------------------------+ | dulwich_log | 102 ms | 101 ms | 1.01x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | fannkuch | 670 ms | 634 ms | 1.06x faster | Significant (t=15.87) | +-------------------------+---------------+--------------------+--------------+------------------------+ | float | 182 ms | 176 ms | 1.03x faster | Significant (t=7.32) | +-------------------------+---------------+--------------------+--------------+------------------------+ | go | 393 ms | 366 ms | 1.07x faster | Significant (t=17.63) | +-------------------------+---------------+--------------------+--------------+------------------------+ | hexiom | 15.8 ms | 14.9 ms | 1.06x faster | Significant (t=13.81) | +-------------------------+---------------+--------------------+--------------+------------------------+ | json_dumps | 19.3 ms | 18.7 ms | 1.03x faster | Significant (t=7.46) | +-------------------------+---------------+--------------------+--------------+------------------------+ | json_loads | 38.4 us | 37.9 us | 1.01x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | logging_format | 15.0 us | 14.2 us | 1.05x faster | Significant (t=6.32) | +-------------------------+---------------+--------------------+--------------+------------------------+ | logging_silent | 328 ns | 305 ns | 1.07x faster | Significant (t=8.85) | +-------------------------+---------------+--------------------+--------------+------------------------+ | logging_simple | 13.8 us | 13.0 us | 1.06x faster | Significant (t=10.77) | 
+-------------------------+---------------+--------------------+--------------+------------------------+ | mako | 25.2 ms | 24.7 ms | 1.02x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | meteor_contest | 133 ms | 130 ms | 1.03x faster | Significant (t=3.59) | +-------------------------+---------------+--------------------+--------------+------------------------+ | nbody | 222 ms | 201 ms | 1.10x faster | Significant (t=26.85) | +-------------------------+---------------+--------------------+--------------+------------------------+ | nqueens | 161 ms | 152 ms | 1.06x faster | Significant (t=8.84) | +-------------------------+---------------+--------------------+--------------+------------------------+ | pathlib | 28.4 ms | 26.3 ms | 1.08x faster | Significant (t=11.85) | +-------------------------+---------------+--------------------+--------------+------------------------+ | pickle | 13.8 us | 13.1 us | 1.06x faster | Significant (t=10.84) | +-------------------------+---------------+--------------------+--------------+------------------------+ | pickle_dict | 32.7 us | 26.6 us | 1.23x faster | Significant (t=32.43) | +-------------------------+---------------+--------------------+--------------+------------------------+ | pickle_list | 4.39 us | 4.34 us | 1.01x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | pickle_pure_python | 782 us | 738 us | 1.06x faster | Significant (t=15.71) | +-------------------------+---------------+--------------------+--------------+------------------------+ | pidigits | 184 ms | 181 ms | 1.01x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | pyflate | 1.02 sec | 959 ms | 1.07x faster | Significant (t=21.13) | 
+-------------------------+---------------+--------------------+--------------+------------------------+ | python_startup | 34.3 ms | 34.0 ms | 1.01x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | python_startup_no_site | 15.5 ms | 15.3 ms | 1.01x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | raytrace | 912 ms | 849 ms | 1.07x faster | Significant (t=11.21) | +-------------------------+---------------+--------------------+--------------+------------------------+ | regex_compile | 273 ms | 261 ms | 1.05x faster | Significant (t=5.82) | +-------------------------+---------------+--------------------+--------------+------------------------+ | regex_dna | 188 ms | 187 ms | 1.00x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | regex_effbot | 3.37 ms | 3.66 ms | 1.09x slower | Significant (t=-16.12) | +-------------------------+---------------+--------------------+--------------+------------------------+ | regex_v8 | 29.8 ms | 29.7 ms | 1.01x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | richards | 119 ms | 113 ms | 1.05x faster | Significant (t=5.71) | +-------------------------+---------------+--------------------+--------------+------------------------+ | scimark_fft | 625 ms | 592 ms | 1.06x faster | Significant (t=19.64) | +-------------------------+---------------+--------------------+--------------+------------------------+ | scimark_lu | 273 ms | 253 ms | 1.08x faster | Significant (t=16.68) | +-------------------------+---------------+--------------------+--------------+------------------------+ | scimark_monte_carlo | 186 ms | 170 ms | 1.10x faster | Significant (t=14.70) | 
+-------------------------+---------------+--------------------+--------------+------------------------+ | scimark_sor | 330 ms | 310 ms | 1.07x faster | Significant (t=11.89) | +-------------------------+---------------+--------------------+--------------+------------------------+ | scimark_sparse_mat_mult | 9.03 ms | 8.36 ms | 1.08x faster | Significant (t=17.71) | +-------------------------+---------------+--------------------+--------------+------------------------+ | spectral_norm | 247 ms | 232 ms | 1.06x faster | Significant (t=14.64) | +-------------------------+---------------+--------------------+--------------+------------------------+ | sqlalchemy_declarative | 194 ms | 185 ms | 1.04x faster | Significant (t=4.80) | +-------------------------+---------------+--------------------+--------------+------------------------+ | sqlalchemy_imperative | 28.2 ms | 27.4 ms | 1.03x faster | Significant (t=3.80) | +-------------------------+---------------+--------------------+--------------+------------------------+ | sqlite_synth | 4.89 us | 4.73 us | 1.03x faster | Significant (t=10.60) | +-------------------------+---------------+--------------------+--------------+------------------------+ | sympy_expand | 833 ms | 813 ms | 1.02x faster | Significant (t=4.82) | +-------------------------+---------------+--------------------+--------------+------------------------+ | sympy_integrate | 33.0 ms | 31.6 ms | 1.05x faster | Significant (t=6.47) | +-------------------------+---------------+--------------------+--------------+------------------------+ | sympy_str | 490 ms | 477 ms | 1.03x faster | Significant (t=7.58) | +-------------------------+---------------+--------------------+--------------+------------------------+ | sympy_sum | 254 ms | 245 ms | 1.04x faster | Significant (t=7.82) | +-------------------------+---------------+--------------------+--------------+------------------------+ | telco | 11.6 ms | 11.3 ms | 1.03x faster | Significant (t=5.62) | 
+-------------------------+---------------+--------------------+--------------+------------------------+ | tornado_http | 175 ms | 172 ms | 1.02x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | unpack_sequence | 51.4 ns | 56.2 ns | 1.09x slower | Significant (t=-2.40) | +-------------------------+---------------+--------------------+--------------+------------------------+ | unpickle | 20.3 us | 19.8 us | 1.02x faster | Significant (t=4.09) | +-------------------------+---------------+--------------------+--------------+------------------------+ | unpickle_list | 5.41 us | 5.75 us | 1.06x slower | Significant (t=-26.56) | +-------------------------+---------------+--------------------+--------------+------------------------+ | unpickle_pure_python | 544 us | 524 us | 1.04x faster | Significant (t=6.47) | +-------------------------+---------------+--------------------+--------------+------------------------+ | xml_etree_generate | 154 ms | 148 ms | 1.04x faster | Significant (t=7.98) | +-------------------------+---------------+--------------------+--------------+------------------------+ | xml_etree_iterparse | 130 ms | 129 ms | 1.01x faster | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | xml_etree_parse | 178 ms | 179 ms | 1.00x slower | Not significant | +-------------------------+---------------+--------------------+--------------+------------------------+ | xml_etree_process | 123 ms | 118 ms | 1.04x faster | Significant (t=10.22) | +-------------------------+---------------+--------------------+--------------+------------------------+ [wiki page]: https://fedoraproject.org/wiki/Changes/PythonNoSemanticInterpositionSpeedup
This commit is contained in:
parent
94cd70bae2
commit
322eb3596b
|
@ -39,6 +39,8 @@
|
|||
, includeSiteCustomize ? true
|
||||
, static ? stdenv.hostPlatform.isStatic
|
||||
, enableOptimizations ? false
|
||||
# enableNoSemanticInterposition is a subset of the enableOptimizations flag that doesn't harm reproducibility.
|
||||
, enableNoSemanticInterposition ? true
|
||||
, reproducibleBuild ? true
|
||||
, pythonAttr ? "python${sourceVersion.major}${sourceVersion.minor}"
|
||||
}:
|
||||
|
@ -327,6 +329,17 @@ in with passthru; stdenv.mkDerivation {
|
|||
export DETERMINISTIC_BUILD=1;
|
||||
'' + optionalString stdenv.hostPlatform.isMusl ''
|
||||
export NIX_CFLAGS_COMPILE+=" -DTHREAD_STACK_SIZE=0x100000"
|
||||
'' +
|
||||
|
||||
# enableNoSemanticInterposition essentially sets the CFLAG -fno-semantic-interposition
|
||||
# which changes how symbols are looked up. This essentially means we can't override
|
||||
# libpython symbols via LD_PRELOAD anymore. This is common enough as every build
|
||||
# that uses --enable-optimizations has the same "issue".
|
||||
#
|
||||
# The Fedora wiki has a good article about their journey towards enabling this flag:
|
||||
# https://fedoraproject.org/wiki/Changes/PythonNoSemanticInterpositionSpeedup
|
||||
optionalString enableNoSemanticInterposition ''
|
||||
export CFLAGS_NODIST="-fno-semantic-interposition"
|
||||
'';
|
||||
|
||||
setupHook = python-setup-hook sitePackages;
|
||||
|
|
Loading…
Reference in a new issue