|
From: Philippe W. <phi...@sk...> - 2012-04-15 20:42:56
|
With some kludges, some of the Valgrind tools can be compiled and linked using -flto -fuse-linker-plugin (gcc 4.7.0, gold 2.20) (memcheck causes a crash in lto1, and the x86 tools are not OK). Below is the performance comparison. There are some significant differences, both positive and negative. These might be due to the use of real (wall-clock) time measurements, which are not always very reliable, or they might be real changes due to -flto. I have some difficulty drawing a conclusion from the results below ... Philippe perl perf/vg_perf --tools=none,callgrind,helgrind,massif,cachegrind,lackey,drd,exp-sgcheck,exp-bbv,exp-dhat --vg=../valgcc470 --vg=../lto_gold --reps=20 perf 2>&1 | tee lto_cmp.out -- Running tests in perf ---------------------------------------------- -- bigcode1 -- bigcode1 valgcc470 :0.13s no: 2.3s (17.7x, -----) ca:15.8s (121.4x, -----) he: 2.5s (19.5x, -----) ma: 2.6s (20.4x, -----) ca: 6.3s (48.6x, -----) la:20.8s (160.0x, -----) dr: 2.5s (19.2x, -----) ex: 4.6s (35.5x, -----) ex: 8.1s (62.2x, -----) ex: 2.8s (21.2x, -----) bigcode1 lto_gold :0.13s no: 2.3s (17.5x, 0.9%) ca:15.1s (116.3x, 4.2%) he: 2.5s (19.6x, -0.4%) ma: 2.6s (20.3x, 0.4%) ca: 6.3s (48.4x, 0.5%) la:21.0s (161.4x, -0.9%) dr: 2.3s (17.8x, 7.2%) ex: 4.7s (35.8x, -0.9%) ex: 8.5s (65.1x, -4.6%) ex: 2.6s (20.2x, 4.4%) -- bigcode2 -- bigcode2 valgcc470 :0.12s no: 4.5s (37.2x, -----) ca:27.7s (230.5x, -----) he: 5.4s (45.1x, -----) ma: 5.2s (43.3x, -----) ca: 9.6s (79.8x, -----) la:34.2s (284.7x, -----) dr: 5.1s (42.8x, -----) ex: 7.6s (63.2x, -----) ex:11.7s (97.7x, -----) ex: 5.5s (45.9x, -----) bigcode2 lto_gold :0.12s no: 4.5s (37.5x, -0.9%) ca:26.9s (224.2x, 2.7%) he: 5.5s (45.5x, -0.9%) ma: 5.2s (43.3x, 0.0%) ca: 9.6s (79.9x, -0.1%) la:34.1s (284.2x, 0.1%) dr: 5.2s (43.1x, -0.8%) ex: 7.5s (62.8x, 0.5%) ex:12.3s (102.2x, -4.6%) ex: 5.5s (46.0x, -0.2%) -- bz2 -- bz2 valgcc470 :0.64s no: 3.0s ( 4.7x, -----) ca:16.6s (25.9x, -----) he:10.5s (16.4x, -----) ma: 3.2s ( 5.0x, -----) ca:18.5s (28.9x, -----) la:46.1s (72.1x, -----) 
dr:13.7s (21.5x, -----) ex:25.1s (39.2x, -----) ex:14.9s (23.3x, -----) ex: 8.2s (12.8x, -----) bz2 lto_gold :0.64s no: 2.9s ( 4.5x, 3.6%) ca:15.8s (24.6x, 5.0%) he:10.0s (15.6x, 5.2%) ma: 3.3s ( 5.2x, -4.4%) ca:16.6s (25.9x, 10.2%) la:45.4s (70.9x, 1.7%) dr:12.7s (19.9x, 7.4%) ex:24.3s (38.0x, 3.0%) ex:16.0s (25.0x, -7.4%) ex: 7.8s (12.2x, 4.6%) -- fbench -- fbench valgcc470 :0.27s no: 1.3s ( 4.9x, -----) ca: 7.4s (27.3x, -----) he: 3.1s (11.6x, -----) ma: 1.4s ( 5.1x, -----) ca: 5.2s (19.3x, -----) la:15.2s (56.2x, -----) dr: 2.9s (10.8x, -----) ex:15.6s (57.6x, -----) ex: 4.4s (16.4x, -----) ex: 3.4s (12.7x, -----) fbench lto_gold :0.27s no: 1.4s ( 5.0x, -3.0%) ca: 7.1s (26.4x, 3.1%) he: 3.1s (11.5x, 1.0%) ma: 1.4s ( 5.3x, -3.6%) ca: 4.9s (18.0x, 6.7%) la:16.9s (62.7x,-11.6%) dr: 3.0s (11.0x, -1.7%) ex:14.5s (53.9x, 6.6%) ex: 4.8s (17.7x, -7.9%) ex: 3.2s (11.9x, 6.7%) -- ffbench -- ffbench valgcc470 :0.24s no: 1.1s ( 4.4x, -----) ca: 2.5s (10.2x, -----) he: 5.3s (22.3x, -----) ma: 1.0s ( 4.3x, -----) ca: 7.4s (30.8x, -----) la: 6.9s (28.9x, -----) dr: 3.6s (15.0x, -----) ex: 8.5s (35.4x, -----) ex: 5.0s (20.8x, -----) ex: 2.1s ( 8.6x, -----) ffbench lto_gold :0.24s no: 1.0s ( 4.0x, 8.5%) ca: 2.3s ( 9.8x, 4.9%) he: 5.8s (24.3x, -9.0%) ma: 1.0s ( 4.2x, 1.0%) ca: 5.6s (23.2x, 24.9%) la: 7.1s (29.6x, -2.4%) dr: 3.7s (15.5x, -3.6%) ex: 8.2s (34.0x, 4.0%) ex: 4.3s (17.8x, 14.5%) ex: 2.3s ( 9.5x,-10.6%) -- heap -- heap valgcc470 :0.10s no: 1.1s (11.3x, -----) ca: 8.4s (84.5x, -----) he: 7.8s (78.4x, -----) ma: 4.4s (44.4x, -----) ca: 5.6s (56.2x, -----) la:15.4s (153.9x, -----) dr: 4.4s (44.1x, -----) ex:13.1s (131.4x, -----) ex: 4.9s (48.7x, -----) ex: 8.8s (88.4x, -----) heap lto_gold :0.10s no: 1.0s ( 9.9x, 12.4%) ca: 7.5s (75.3x, 10.9%) he: 8.1s (80.7x, -2.9%) ma: 4.4s (43.8x, 1.4%) ca: 6.5s (64.9x,-15.5%) la:16.8s (167.7x, -9.0%) dr: 4.6s (46.4x, -5.2%) ex:12.7s (127.2x, 3.2%) ex: 5.1s (51.0x, -4.7%) ex: 8.5s (84.6x, 4.3%) -- heap_pdb4 -- heap_pdb4 valgcc470 
:0.12s no: 1.1s ( 8.8x, -----) ca: 9.3s (77.9x, -----) he: 8.9s (74.4x, -----) ma: 4.6s (38.5x, -----) ca: 6.1s (50.9x, -----) la:18.8s (156.4x, -----) dr: 5.2s (42.9x, -----) ex:15.6s (129.8x, -----) ex: 5.7s (47.2x, -----) ex:10.7s (89.3x, -----) heap_pdb4 lto_gold :0.12s no: 1.2s ( 9.8x,-10.4%) ca: 8.6s (71.5x, 8.2%) he: 9.2s (77.1x, -3.6%) ma: 4.7s (38.8x, -0.9%) ca: 6.5s (54.6x, -7.2%) la:19.4s (161.3x, -3.1%) dr: 5.1s (42.3x, 1.4%) ex:14.1s (117.7x, 9.4%) ex: 5.7s (47.2x, 0.0%) ex:10.1s (84.0x, 5.9%) -- many-loss-records -- many-loss-records valgcc470 :0.01s no: 0.3s (26.0x, -----) ca: 1.4s (136.0x, -----) he: 1.4s (141.0x, -----) ma: 1.1s (108.0x, -----) ca: 1.1s (110.0x, -----) la: 2.7s (273.0x, -----) dr: 1.1s (105.0x, -----) ex: 4.2s (416.0x, -----) ex: 0.9s (86.0x, -----) ex: 1.9s (195.0x, -----) many-loss-records lto_gold :0.01s no: 0.3s (27.0x, -3.8%) ca: 1.3s (130.0x, 4.4%) he: 1.4s (138.0x, 2.1%) ma: 1.0s (103.0x, 4.6%) ca: 1.0s (103.0x, 6.4%) la: 2.7s (271.0x, 0.7%) dr: 1.0s (103.0x, 1.9%) ex: 4.1s (412.0x, 1.0%) ex: 0.9s (88.0x, -2.3%) ex: 1.9s (187.0x, 4.1%) -- many-xpts -- many-xpts valgcc470 :0.03s no: 0.4s (12.7x, -----) ca: 3.5s (117.0x, -----) he: 2.9s (95.0x, -----) ma: 1.7s (57.0x, -----) ca: 1.5s (51.0x, -----) la: 5.9s (196.3x, -----) dr: 1.5s (50.0x, -----) ex: 7.2s (240.3x, -----) ex: 1.5s (48.7x, -----) ex: 1.7s (56.7x, -----) many-xpts lto_gold :0.03s no: 0.4s (13.7x, -7.9%) ca: 3.5s (116.0x, 0.9%) he: 2.6s (88.0x, 7.4%) ma: 1.7s (56.3x, 1.2%) ca: 1.6s (52.7x, -3.3%) la: 6.0s (200.7x, -2.2%) dr: 1.5s (51.0x, -2.0%) ex: 7.1s (236.0x, 1.8%) ex: 1.6s (53.7x,-10.3%) ex: 1.6s (53.3x, 5.9%) -- sarp -- sarp valgcc470 :0.02s no: 0.3s (13.0x, -----) ca: 2.2s (108.5x, -----) he: 7.5s (373.0x, -----) ma: 0.3s (15.0x, -----) ca: 1.5s (73.0x, -----) la: 2.3s (114.5x, -----) dr: 0.9s (45.0x, -----) ex: 7.0s (349.0x, -----) ex: 1.1s (54.0x, -----) ex: 0.4s (22.0x, -----) sarp lto_gold :0.02s no: 0.2s (12.5x, 3.8%) ca: 2.1s (104.0x, 4.1%) he: 7.7s 
(383.0x, -2.7%) ma: 0.3s (15.5x, -3.3%) ca: 1.3s (65.5x, 10.3%) la: 2.1s (107.0x, 6.6%) dr: 1.0s (48.0x, -6.7%) ex: 7.0s (349.5x, -0.1%) ex: 1.2s (58.0x, -7.4%) ex: 0.4s (22.0x, 0.0%) -- tinycc -- tinycc valgcc470 :0.19s no: 2.5s (13.4x, -----) ca:14.5s (76.1x, -----) he:11.5s (60.7x, -----) ma: 3.8s (20.1x, -----) ca:12.4s (65.5x, -----) la:34.7s (182.6x, -----) dr:10.1s (52.9x, -----) ex:31.7s (166.9x, -----) ex: 8.4s (44.0x, -----) ex:11.5s (60.4x, -----) tinycc lto_gold :0.19s no: 2.5s (13.4x, 0.0%) ca:14.1s (74.4x, 2.3%) he:11.7s (61.5x, -1.4%) ma: 3.9s (20.3x, -1.3%) ca:11.9s (62.4x, 4.7%) la:34.7s (182.5x, 0.1%) dr: 9.3s (48.9x, 7.5%) ex:30.9s (162.4x, 2.7%) ex: 8.8s (46.2x, -4.9%) ex:11.4s (60.3x, 0.2%) -- Finished tests in perf ---------------------------------------------- == 11 programs, 220 timings ================= |