utils/pre-commit-benchmark

#!/usr/bin/env python

# Note: it doesn't matter what directory you invoke this in; it uses
# your SWIFT_BUILD_ROOT and SWIFT_SOURCE_ROOT settings, just like
# build-script

# Note2: To avoid expensive rebuilds, it's often better to develop
# this script somewhere outside the source tree and then move it back
# when your changes are finished.

import subprocess
import sys
import os
import re
import shutil
import argparse
from pipes import quote as shell_quote

# Locations are taken from the environment when set; otherwise they fall
# back to a source checkout under ~/src/s with the build tree inside it.
HOME = os.environ['HOME']
SWIFT_SOURCE_ROOT = os.environ.get(
    'SWIFT_SOURCE_ROOT',
    os.path.join(HOME, 'src', 's'))
SWIFT_BUILD_ROOT = os.environ.get(
    'SWIFT_BUILD_ROOT',
    os.path.join(SWIFT_SOURCE_ROOT, 'build'))

# When true, the timings parser and comparer print extra diagnostics.
VERBOSE = False

# Directory layout below the build root.
variantDir = os.path.join(SWIFT_BUILD_ROOT, 'Ninja-Release')
buildDir = os.path.join(variantDir, 'swift-macosx-x86_64')  # swift build tree
binDir = os.path.join(buildDir, 'bin')                      # benchmark executables
benchDir = os.path.join(variantDir, 'bench')                # cached timings, one dir per tree hash

# The swift checkout inside the source root.
sourceDir = os.path.join(SWIFT_SOURCE_ROOT, 'swift')

import os

def print_call(*args, **kw):
    """Echo a subprocess-style invocation to stdout.

    The first positional argument is the command, either a sequence of
    words or a single string.  The command is printed shell-quoted after
    a '$ ' prompt, followed by a trailing comment holding the repr of the
    remaining positional arguments and of the keyword arguments.
    """
    # A bare string command is wrapped so that the code below can always
    # treat args[0] as a sequence of words.
    if isinstance(args[0], (str, unicode)):
        args = [args]

    command = ' '.join(shell_quote(word) for word in args[0])
    trailer = '   # %r, %r' % (args[1:], kw)
    print('$ ' + command + trailer)
    
def check_call(*args, **kw):
    """Echo the command, then run it through subprocess.check_call.

    Arguments are forwarded unchanged.  Returns whatever
    subprocess.check_call returns.  On any exception the failing command
    is reported on stdout (flushed immediately) and the exception is
    re-raised.
    """
    print_call(*args, **kw)
    try:
        status = subprocess.check_call(*args, **kw)
    except:
        # Not swallowed: report which command failed, then propagate.
        print('failed command:', args, kw)
        sys.stdout.flush()
        raise
    else:
        return status
    
def check_output(*args, **kw):
    """Echo the command, then run it through subprocess.check_output.

    Arguments are forwarded unchanged.  Returns the captured output of
    the command.  On any exception the failing command is reported on
    stdout (flushed immediately) and the exception is re-raised.
    """
    print_call(*args, **kw)
    try:
        captured = subprocess.check_output(*args, **kw)
    except:
        # Not swallowed: report which command failed, then propagate.
        print('failed command:', args, kw)
        sys.stdout.flush()
        raise
    else:
        return captured
    
def getTreeSha(treeish):
    """Return the tree hash git reports for `treeish` in sourceDir.

    Uses `git show -s --format=%t` and strips trailing whitespace from
    the result.
    """
    command = ['git', 'show', treeish, '-s', '--format=%t']
    raw = check_output(command, cwd=sourceDir)
    return raw.rstrip()
    
def getWorkTreeSha():
    """Return the tree hash describing the current state of sourceDir.

    When `git status` reports no changes to tracked files this is simply
    the tree hash of HEAD.  Otherwise a temporary stash entry is created
    to capture the modified state, its tree hash is read, and the entry
    is dropped again.
    """
    def git(runner, *words):
        # Run a git subcommand inside the swift checkout.
        return runner(['git'] + list(words), cwd=sourceDir)

    pending = git(check_output, 'status', '--porcelain', '--untracked-files=no')
    if pending != '':
        # Create a stash without updating the working tree, and record it
        # under refs/stash so that it can be addressed as stash@{0}.
        stashId = git(check_output, 'stash', 'create', 'benchmark stash').rstrip()
        git(check_call, 'update-ref', '-m', 'benchmark stash', 'refs/stash', stashId)
        treeSha = getTreeSha('stash@{0}')
        git(check_call, 'stash', 'drop', '-q')
        return treeSha

    return getTreeSha('HEAD')
    
def buildBenchmarks(cacheDir, build_script_args):
    """Build the benchmark executables with build-script.

    cacheDir is accepted but not used here: the executables are run from
    where they are built and only their timings are cached.
    build_script_args is a list of extra arguments appended to the
    build-script command line.
    """
    print('Building executables...')

    configVars = {
        'SWIFT_INCLUDE_BENCHMARKS':'TRUE',
        'SWIFT_INCLUDE_PERF_TESTSUITE':'TRUE',
        'SWIFT_STDLIB_BUILD_TYPE':'RelWithDebInfo',
        'SWIFT_STDLIB_ASSERTIONS':'FALSE',
    }
    configArgs = ['-D%s=%s' % (name, value) for name, value in configVars.items()]

    # Ensure swift is built with the appropriate options.  This is only
    # possible once a CMake cache exists in the build directory.
    if os.path.isfile(os.path.join(buildDir, 'CMakeCache.txt')):
        check_call(['cmake', '.'] + configArgs, cwd=buildDir)

    buildScript = os.path.join(sourceDir, 'utils', 'build-script')
    check_call([buildScript, '-R', '--no-assertions'] + build_script_args)

    # Copying the executables into the cache directory would require
    # copying or linking all the libraries to the same
    # executable-relative location.  Probably not worth it.  Instead the
    # executables are run where they're built and the timings are cached.
    print('done.')

def _currentHead():
    """Return the branch name, or the detached revision, checked out in sourceDir.

    Parses the starred line of `git branch`.  Both the older
    "(detached from X)" wording and the "(HEAD detached at X)" /
    "(HEAD detached from X)" wording are recognised.

    Raises RuntimeError when `git branch` shows no current entry.
    """
    m = re.search(
        r'^\* (?:\((?:HEAD )?detached (?:from|at) (.*)\)|(.*))$',
        check_output(['git', 'branch'], cwd=sourceDir),
        re.MULTILINE
    )
    if m is None:
        raise RuntimeError('Unable to determine the current HEAD of %s' % sourceDir)
    return m.group(1) or m.group(2)


def _buildBenchmarksAt(treeish, cacheDir, build_script_args):
    """Check out `treeish`, build the benchmarks, then restore the work tree."""
    saveHead = _currentHead()

    # `git stash save` succeeds without creating an entry when there is
    # nothing to save.  Popping unconditionally afterwards would then apply
    # some older, unrelated stash, so remember whether a new entry appeared.
    stashesBefore = check_output(['git', 'stash', 'list'], cwd=sourceDir)
    check_call(['git', 'stash', 'save', '--include-untracked'], cwd=sourceDir)
    stashed = check_output(['git', 'stash', 'list'], cwd=sourceDir) != stashesBefore

    try:
        check_call(['git', 'checkout', treeish], cwd=sourceDir)
        buildBenchmarks(cacheDir, build_script_args)
    finally:
        # Best-effort restoration: failures here must not mask a build error.
        subprocess.call(['git', 'checkout', saveHead], cwd=sourceDir)
        if stashed:
            subprocess.call(['git', 'stash', 'pop'], cwd=sourceDir)


def collectBenchmarks(exeNames, treeish = None, repeat = 3, build_script_args = None):
    """Make sure cached timings exist for every executable and return their directory.

    exeNames          -- names of the benchmark executables found in binDir.
    treeish           -- commit to measure; None means the current working tree.
    repeat            -- minimum number of runs each timings file must hold.
    build_script_args -- extra arguments for build-script (default: none).

    Timings are cached in benchDir under the tree hash of the measured
    state, one '<exe>.out' file per executable.  A file is regenerated when
    it is missing or holds fewer than `repeat` runs.  The benchmarks are
    built at most once per call, before the first executable that needs
    new timings is run.

    Returns the cache directory for the measured tree.
    """
    if build_script_args is None:
        build_script_args = []

    treeSha = getWorkTreeSha() if treeish is None else getTreeSha(treeish)
    cacheDir = os.path.join(benchDir, treeSha)
    print('Collecting benchmarks for %s in %s ' % (treeish if treeish else 'working tree', cacheDir))

    if not os.path.isdir(cacheDir):
        os.makedirs(cacheDir)

    rebuilt = False
    for exe in exeNames:
        timingsFile = os.path.join(cacheDir, exe) + '.out'
        timingsText = ''
        if os.path.exists(timingsFile):
            with open(timingsFile) as f:
                timingsText = f.read()
            oldRepeat = timingsText.count('\nTotal')
            if oldRepeat < repeat:
                print('Only %s repeats in existing %s timings file' % (oldRepeat, exe))
                timingsText = ''
        else:
            print('Missing timings file for %s' % exe)

        if timingsText != '':
            # Cached timings are sufficient for this executable.
            continue

        # The executables in binDir may have been built from a different
        # tree, so always build before measuring -- including the case where
        # a timings file exists but holds too few repeats.
        if not rebuilt:
            if treeish is None:
                buildBenchmarks(cacheDir, build_script_args)
            else:
                _buildBenchmarksAt(treeish, cacheDir, build_script_args)
            rebuilt = True

        print('Running new benchmarks...')
        for iteration in range(0, repeat):
            print('  %s iteration %s' % (exe, iteration))
            output = check_output(os.path.join(binDir, exe))
            print(output)
            timingsText += output

        with open(timingsFile, 'w') as f:
            f.write(timingsText)
        print('done.')

    return cacheDir

# Parse lines like this
# #,TEST,SAMPLES,MIN(ms),MAX(ms),MEAN(ms),SD(ms),MEDIAN(ms)
# Only the first four comma-separated fields are captured; per the header
# above these are the test number, the test name, the sample count and the
# minimum time.  Whatever follows on the line is ignored.
SCORERE=re.compile(r"(\d+),[ \t]*(\w+),[ \t]*([\d.]+),[ \t]*([\d.]+)")

# The Totals line would be parsed like this, but we ignore it for now.
# The empty first group keeps the group numbering aligned with SCORERE.
TOTALRE=re.compile(r"()(Totals),[ \t]*([\d.]+),[ \t]*([\d.]+)")

# Regex group holding the benchmark name, used as the key for its scores.
KEYGROUP=2
# Regex group holding the value recorded as the score (the fourth field).
VALGROUP=4

class ScoreParserException(ValueError):
    """Raised when a score field in a timings file is not a valid number."""


def parseFloat(word):
    """Convert `word` to a float.

    Raises ScoreParserException (a ValueError) when `word` cannot be
    converted.
    """
    try:
        return float(word)
    except (TypeError, ValueError):
        # %-formatting rather than concatenation so that a non-string
        # `word` still produces the intended message.
        raise ScoreParserException("Expected float val, not %s" % (word,))

def getScores(fname):
    scores = {}
    runs = 0
    f = open(fname)
    try:
        for line in f:
            if VERBOSE: print "Parsing", line,
            m = SCORERE.match(line)
            if not m:
                continue

            if not m.group(KEYGROUP) in scores:
                scores[m.group(KEYGROUP)] = []
            scores[m.group(KEYGROUP)].append(parseFloat(m.group(VALGROUP)))
            if len(scores[m.group(KEYGROUP)]) > runs:
                runs = len(scores[m.group(KEYGROUP)])
    finally:
        f.close()
    return scores, runs

def compareScores(key, score1, score2, runs):
    """Build one table row comparing two sets of scores for a benchmark.

    key    -- benchmark name; becomes the first cell.
    score1 -- baseline scores, one per run.
    score2 -- scores being compared against the baseline, one per run.
    runs   -- number of per-run columns the table header provides for
              each side.

    The row holds the key, the formatted scores of each side (each side
    padded with "0.0" up to `runs` cells so that the columns line up with
    the header), the absolute difference of the two best (lowest) scores,
    and the ratio best1/best2.  The ratio is "*" when best2 is not
    positive.  If either side has no scores at all, both summary cells
    are "*".
    """
    def cells(scores):
        # Format one side's scores and pad to the header's column count.
        formatted = ["%.2f" % score for score in scores]
        formatted.extend(["0.0"] * (runs - len(formatted)))
        return formatted

    row = [key] + cells(score1) + cells(score2)

    if not score1 or not score2:
        # Nothing to compare against.
        row.extend(["*", "*"])
        return row

    # min() rather than a truthiness test, so that a legitimate best score
    # of 0.0 is not mistaken for "no score seen yet".
    bestscore1 = min(score1)
    bestscore2 = min(score2)

    row.append("%.2f" % abs(bestscore1 - bestscore2))
    Num = float(bestscore1)
    Den = float(bestscore2)
    row.append(("%.2f" % (Num / Den)) if Den > 0 else "*")
    return row

def compareTimingsFiles(file1, file2):
    scores1, runs1 = getScores(file1)
    scores2, runs2 = getScores(file2)
    runs = min(runs1, runs2)
    if VERBOSE: print scores1; print scores2
    keys = [f for f in set(scores1.keys() + scores2.keys())]
    keys.sort()
    if VERBOSE:
        print "comparing ", file1, "vs", file2, "=",
        print file1, "/", file2

    rows =  [["benchmark"]]
    for i in range(0,runs):
        rows[0].append("baserun%d" % i)
    for i in range(0,runs):
        rows[0].append("optrun%d" % i)
    rows[0] += ["delta", "speedup"]

    for key in keys:
        if not key in scores1:
            print key, "not in", file1
            continue
        if not key in scores2:
            print key, "not in", file2
            continue
        rows.append(compareScores(key, scores1[key], scores2[key], runs))
        
    widths = []
    for row in rows:
        for n, x in enumerate(row):
            while n >= len(widths):
                widths.append(0)
            if len(x) > widths[n]:
                widths[n] = len(x)
    for row in rows:
        for n, x in enumerate(row):
            if n != 0:
                print ',',
            print ((widths[n] - len(x)) * ' ') + x,
        print ''

def checkAndUpdatePerfTestSuite(sourceDir):
    """Check that the performance testsuite directory is in its appropriate
       location and attempt to update it to ToT.

       The suite is expected at <sourceDir>/benchmark/PerfTestSuite.
       Raises RuntimeError if that directory has no .git entry."""
    suiteDir = os.path.join(sourceDir, 'benchmark', 'PerfTestSuite')

    # If the suite is not there, clone it on demand.
    # NOTE(review): this `git clone` is given neither a repository URL nor
    # a destination, so it presumably fails as written -- confirm the
    # intended arguments.
    if not os.path.isdir(suiteDir):
        check_call(['git', 'clone'])

    # Simple sanity check for careless errors: the suite directory must be
    # a git checkout.  This ensures that users update to the new repository
    # location.  More in-depth checks are possible but not attempted.
    if not os.path.exists(os.path.join(suiteDir, '.git')):
        raise RuntimeError("PerfTestSuite dir is not a .git repo?!")

    # We always update the benchmarks to ToT.
    check_call(['git', 'pull', 'origin', 'master'], cwd=suiteDir)

if __name__ == '__main__':
    # Command line: [-r N] [-O level]... [tree-ish] [build-script-args...]
    parser = argparse.ArgumentParser(
        description='Show performance improvements/regressions in your Git working tree state')
    parser.add_argument('-r', '--repeat', dest='repeat', metavar='int', default=1, type=int,
                        help='number of times to repeat each test')
    parser.add_argument('-O', dest='optimization', choices=('3', 'unchecked', 'none'),
                        action='append', help='Optimization levels to test')
    parser.add_argument(dest='baseline', nargs='?', metavar='tree-ish', default='origin/master',
                        help='the baseline Git commit to compare with.')
    parser.add_argument(dest='build_script_args', nargs=argparse.REMAINDER, metavar='build-script-args', default=[],
                        help='additional arguments to build script, e.g. -- --distcc --build-args=-j30')
    args = parser.parse_args()

    # One executable per requested optimization level; level '3' is the
    # plain 'PerfTests_O' binary, other levels are appended to that name.
    optimization = args.optimization or ['3']
    exeNames = ['PerfTests_O' + ('' if x == '3' else x) for x in optimization]

    # Update PerfTests bench to ToT. If it does not exist, it is cloned
    # on demand.
    #
    # TODO: This name sucks.
    checkAndUpdatePerfTestSuite(sourceDir)

    # Measure the working tree first, then the baseline commit.
    workCacheDir = collectBenchmarks(exeNames, treeish=None, repeat=args.repeat, build_script_args=args.build_script_args)
    baselineCacheDir = collectBenchmarks(exeNames, treeish=args.baseline, repeat=args.repeat, build_script_args=args.build_script_args)

    # Identical cache directories mean both states have the same tree hash.
    if baselineCacheDir == workCacheDir:
        print('No changes between work tree and %s; nothing to compare.' % args.baseline)
    else:
        for exe in exeNames:
            print('%s %s %s' % ('=' * 20, exe, '=' * 20))
            timingsFileName = exe + '.out'
            compareTimingsFiles(
                os.path.join(baselineCacheDir, timingsFileName),
                os.path.join(workCacheDir, timingsFileName)
            )