taskset.py 12.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#!/usr/bin/env python3
# Copyright 2017 The Australian National University
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

16
import os
17
import sys
Zixian Cai's avatar
Zixian Cai committed
18
19
20
import yaml
import logging
import crayons
21
import json
Zixian Cai's avatar
Zixian Cai committed
22
23

from pathlib import Path
Zixian Cai's avatar
Zixian Cai committed
24
from types import SimpleNamespace
25

26
from mubench import SUITE_DIR, CALLBACKS_DIR
27
from mubench.exceptions import ExecutionFailure
John Zhang's avatar
John Zhang committed
28
from mubench.conf import settings
Zixian Cai's avatar
Zixian Cai committed
29
from mubench.util import expandenv, dictify
30
from mubench.lang import get_lang, Language
31
from mubench.models.result import Result
Zixian Cai's avatar
Zixian Cai committed
32
33
34
35
36
37
38
39
40
41
42
43

logger = logging.getLogger(__name__)


def task_print(task_name, message):
    """Log ``message`` at INFO level, prefixed by the task name in yellow."""
    tag = crayons.yellow(task_name)
    logger.info("[{}] {}".format(tag, message))


class TaskSet:
Zixian Cai's avatar
Zixian Cai committed
44
45
    def __init__(self, name, benchmark, iterations, callback, runnerwrap=None,
                 **kwds):
        """
        Set up a task set and the environment its tasks run in.

        ``kwds`` must supply 'output_dir', 'resfile', 'comparisons' and 'env'.
        Note that ``benchmark['args']``, ``callback['param']`` and the values
        of the 'env' mapping are overwritten in place with their
        environment-expanded forms, and ``callback`` gains a 'dylib' entry.
        """
        self.name = name
        self.benchmark = benchmark
        self.iterations = iterations
        self.callback = callback
        self.runnerwrap = runnerwrap
        self.output_dir = kwds['output_dir']
        self.resfile = kwds['resfile']
        self.tasks = []
        self.comparison = kwds['comparisons']

        # environ layering: os.environ, then local settings, then the
        # taskset name, then the taskset's own definitions
        self.env = environ = os.environ.copy()
        environ.update(getattr(settings, 'ENVIRON', {}))
        environ['MUBENCH_TASKSET_NAME'] = name
        taskset_env = kwds['env']
        # every definition is expanded against the environment above; the
        # taskset definitions themselves are merged in only afterwards
        for key in taskset_env:
            taskset_env[key] = expandenv(taskset_env[key], environ)
        environ.update(taskset_env)

        # expand environs in benchmark args and callback param
        benchmark['args'] = [expandenv(str(arg), environ)
                             for arg in benchmark['args']]
        callback['param'] = expandenv(callback['param'], environ)

        # compiled callback shared library: the platform's library path
        # variable is set to the output directory, where the library lives
        is_darwin = sys.platform == 'darwin'
        libpath_var = 'DYLD_LIBRARY_PATH' if is_darwin else 'LD_LIBRARY_PATH'
        libext = '.dylib' if is_darwin else '.so'
        environ[libpath_var] = str(self.output_dir)
        libname = 'libcb_%(name)s' % self.callback + libext
        self.callback['dylib'] = self.output_dir / libname
78

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
    @staticmethod
    def from_config_dict(name, conf_d, conf_dir=None):
        """
        Build a TaskSet, including its tasks, from one configuration mapping.

        :param name: taskset name; also used for the default result file
                     name ('<name>.json')
        :param conf_d: configuration mapping of the taskset; its 'benchmark'
                       and 'callback' entries are normalised in place
        :param conf_dir: output directory to use when the configuration has
                         no 'outdir'; the current directory is used when
                         this is None
        :return: the constructed TaskSet
        :raises AssertionError: if 'iterations', 'benchmark' or 'callback' is
                                missing, or the benchmark is not found under
                                SUITE_DIR
        :raises KeyError: if 'tasks' is missing
        """
        # output directory
        if 'outdir' in conf_d:
            output_dir = Path(conf_d['outdir'])
        elif conf_dir is None:
            # without this guard str(None) would produce a directory that is
            # literally named "None"
            output_dir = Path('.')
        else:
            output_dir = Path(conf_dir)

        # check iterations
        assert 'iterations' in conf_d, 'iterations not defined'

        # check benchmark
        assert 'benchmark' in conf_d, 'benchmark not defined'
        assert (SUITE_DIR / conf_d['benchmark']['name']).exists(), \
            "benchmark %(name)s not found" % conf_d['benchmark']
        conf_d['benchmark'].setdefault('args', [])

        # check record file; a relative path is placed under the output
        # directory, whose parent directories are created on demand
        resfile = Path(conf_d.get('recfile', '%s.json' % (name,)))
        if not resfile.is_absolute():
            resfile = output_dir / resfile
            resfile.parent.mkdir(parents=True, exist_ok=True)

        # check callback; missing or null entries get their defaults
        assert 'callback' in conf_d, 'callback not defined'
        d = dictify(conf_d['callback'])
        for key, default in (('param', ""),
                             ('include_dirs', []),
                             ('library_dirs', []),
                             ('libraries', [])):
            if key not in d or d[key] is None:
                d[key] = default
        conf_d['callback'] = d

        # add comparison
        comparisons = [SimpleNamespace(op1=pair[0], op2=pair[1])
                       for pair in conf_d.get('compare', [])]

        ts = TaskSet(name, conf_d['benchmark'], conf_d['iterations'],
                     conf_d['callback'],
                     output_dir=output_dir, resfile=resfile,
                     env=conf_d.get('environ', {}),
                     comparisons=comparisons)

        # add tasks; a task whose configuration cannot be parsed is reported
        # and left out, the remaining tasks are still loaded
        for task_name, task_conf in conf_d['tasks'].items():
            try:
                ts.tasks.append(Task(ts, task_name, **task_conf))
            except Exception as e:
                task_print(task_name,
                           crayons.red('parsing configuration failed.'))
                logger.critical(crayons.red(str(e)))

        return ts
Zixian Cai's avatar
Zixian Cai committed
137

138
    def run(self, skipcomp_l):
139
140
141
        # compile callback into shared library first
        self.compile_callback()

John Zhang's avatar
John Zhang committed
142
143
        # compile first
        targets = {}
144
145
        for task in self.tasks:
            if not task.lang_cls.compiled:  # interpreted
John Zhang's avatar
John Zhang committed
146
                targets[task] = task.srcfile
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
            else:   # need compilation
                if task.name in skipcomp_l: # skip compilation -> assume default target
                    targets[task] = task.get_default_target()
                else:
                    task_print(task.name, 'compiling...')
                    try:
                        target = task.compile()
                        task_print(task.name, 'target %s generated' % target)
                        targets[task] = target
                    except ExecutionFailure as e:
                        task_print(task.name, crayons.red('FAILED'))
                        logger.critical(crayons.red(str(e)))
                        errlog_file = self.output_dir / (task.name + '.log')
                        e.dump(errlog_file)
                        task_print(task.name, crayons.red(
162
                            'error output written to %s' % errlog_file))
John Zhang's avatar
John Zhang committed
163
164

        # run
165
        data = {t: [] for t in targets}  # only run tasks that have a target
John Zhang's avatar
John Zhang committed
166

Zixian Cai's avatar
Zixian Cai committed
167
        # Generating record
John Zhang's avatar
John Zhang committed
168
        for i in range(self.iterations):
John Zhang's avatar
John Zhang committed
169
            logger.info("Running iteration %d..." % i)
John Zhang's avatar
John Zhang committed
170
            keys = list(data.keys())
171
            for task in keys:
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
                target = targets[task]
                try:
                    res, t_proc = task.run(target)
                    task.add_datapoint(res.stdout, res.stderr, t_proc)
                    data[task].append({
                        'stdout': str(res.stdout, encoding='utf-8'),
                        'stderr': str(res.stderr, encoding='utf-8'),
                        't_proc': t_proc
                    })
                except ExecutionFailure as e:
                    task_print(task.name, crayons.red('FAILED'))
                    logger.critical(crayons.red(str(e)))
                    errlog_file = self.output_dir / (task.name + '.log')
                    e.dump(errlog_file)
                    task_print(task.name, crayons.red(
187
                        'error output written to %s' % errlog_file))
188
                    del data[task]
John Zhang's avatar
John Zhang committed
189

190
191
192
193
        record = {
            'name': self.name,
            'iterations': self.iterations,
            'benchmark': self.benchmark,
194
            'callback': self.callback,
195
            'results': {t.name: data[t] for t in data},
196
197
198
199
        }

        # save to result file
        with self.resfile.open('w') as fp:
Zixian Cai's avatar
Zixian Cai committed
200
            json.dump(record, fp, indent=2, separators=(', ', ': '))
John Zhang's avatar
John Zhang committed
201

202
203
        # TODO: restructure this
        for task in data:
Zixian Cai's avatar
Zixian Cai committed
204
205
            task.aggregate_datapoint()

206
207
208
        self.results = {task.name: task.get_result() for task in data}
        return self.results
        # TODO: debug
Zixian Cai's avatar
Zixian Cai committed
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
    def compile_callback(self):
        """
        Compile the callback C source into a shared library.

        The compiler is taken from 'CC' in the taskset environment ('clang'
        when unset).  The source is 'cb_<callback name>.c' under
        CALLBACKS_DIR and the output is ``self.callback['dylib']``.  The
        command is executed through ``Language.run_in_subproc`` with the
        taskset environment.
        """
        cmd = [self.env.get('CC', 'clang'), '--shared']

        # include_dirs and library_dirs: environment references are expanded
        # and relative paths default to being relative to CALLBACKS_DIR
        for flag, key in (('-I', 'include_dirs'), ('-L', 'library_dirs')):
            for d in self.callback[key]:
                p = Path(expandenv(d, self.env))
                if not p.is_absolute():
                    p = CALLBACKS_DIR / p
                cmd.append('%s%s' % (flag, p))

        # libraries: the name is attached directly to the flag; as a single
        # argument, '-l name' would make the space part of the library name
        cmd.extend('-l%s' % lib for lib in self.callback['libraries'])

        # output
        cmd.extend(['-o', self.callback['dylib']])

        # source
        cmd.append(CALLBACKS_DIR / ('cb_%(name)s.c' % self.callback))

        Language.run_in_subproc(cmd, self.env)
242

243
244
245
246
247
class Task:
    """
    A task of benchmark performance measurement;
    corresponds to the outermost level mapping in YAML configuration file
    """
248

249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
    def __init__(self, taskset, name, **conf):
        """
        Set up one task of ``taskset`` from its configuration.

        ``conf`` must contain 'source' and a 'language' with a name; it may
        contain 'environ', 'compiler' and 'runner'.  The values of
        ``conf['environ']`` are replaced in place by their expanded forms and
        ``conf['source']`` by the resolved source path.
        """
        self.taskset = taskset
        self.name = name

        # environ: a copy of the taskset environ plus the task name, then the
        # task's own definitions expanded against that
        self.env = env = taskset.env.copy()
        env['MUBENCH_TASK_NAME'] = name
        task_env = conf.get('environ', {})
        for key in task_env:
            task_env[key] = expandenv(task_env[key], env)
        env.update(task_env)

        # output directory, benchmark and callback are shared with the taskset
        self.output_dir = taskset.output_dir
        self.benchmark = taskset.benchmark
        self.callback = taskset.callback

        # check source: resolved below the benchmark's directory in SUITE_DIR
        assert 'source' in conf, 'source not defined'
        src = SUITE_DIR / self.benchmark['name'] / conf['source']
        assert src.exists(), "source file %(src)s not found" % {'src': src}
        conf['source'] = self.srcfile = src

        # language
        lang_d = dictify(conf.get('language', {}))
        assert 'name' in lang_d, 'language not defined'
        lang_cls = get_lang(lang_d['name'])
        self.lang_cls = lang_cls
        self.lang = lang_cls.check_lang(lang_d)

        # compiler and runner settings are checked by the language class
        compiler_conf = conf.get('compiler', {})
        self.compiler = lang_cls.check_compiler(compiler_conf, self.lang, self)
        runner_conf = conf.get('runner', {})
        self.runner = lang_cls.check_runner(runner_conf, self.lang, self)

        self.config = conf

        # raw data series, one entry per recorded run
        self.data_callback = []
        self.data_t_proc = []

    def compile(self):
        if self.lang_cls.compiled:
            return self.lang_cls.compile(self)

    def run(self, target):
        res = self.lang_cls.run(target, self)
        return res

299
300
301
    def get_default_target(self):
        return self.lang_cls.get_default_target(self)

302
303
304
    # TODO: maybe refactor this.
    # Results and data should not be part of a taskset/task,
    # but rather the Result should be *about* a TaskSet
305
306
    def add_datapoint(self, stdout, stderr, t_proc):
        if self.callback['name'] == 'clock':
307
308
309
310
311
            try:
                dp = float(stdout.split()[-1])
            except:
                raise RuntimeError(
                    "Cannot extract duration from last line of stdout")
312
313
314
315
        else:
            msg = "'%(name)s' callback output processing not implemented" % self.callback
            raise NotImplementedError(msg)
        self.data_callback.append(dp)
316

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
        self.data_t_proc.append(float(t_proc))

    def aggregate_datapoint(self):
        """
        Wrap both collected data series into Result objects.

        Each Result is labelled '<taskset name>:<task name>' followed by the
        name of the series.
        """
        ids = (self.taskset.name, self.name)
        callback_label = "{}:{} callback".format(*ids)
        self.result_callback = Result(self.data_callback, callback_label)
        t_proc_label = "{}:{} t_proc".format(*ids)
        self.result_t_proc = Result(self.data_t_proc, t_proc_label)

    def get_result(self):
        return SimpleNamespace(callback=self.result_callback,
                               t_proc=self.result_t_proc)

    def __str__(self):
        return self.name


def load_yaml(yaml_s, run_dir):
    """
    Parse a YAML configuration into a list of TaskSet objects.

    :param yaml_s: YAML document whose top-level mapping maps taskset names
                   to taskset configurations
    :param run_dir: directory handed to every taskset as its configuration
                    directory
    :return: list of TaskSet objects, one per top-level entry; empty for an
             empty document
    """
    # safe_load instead of yaml.load: the bare yaml.load(stream) form can
    # construct arbitrary Python objects and is rejected by PyYAML >= 6,
    # where the Loader argument is mandatory.
    # An empty document parses to None, hence the fallback to {}.
    config_d = yaml.safe_load(yaml_s) or {}
    return [TaskSet.from_config_dict(name, ts_conf_d, run_dir)
            for name, ts_conf_d in config_d.items()]
341
342


343
344
345
def load_file(config_file):
    """
    Load the tasksets defined in a YAML configuration file.

    The directory containing the file is passed on as the run directory.
    """
    with open(config_file) as fp:
        yaml_s = fp.read()
    run_dir = Path(config_file).parent
    return load_yaml(yaml_s, run_dir)