To protect your data, the CISO has recommended that users enable GitLab 2FA as soon as possible.

mutyper.py 16.2 KB
Newer Older
1
2
from rpython.flowspace.model import Variable, Constant, c_last_exception
from rpython.rtyper.lltypesystem import lltype, llmemory
3
from rpython.translator.backendopt.removenoops import remove_unaryops
4
from rpython.translator.backendopt.canraise import RaiseAnalyzer
5
from rpython.translator.mu import mutype
6
7
from rpython.translator.mu.ll2mu import LL2MuMapper, varof
from rpython.rlib.objectmodel import CDefinedIntSymbolic
8
9
from rpython.tool.ansi_mandelbrot import Driver
from rpython.tool.ansi_print import AnsiLogger
10
11
import re

12
13
14
15
16
17
18
19
20
# Module logger; used below for the progress messages of specialise_all()
# and prepare().
log = AnsiLogger("MuTyper")
# Progress driver: specialise_all() calls restart() before and after its
# loop and dot() once per specialised graph.
mdb = Driver()

class MuTyper:
    """Rewrites low-level (lltype) flow graphs in place into Mu-typed graphs.

    The graphs are collected and cleaned up by ``prepare_all()`` and then
    converted by ``specialise_all()``; the actual type, value and operation
    mapping is delegated to an ``LL2MuMapper``.
    """

    # Operations that are kept as they are: only the types of their
    # arguments and result are converted.
    _PASSTHROUGH_OPS = frozenset([
        "hint",
        "likely",
        "debug_print",  # NOTE: skip now because it's too complex to implement
        "debug_fatalerror",
        "gc_add_memory_pressure",
        "gc_set_max_heap_size",
        "gc_thread_after_fork",
        "gc_writebarrier",
        'gc_fq_register',
        "jit_conditional_call",
        "jit_force_quasi_immutable",
        "jit_force_virtual",
        "jit_marker",
    ])

    # Operations prefixed with 'mu_' are kept as they are too, except for
    # these, which are still handed to LL2MuMapper.map_op().
    _MAPPED_MU_OPS = frozenset([
        'mu_getgcidhash',
        'mu_setgcidhash',
        'mu_thread_exit',
        'mu_threadlocalref_init',
        'mu_meta_barebuf2cstriref',
        'mu_meta_lst2carr',
        'mu_meta_xxx_by_id',
    ])

    def __init__(self, tlc):
        # type: (rpython.translator.translator.TranslationContext) -> None
        """Create a typer working on the graphs of translation context ``tlc``."""
        self._graphname_cntr_dict = {}
        self.tlc = tlc
        self.ll2mu = LL2MuMapper(tlc.rtyper)
        # id(object reference) -> global cell holding that reference
        self._objrefid2gcl_dic = {}
        self.raise_analyser = RaiseAnalyzer(None)   # translator argument not needed I think

    def init_threadlocal_struct_type(self):
        """Determine the Mu type of the thread-local struct and register it.

        The type is stored in ``self.TLStt`` and passed on to the mapper.
        """
        tlflds = self.tlc.annotator.bookkeeper.thread_local_fields
        if len(tlflds) == 0:
            # no thread-local fields: MU_VOID is used instead of a dummy struct
            self.TLStt = mutype.MU_VOID
        else:
            fields = [(tlf.fieldname, self.ll2mu.map_type(tlf.FIELDTYPE))
                      for tlf in tlflds]
            self.TLStt = mutype.MuStruct('mu_tlstt', *fields)
        self.ll2mu.set_threadlocal_struct_type(self.TLStt)

    def prepare_all(self):
        """Run ``prepare()`` on the translation context's graphs.

        The result is stored in ``self.graphs``.
        """
        self.graphs = prepare(self.tlc.graphs, self.tlc.entry_point_graph)

    def specialise_all(self):
        """Specialise every prepared graph, plus the mapper's helper graphs.

        Afterwards both ``self.graphs`` and ``self.tlc.graphs`` are the list
        of specialised graphs.

        :raises AttributeError: if ``prepare_all()`` has not been run
        """
        if not hasattr(self, 'graphs'):
            raise AttributeError("don't have graphs. Run prepare_all() first.")

        # helper graphs known to the mapper go through prepare() as well
        for g in self.ll2mu.llhelper_graphs:
            self.graphs = self.graphs.union(prepare([g], g))

        log.specialise_all('specialising graphs...')
        mdb.restart()

        processed = []
        while len(self.graphs) > 0:
            g = self.graphs.pop()
            self.specialise_graph(g)
            processed.append(g)
            mdb.dot()

        mdb.restart()

        self.tlc.graphs = self.graphs = processed

    def specialise_graph(self, g):
        """Attach a Mu signature to graph ``g`` and specialise all its blocks."""
        # The low-level types must be read before the blocks are specialised,
        # because specialising replaces the concretetype of the variables.
        if len(g.returnblock.inputargs) == 1:
            ret_llt = g.returnblock.inputargs[0].concretetype
        else:
            ret_llt = lltype.Void
        arg_llts = [arg.concretetype for arg in g.startblock.inputargs]

        g.sig = mutype.MuFuncSig([self.ll2mu.map_type(arg_t) for arg_t in arg_llts],
                                 [self.ll2mu.map_type(ret_llt)] if ret_llt != lltype.Void else [])

        for blk in g.iterblocks():
            self.specialise_block(blk)

        remove_unaryops(g, ['same_as', 'likely'])

    def specialise_block(self, blk):
        """Specialise the input arguments, operations and exits of ``blk``.

        The block's operation list is replaced by the Mu operations, ending
        with the operation that transfers control (return, branch, switch, or
        a call carrying an exception clause).
        """
        # specialise inputargs
        blk.inputargs = [self.specialise_arg(arg) for arg in blk.inputargs]

        # specialise operations
        muops = []
        for op in blk.operations:
            muops.extend(self.specialise_operation(op))

        # specialise exits
        ldgcell_ops = []
        for e in blk.exits:
            e.args = [self.specialise_arg(arg) for arg in e.args]
            ldgcell_ops.extend(self.extract_load_gcell(e.args))

        if blk.exitswitch is c_last_exception:
            self._terminate_exception_block(blk, muops)
        else:
            self._terminate_plain_block(blk, muops)

        # loads of global cells used by the exit links go right before the
        # final operation
        muops = muops[:-1] + ldgcell_ops + [muops[-1]]
        blk.operations = muops

    def _terminate_plain_block(self, blk, muops):
        """Append the terminating operation of a block without exception exits."""
        n_exits = len(blk.exits)

        if n_exits == 0:
            if len(muops) == 0 or muops[-1].opname not in ("mu_throw", "mu_comminst"):
                muops.append(self.ll2mu.gen_mu_ret(blk.inputargs[0] if len(blk.inputargs) == 1 else None))

        elif n_exits == 1:
            muops.append(self.ll2mu.gen_mu_branch(blk.exits[0]))

        elif n_exits == 2:
            blk.exitswitch = self.specialise_arg(blk.exitswitch)
            if blk.exitswitch.concretetype is not mutype.MU_INT1:
                # the condition is not a 1-bit integer: compare it with 1 first
                MuT = blk.exitswitch.concretetype
                flag = varof(mutype.MU_INT1)
                muops.append(self.ll2mu.gen_mu_cmpop('EQ', blk.exitswitch,
                                                     Constant(MuT._val_type(1), MuT), flag))
                blk.exitswitch = flag
            muops.append(self.ll2mu.gen_mu_branch2(blk.exitswitch, blk.exits[1], blk.exits[0]))

        else:  # more than 2 exits -> use SWITCH statement
            blk.exitswitch = self.specialise_arg(blk.exitswitch)
            cases = [e for e in blk.exits if e.exitcase != 'default']
            for e in cases:
                e.exitcase = self.specialise_arg(Constant(e.llexitcase, lltype.typeOf(e.llexitcase)))
            # without an explicit default exit, the last case is used as default
            defl_exit = next((e for e in blk.exits if e.exitcase == 'default'), cases[-1])
            muops.append(self.ll2mu.gen_mu_switch(blk.exitswitch, defl_exit, cases))

    def _terminate_exception_block(self, blk, muops):
        """Wire up the exits of a block whose exitswitch is c_last_exception."""
        last_op = muops[-1]

        if not self.raise_analyser.can_raise(last_op):
            # the last operation can't raise exception
            # in this case the exception branching is dropped
            # (see exceptiontransform.py:271 corresponding behaviour)
            # NOTE: in Mu, CCALL will NEVER throw a Mu exception.
            # still not sure why calling a native C library function will throw an RPython exception...
            # so this case follows dropping the exception branching
            assert last_op.opname != 'mu_comminst'  # why is COMMINST the last operation?
            muops.append(self.ll2mu.gen_mu_branch(blk.exits[0]))
            return

        if last_op.opname != 'mu_binop':
            # exceptional branching for mu_call: no extra operation is added,
            # the exception clause is recorded in the call's meta information
            metainfo = last_op.args[-1].value
            metainfo['excclause'] = self.ll2mu.exc_clause(blk.exits[0], blk.exits[1])
            return

        from rpython.config.translationoption import get_translation_config
        config = get_translation_config()
        if config.translation.mu.no_ovf:
            # NOTE: if no_ovf flag is set, assume no overflow will happen, always take the default exit
            muops.append(self.ll2mu.gen_mu_branch(blk.exits[0]))
        else:
            metainfo = last_op.args[-1].value
            statres_V = metainfo['status'][1][0]  # only V is used at this moment
            blk.exitswitch = statres_V
            muops.append(self.ll2mu.gen_mu_branch2(blk.exitswitch, blk.exits[1], blk.exits[0]))

    def specialise_arg(self, arg):
        """Convert a Variable or Constant from its low-level type to a Mu type.

        The conversion happens in place; an argument whose concretetype is
        not a ``lltype.LowLevelType`` is taken as already processed.

        A constant holding a non-null object reference is replaced by a
        global cell storing that reference (one cell per referenced object);
        ``extract_load_gcell()`` later inserts the matching load.

        :return: ``arg`` itself
        """
        LLT = arg.concretetype
        if not isinstance(LLT, lltype.LowLevelType):
            return arg      # has been processed already

        if isinstance(arg, Variable):
            arg.concretetype = self.ll2mu.map_type(LLT)
            self.ll2mu.resolve_ptr_types()
            return arg

        if not isinstance(arg, Constant):
            return arg

        llv = arg.value
        if LLT is lltype.Void:
            if isinstance(llv, lltype.LowLevelType):  # a type constant
                arg.__init__(self.ll2mu.map_type(llv), mutype.MU_VOID)
                self.ll2mu.resolve_ptr_types()
            else:   # for other non-translation constants, just keep the value
                arg.__init__(llv, mutype.MU_VOID)
            return arg

        if isinstance(llv, CDefinedIntSymbolic) and llv.default == '?':
            return arg  # ignore it; it should be dealt with when translating ops

        MuT = self.ll2mu.map_type(LLT)
        muv = self.ll2mu.map_value(llv)
        self.ll2mu.resolve_ptr_types()
        self.ll2mu.resolve_ptr_values()

        if isinstance(muv, mutype._muufuncptr):
            MuT = mutype.mutypeOf(muv)

        assert mutype.mutypeOf(muv) == MuT

        if isinstance(muv, mutype._muobject_reference) and not muv._is_null():
            GCl_T = mutype.MuGlobalCell(MuT)
            key = id(muv)
            if key in self._objrefid2gcl_dic:
                gcl = self._objrefid2gcl_dic[key]
            else:
                gcl = mutype.new(GCl_T)
                gcl._store(muv)
                self._objrefid2gcl_dic[key] = gcl
            arg.__init__(gcl, GCl_T)
        else:
            arg.__init__(muv, MuT)
        return arg

    @staticmethod
    def _is_passthrough_op(opname):
        """Tell whether the operation named ``opname`` is kept untranslated."""
        if opname in MuTyper._PASSTHROUGH_OPS:
            return True
        return opname.startswith('mu_') and opname not in MuTyper._MAPPED_MU_OPS

    def specialise_operation(self, llop):
        """Translate one low-level operation into a list of Mu operations.

        The types of the arguments and of the result are always converted.
        Pass-through operations are returned unchanged in a one-element list.
        """
        llop.args = [self.specialise_arg(arg) for arg in llop.args]
        llop.result = self.specialise_arg(llop.result)

        if self._is_passthrough_op(llop.opname):    # translate the concretetype regardless
            return [llop]

        # extract_load_gcell() rewrites llop.args, so it must run before map_op()
        muops = []
        muops.extend(self.extract_load_gcell(llop.args))
        muops.extend(self.ll2mu.map_op(llop))
        return muops

    def extract_load_gcell(self, args):
        """Replace global-cell constants in ``args`` by loaded variables.

        ``args`` is modified in place.

        :return: the list of load operations producing the new variables
        """
        loadops = []
        for i, arg in enumerate(args):
            if isinstance(arg, Constant) and isinstance(arg.concretetype, mutype.MuGlobalCell):
                ldvar = Variable('ldgcl')
                ldvar.concretetype = arg.concretetype.TO
                loadops.append(self.ll2mu.gen_mu_load(arg, ldvar))
                args[i] = ldvar
        return loadops
233
234
235
236


# -----------------------------------------------------------------------------
# preparation before mutyper
237
def graph_closure(g_entry):
    """
    Find closure of graphs from g_entry, including graphs in:
    - direct/indirect calls
    - function references in heap objects

    :param g_entry: the graph in the list that is the entry point
    :return: a set of FunctionGraphs as closure
    """
    closure = set()
    pending_graphs = [g_entry]
    pending_objects = []
    visited_obj = set()

    def is_ptr_const(a):
        return isinstance(a, Constant) and isinstance(a.value, lltype._ptr)

    def _find_funcrefs(obj):
        # Pointers are only queued; the object they refer to is examined
        # later by the loop at the end of visit().
        if isinstance(obj, lltype._ptr):
            refnt = obj._obj
            if isinstance(refnt, lltype._struct):
                refnt = refnt._normalizedcontainer()
            pending_objects.append(refnt)

        elif isinstance(obj, lltype._struct):
            if obj in visited_obj:
                return
            visited_obj.add(obj)
            for fld in lltype.typeOf(obj)._flds:
                _find_funcrefs(obj._getattr(fld))

        elif isinstance(obj, lltype._array):
            if obj in visited_obj:
                return
            visited_obj.add(obj)
            # only items that are containers or pointers are examined
            if isinstance(lltype.typeOf(obj).OF, (lltype.ContainerType, lltype.Ptr)):
                for i in range(len(obj.items)):
                    _find_funcrefs(obj.getitem(i))

        elif isinstance(obj, lltype._opaque):
            if hasattr(obj, 'container'):
                _find_funcrefs(obj._normalizedcontainer())

        elif isinstance(obj, llmemory._wref):
            _find_funcrefs(obj._dereference())

        elif isinstance(obj, lltype._func):
            if hasattr(obj, 'graph'):
                pending_graphs.append(obj.graph)

    def visit(graph):
        if graph in closure:
            return
        closure.add(graph)

        for blk in graph.iterblocks():
            for op in blk.operations:
                if op.opname == 'indirect_call':
                    # the last argument holds the candidate graphs, if known
                    possible_graphs = op.args[-1].value
                    if possible_graphs:
                        pending_graphs.extend(possible_graphs)
                else:
                    for arg in op.args:
                        if is_ptr_const(arg):
                            _find_funcrefs(arg.value)
            for e in blk.exits:
                for arg in e.args:
                    if is_ptr_const(arg):
                        _find_funcrefs(arg.value)

        # process all pending objects before moving on to next graph
        while pending_objects:
            _find_funcrefs(pending_objects.pop())

    while pending_graphs:
        visit(pending_graphs.pop())

    return closure


def prepare(graphs, entry_graph):
    """
    Collect and clean up the graphs reachable from entry_graph.

    - the graphs are replaced by the closure computed from entry_graph
      (the ``graphs`` argument itself is only used for the log message)
    - every graph is renamed
    - Void arguments are removed from inputargs, links and operations
    - 'force_cast' and 'cast_primitive' operations remember their original
      argument and result types
    - constants appearing in inputargs are replaced by dummy variables

    :param graphs: the graphs known before pruning
    :param entry_graph: the graph the closure is computed from
    :return: the set of prepared graphs
    """
    from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS

    # Built once per call; _keep_arg() is invoked for every argument of
    # every operation.
    _OPS_ALLOW_LLTYPE_ARGS = frozenset(
        _op for _op in LL_OPERATIONS if _op.startswith(("int_", "adr_")))
    _OPS_KEEP_ALL_ARGS = ('setfield', 'setinteriorfield')

    def _keep_arg(arg, opname=''):
        # Returns True if the argument/parameter is to be kept
        if (opname in ('mu_comminst', 'mu_meta_xxx_by_id')) or ('malloc' in opname) or \
                ('setfield' in opname) or (arg.concretetype != lltype.Void):
            return True

        if isinstance(arg, Constant):
            if isinstance(arg.value, (str, list)):
                return True
            elif isinstance(arg.value, lltype.LowLevelType):
                return opname in _OPS_ALLOW_LLTYPE_ARGS
        # log.keep_arg("Throwing argument %(arg)r from operation %(opname)s" % locals())
        return opname in _OPS_KEEP_ALL_ARGS

    name_dic = {}

    def rename(g):
        """ reassign graph names (shorter names more readable) """
        name = g.name if '.' in g.name else g.name.split('__')[0]
        name = re.sub(r'[^0-9a-zA-Z_-]', '_', name)     # replace illegal characters in names
        if name not in name_dic:
            ctr = 0
            name_dic[name] = ([g], ctr)
        else:
            # another graph already uses this name: number them
            gs, ctr = name_dic[name]
            if g not in gs:
                gs.append(g)
                ctr += 1
                name_dic[name] = (gs, ctr)
        g.name = "%s_%d" % (name, ctr)

    # Task 1: prune and remove inlined graphs
    n0 = len(graphs)
    graphs = graph_closure(entry_graph)
    log.prune("%d -> %d graphs" % (n0, len(graphs)))

    for g in graphs:
        rename(g)

        for blk in list(g.iterblocks()) + [g.returnblock]:  # force include return block, in case always throwing exceptions
            # Task 2: Remove Void args and parameters in inputargs, operations and links
            blk.inputargs = [arg for arg in blk.inputargs if arg.concretetype != lltype.Void]
            for lnk in blk.exits:
                lnk.args = [arg for arg in lnk.args if arg.concretetype != lltype.Void]
            for op in blk.operations:
                op.args = [arg for arg in op.args if _keep_arg(arg, op.opname)]

                if op.opname in ('force_cast', 'cast_primitive'):
                    # HACK: save original arg and result types to discern signedness.
                    op._src_llt = op.args[0].concretetype
                    op._res_llt = op.result.concretetype

            # replace constants with dummy variables in inputargs --> they shouldn't appear there
            for i, inputarg in enumerate(blk.inputargs):
                if isinstance(inputarg, Constant):
                    _v = Variable('dummy')
                    _v.concretetype = inputarg.concretetype
                    blk.inputargs[i] = _v

    return graphs