aboutsummaryrefslogtreecommitdiff
blob: 8abd35b369b96ab043c095eea959779ef374c0af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
from rpython.flowspace.model import Variable, Constant, SpaceOperation
from rpython.tool.algo.unionfind import UnionFind
from rpython.rtyper.lltypesystem import lltype
from rpython.translator import simplify
from rpython.translator.backendopt import removenoops
from rpython.translator.backendopt.support import log


class LifeTime:

    def __init__(self, (block, var)):
        assert isinstance(var, Variable)
        self.variables = {(block, var)}
        self.creationpoints = set()   # set of ("type of creation point", ...)
        self.usepoints = set()        # set of ("type of use point",      ...)

    def absorb(self, other):
        self.variables.update(other.variables)
        self.creationpoints.update(other.creationpoints)
        self.usepoints.update(other.usepoints)

    def __repr__(self):
        return "<LifeTime creationpoints=%r usepoints=%r variables=%r>" % (self.creationpoints, self.usepoints, self.variables)


class BaseMallocRemover(object):

    IDENTITY_OPS = ('same_as',)
    SUBSTRUCT_OPS = ()
    MALLOC_OP = None
    FIELD_ACCESS = {}
    SUBSTRUCT_ACCESS = {}
    CHECK_ARRAY_INDEX = {}

    def __init__(self, verbose=True):
        self.verbose = verbose

    def check_malloc(self, op):
        return op.opname == self.MALLOC_OP

    def recreate_malloc(self, c, v):
        return SpaceOperation(self.MALLOC_OP, [c], v)

    def get_STRUCT(self, TYPE):
        raise NotImplementedError

    def visit_substruct_op(self, node, union, op):
        raise NotImplementedError

    def do_substruct_access(self, op):
        raise NotImplementedError

    def union_wrapper(self, S):
        return False

    def RTTI_dtor(self, STRUCT):
        return False

    def flatten(self, S):
        raise NotImplementedError

    def key_for_field_access(self, S, fldname):
        raise NotImplementedError

    def inline_type(self, TYPE):
        raise NotImplementedError

    def flowin(self, block, count, vars, newvarsmap):
        # in this 'block', follow where the 'var' goes to and replace
        # it by a flattened-out family of variables.  This family is given
        # by newvarsmap, whose keys are the 'flatnames'.

        def list_newvars():
            return [newvarsmap[key] for key in self.flatnames]

        assert block.operations != ()
        self.newops = []
        for op in block.operations:
            for arg in op.args[1:]:   # should be the first arg only
                assert arg not in vars
            if op.args and op.args[0] in vars:
                self.flowin_op(op, vars, newvarsmap)
            elif op.result in vars:
                assert op.opname == self.MALLOC_OP
                progress = True
                # drop the "malloc" operation
                newvarsmap = self.flatconstants.copy()   # zero initial values
                # if there are substructures, they are now individually
                # malloc'ed in an exploded way.  (They will typically be
                # removed again by the next malloc removal pass.)
                for key in self.needsubmallocs:
                    v = Variable()
                    v.concretetype = self.newvarstype[key]
                    c = Constant(v.concretetype.TO, lltype.Void)
                    if c.value == op.args[0].value:
                        progress = False   # replacing a malloc with
                                           # the same malloc!
                    newop = self.recreate_malloc(c, v)
                    self.newops.append(newop)
                    newvarsmap[key] = v
                count[0] += progress
            else:
                self.newops.append(op)

        assert block.exitswitch not in vars

        for link in block.exits:
            appended = False
            newargs = []
            for arg in link.args:
                if arg in vars:
                    if not appended:
                        newargs += list_newvars()
                        appended = True
                else:
                    newargs.append(arg)
            link.args[:] = newargs

        block.operations[:] = self.newops

    def compute_lifetimes(self, graph):
        """Compute the static data flow of the graph: returns a list of LifeTime
        instances, each of which corresponds to a set of Variables from the graph.
        The variables are grouped in the same LifeTime if a value can pass from
        one to the other by following the links.  Each LifeTime also records all
        places where a Variable in the set is used (read) or build (created).
        """
        lifetimes = UnionFind(LifeTime)

        def set_creation_point(block, var, *cp):
            _, _, info = lifetimes.find((block, var))
            info.creationpoints.add(cp)

        def set_use_point(block, var, *usepoint):
            _, _, info = lifetimes.find((block, var))
            info.usepoints.add(usepoint)

        def union(block1, var1, block2, var2):
            if isinstance(var1, Variable):
                lifetimes.union((block1, var1), (block2, var2))
            elif isinstance(var1, Constant):
                set_creation_point(block2, var2, "constant", var1)
            else:
                raise TypeError(var1)

        for var in graph.startblock.inputargs:
            set_creation_point(graph.startblock, var, "inputargs")
        set_use_point(graph.returnblock, graph.returnblock.inputargs[0], "return")
        set_use_point(graph.exceptblock, graph.exceptblock.inputargs[0], "except")
        set_use_point(graph.exceptblock, graph.exceptblock.inputargs[1], "except")

        for node in graph.iterblocks():
                for op in node.operations:
                    if op.opname in self.IDENTITY_OPS:
                        # special-case these operations to identify their input
                        # and output variables
                        union(node, op.args[0], node, op.result)
                        continue
                    if op.opname in self.SUBSTRUCT_OPS:
                        if self.visit_substruct_op(node, union, op):
                            continue
                    for i in range(len(op.args)):
                        if isinstance(op.args[i], Variable):
                            set_use_point(node, op.args[i], "op", node, op, i)
                    set_creation_point(node, op.result, "op", node, op)
                if isinstance(node.exitswitch, Variable):
                    set_use_point(node, node.exitswitch, "exitswitch", node)

        for node in graph.iterlinks():
            if isinstance(node.last_exception, Variable):
                set_creation_point(node.prevblock, node.last_exception,
                                   "last_exception")
            if isinstance(node.last_exc_value, Variable):
                set_creation_point(node.prevblock, node.last_exc_value,
                                   "last_exc_value")
            duplicate_info = set()
            for i, arg in enumerate(node.args):
                union(node.prevblock, arg,
                      node.target, node.target.inputargs[i])
                if isinstance(arg, Variable):

                    _, _, info = lifetimes.find((node.prevblock, arg))
                    if info in duplicate_info and len(info.creationpoints) > 1:
                        # same variable (up to renaming via same_as or
                        # cast_pointer) present several times in link.args, and
                        # the variable is created in two different places:
                        # consider it as a 'use' of the variable, which will
                        # disable malloc optimization (aliasing problems)
                        set_use_point(node.prevblock, arg, "dup", node, i)
                    else:
                        duplicate_info.add(info)

        return lifetimes.infos()

    def _try_inline_malloc(self, info):
        """Try to inline the mallocs creation and manipulation of the Variables
        in the given LifeTime."""
        # the values must be only ever created by a "malloc"
        lltypes = set()
        for cp in info.creationpoints:
            if cp[0] != "op":
                return False
            op = cp[2]
            if not self.check_malloc(op):
                return False
            if not self.inline_type(op.args[0].value):
                return False

            lltypes.add(op.result.concretetype)

        # there must be a single largest malloced GcStruct;
        # all variables can point to it or to initial substructures
        if len(lltypes) != 1:
            return False
        concretetype, = lltypes
        STRUCT = self.get_STRUCT(concretetype)

        # must be only ever accessed via getfield/setfield/getsubstruct/
        # direct_fieldptr, or touched by ptr_iszero/ptr_nonzero.
        # Note that same_as and cast_pointer are not recorded in usepoints.
        self.accessed_substructs = set()

        for usepoint in info.usepoints:
            if usepoint[0] != "op":
                return False
            kind, node, op, index = usepoint
            if index != 0:
                return False
            if op.opname in self.CHECK_ARRAY_INDEX:
                if not isinstance(op.args[1], Constant):
                    return False    # non-constant array index
            if op.opname in self.FIELD_ACCESS:
                pass   # ok
            elif op.opname in self.SUBSTRUCT_ACCESS:
                self.do_substruct_access(op)
            else:
                return False

        # must not remove mallocs of structures that have a RTTI with a destructor
        if self.RTTI_dtor(STRUCT):
            return False

        # must not remove unions inlined as the only field of a GcStruct
        if self.union_wrapper(STRUCT):
            return False

        # success: replace each variable with a family of variables (one per field)

        # 'flatnames' is a list of (STRUCTTYPE, fieldname_in_that_struct) that
        # describes the list of variables that should replace the single
        # malloc'ed pointer variable that we are about to remove.  For primitive
        # or pointer fields, the new corresponding variable just stores the
        # actual value.  For substructures, if pointers to them are "equivalent"
        # to pointers to the parent structure (see equivalent_substruct()) then
        # they are just merged, and flatnames will also list the fields within
        # that substructure.  Other substructures are replaced by a single new
        # variable which is a pointer to a GcStruct-wrapper; each is malloc'ed
        # individually, in an exploded way.  (The next malloc removal pass will
        # get rid of them again, in the typical case.)
        self.flatnames = []
        self.flatconstants = {}
        self.needsubmallocs = []
        self.newvarstype = {}       # map {item-of-flatnames: concretetype}
        self.direct_fieldptr_key = {}
        self.flatten(STRUCT)
        assert len(self.direct_fieldptr_key) <= 1

        variables_by_block = {}
        for block, var in info.variables:
            vars = variables_by_block.setdefault(block, set())
            vars.add(var)

        count = [0]

        for block, vars in variables_by_block.items():

            # look for variables arriving from outside the block
            newvarsmap = None
            newinputargs = []
            inputvars = set()
            for var in block.inputargs:
                if var in vars:
                    inputvars.add(var)
                    if newvarsmap is None:
                        newvarsmap = {}
                        for key in self.flatnames:
                            newvar = Variable()
                            newvar.concretetype = self.newvarstype[key]
                            newvarsmap[key] = newvar
                            newinputargs.append(newvar)
                else:
                    newinputargs.append(var)
            block.inputargs[:] = newinputargs
            if inputvars:
                self.flowin(block, count, inputvars, newvarsmap)

            # look for variables created inside the block by a malloc
            vars_created_here = []
            for op in block.operations:
                if self.check_malloc(op) and op.result in vars:
                    vars_created_here.append(op.result)
            for var in vars_created_here:
                self.flowin(block, count, {var}, newvarsmap=None)

        return count[0]

    def remove_mallocs_once(self, graph):
        """Perform one iteration of malloc removal."""
        simplify.remove_identical_vars(graph)
        self.graph = graph # to make it possible to find in the debugger
        lifetimes = self.compute_lifetimes(graph)
        progress = 0
        for info in lifetimes:
            progress += self._try_inline_malloc(info)
        return progress

    def remove_simple_mallocs(self, graph):
        """Iteratively remove (inline) the mallocs that can be simplified away."""
        tot = 0
        while True:
            count = self.remove_mallocs_once(graph)
            if count:
                if self.verbose:
                    log.malloc('%d simple mallocs removed in %r' % (count, graph.name))
                else:
                    log.dot()
                tot += count
            else:
                break
        return tot


class LLTypeMallocRemover(BaseMallocRemover):

    IDENTITY_OPS = ("same_as", "cast_pointer")
    SUBSTRUCT_OPS = ("getsubstruct", "direct_fieldptr")
    MALLOC_OP = "malloc"
    FIELD_ACCESS =     dict.fromkeys(["getfield",
                                      "setfield",
                                      "ptr_iszero",
                                      "ptr_nonzero",
                                      "getarrayitem",
                                      "setarrayitem"])
    SUBSTRUCT_ACCESS = dict.fromkeys(["getsubstruct",
                                      "direct_fieldptr",
                                      "getarraysubstruct"])
    CHECK_ARRAY_INDEX = dict.fromkeys(["getarrayitem",
                                       "setarrayitem",
                                       "getarraysubstruct"])

    def check_malloc(self, op):
        if op.opname == 'malloc':
            flags = op.args[1].value
            if flags == {'flavor': 'gc'}:
                return True
        return False

    def recreate_malloc(self, c, v):
        return SpaceOperation(self.MALLOC_OP, [c,
                                               Constant({'flavor': 'gc'},
                                                        lltype.Void)],
                              v)

    def get_STRUCT(self, TYPE):
        STRUCT = TYPE.TO
        assert isinstance(STRUCT, lltype.GcStruct)
        return STRUCT

    def visit_substruct_op(self, node, union, op):
        S = op.args[0].concretetype.TO
        if self.equivalent_substruct(S, op.args[1].value):
            # assumed to be similar to a cast_pointer
            union(node, op.args[0], node, op.result)
            return True
        return False

    def do_substruct_access(self, op):
        S = op.args[0].concretetype.TO
        name = op.args[1].value
        if not isinstance(name, str):      # access by index
            name = 'item%d' % (name,)
        self.accessed_substructs.add((S, name))

    def inline_type(self, TYPE):
        return True

    def equivalent_substruct(self, S, fieldname):
        # we consider a pointer to a GcStruct S as equivalent to a
        # pointer to a substructure 'S.fieldname' if it's the first
        # inlined sub-GcStruct.  As an extension we also allow a pointer
        # to a GcStruct containing just one field to be equivalent to
        # a pointer to that field only (although a mere cast_pointer
        # would not allow casting).  This is needed to malloc-remove
        # the 'wrapper' GcStructs introduced by previous passes of
        # malloc removal.
        if not isinstance(S, lltype.GcStruct):
            return False
        if fieldname != S._names[0]:
            return False
        FIELDTYPE = S._flds[fieldname]
        if isinstance(FIELDTYPE, lltype.GcStruct):
            if FIELDTYPE._hints.get('union'):
                return False
            return True
        if len(S._names) == 1:
            return True
        return False

    def union_wrapper(self, S):
        # check if 'S' is a GcStruct containing a single inlined *union* Struct
        if not isinstance(S, lltype.GcStruct):
            return False
        assert not S._hints.get('union')    # not supported: "GcUnion"
        return (len(S._names) == 1 and
                isinstance(S._flds[S._names[0]], lltype.Struct) and
                S._flds[S._names[0]]._hints.get('union'))

    def RTTI_dtor(self, STRUCT):
        try:
            destr_ptr = lltype.getRuntimeTypeInfo(STRUCT)._obj.destructor_funcptr
            if destr_ptr:
                return True
        except (ValueError, AttributeError):
            pass
        return False

    def flatten(self, S):
        start = 0
        if S._names and self.equivalent_substruct(S, S._names[0]):
            SUBTYPE = S._flds[S._names[0]]
            if isinstance(SUBTYPE, lltype.Struct):
                self.flatten(SUBTYPE)
                start = 1
            else:
                ARRAY = lltype.FixedSizeArray(SUBTYPE, 1)
                self.direct_fieldptr_key[ARRAY, 'item0'] = S, S._names[0]
        for name in S._names[start:]:
            key = S, name
            FIELDTYPE = S._flds[name]
            if key in self.accessed_substructs:
                self.needsubmallocs.append(key)
                self.flatnames.append(key)
                self.newvarstype[key] = lltype.Ptr(lltype.GcStruct('wrapper',
                                                          ('data', FIELDTYPE)))
            elif not isinstance(FIELDTYPE, lltype.ContainerType):
                example = FIELDTYPE._defl()
                constant = Constant(example)
                constant.concretetype = FIELDTYPE
                self.flatconstants[key] = constant
                self.flatnames.append(key)
                self.newvarstype[key] = FIELDTYPE
            #else:
            #   the inlined substructure is never accessed, drop it

    def key_for_field_access(self, S, fldname):
        if isinstance(S, lltype.FixedSizeArray):
            if not isinstance(fldname, str):      # access by index
                fldname = 'item%d' % (fldname,)
            try:
                return self.direct_fieldptr_key[S, fldname]
            except KeyError:
                pass
        return S, fldname

    def handle_unreachable(self, v_result):
        from rpython.rtyper.lltypesystem.rstr import string_repr
        msg = "unreachable operation (from malloc.py)"
        ll_msg = string_repr.convert_const(msg)
        c_msg = Constant(ll_msg, lltype.typeOf(ll_msg))
        return SpaceOperation("debug_fatalerror", [c_msg], v_result)

    def flowin_op(self, op, vars, newvarsmap):
        if op.opname in ("getfield", "getarrayitem"):
            S = op.args[0].concretetype.TO
            fldname = op.args[1].value
            key = self.key_for_field_access(S, fldname)
            if key not in newvarsmap:
                newop = self.handle_unreachable(op.result)
            elif key in self.accessed_substructs:
                c_name = Constant('data', lltype.Void)
                newop = SpaceOperation("getfield",
                                       [newvarsmap[key], c_name],
                                       op.result)
            else:
                newop = SpaceOperation("same_as",
                                       [newvarsmap[key]],
                                       op.result)
            self.newops.append(newop)
        elif op.opname in ("setfield", "setarrayitem"):
            S = op.args[0].concretetype.TO
            fldname = op.args[1].value
            key = self.key_for_field_access(S, fldname)
            if key not in newvarsmap:
                newop = self.handle_unreachable(op.result)
                self.newops.append(newop)
            elif key in self.accessed_substructs:
                c_name = Constant('data', lltype.Void)
                newop = SpaceOperation("setfield",
                                 [newvarsmap[key], c_name, op.args[2]],
                                           op.result)
                self.newops.append(newop)
            else:
                newvarsmap[key] = op.args[2]
        elif op.opname in ("same_as", "cast_pointer"):
            vars.add(op.result)
            # Consider the two pointers (input and result) as
            # equivalent.  We can, and indeed must, use the same
            # flattened list of variables for both, as a "setfield"
            # via one pointer must be reflected in the other.
        elif op.opname in ("getsubstruct", "getarraysubstruct",
                           "direct_fieldptr"):
            S = op.args[0].concretetype.TO
            fldname = op.args[1].value
            if op.opname == "getarraysubstruct":
                fldname = 'item%d' % fldname
            equiv = self.equivalent_substruct(S, fldname)
            if equiv:
                # exactly like a cast_pointer
                assert op.result not in vars
                vars.add(op.result)
            else:
                # do it with a getsubstruct on the independently
                # malloc'ed GcStruct
                if op.opname == "direct_fieldptr":
                    opname = "direct_fieldptr"
                else:
                    opname = "getsubstruct"
                try:
                    v = newvarsmap[S, fldname]
                except KeyError:
                    newop = self.handle_unreachable(op.result)
                else:
                    cname = Constant('data', lltype.Void)
                    newop = SpaceOperation(opname,
                                           [v, cname],
                                           op.result)
                self.newops.append(newop)
        elif op.opname in ("ptr_iszero", "ptr_nonzero"):
            # we know the pointer is not NULL if it comes from
            # a successful malloc
            c = Constant(op.opname == "ptr_nonzero", lltype.Bool)
            newop = SpaceOperation('same_as', [c], op.result)
            self.newops.append(newop)
        else:
            raise AssertionError(op.opname)


def remove_simple_mallocs(graph, verbose=True):
    remover = LLTypeMallocRemover(verbose)
    return remover.remove_simple_mallocs(graph)


def remove_mallocs(translator, graphs=None):
    if graphs is None:
        graphs = translator.graphs
    tot = 0
    for graph in graphs:
        count = remove_simple_mallocs(graph, verbose=translator.config.translation.verbose)
        if count:
            # remove typical leftovers from malloc removal
            removenoops.remove_same_as(graph)
            simplify.eliminate_empty_blocks(graph)
            simplify.transform_dead_op_vars(graph, translator)
            tot += count
    log.malloc("removed %d simple mallocs in total" % tot)
    return tot