9 files changed, 159 insertions, 64 deletions
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
index 410f30635c..d33d5ebcde 100644
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -1019,58 +1019,55 @@ class TestUnicode(BaseApiTest):
                      0x2009, 0x200a,
                      #0x200b is in Other_Default_Ignorable_Code_Point in 4.1.0
                      0x2028, 0x2029, 0x202f, 0x205f, 0x3000]:
-            assert Py_UNICODE_ISSPACE(space, unichr(char))
-        assert not Py_UNICODE_ISSPACE(space, u'a')
-
-        assert Py_UNICODE_ISALPHA(space, u'a')
-        assert not Py_UNICODE_ISALPHA(space, u'0')
-        assert Py_UNICODE_ISALNUM(space, u'a')
-        assert Py_UNICODE_ISALNUM(space, u'0')
-        assert not Py_UNICODE_ISALNUM(space, u'+')
-
-        assert Py_UNICODE_ISDECIMAL(space, u'\u0660')
-        assert not Py_UNICODE_ISDECIMAL(space, u'a')
-        assert Py_UNICODE_ISDIGIT(space, u'9')
-        assert not Py_UNICODE_ISDIGIT(space, u'@')
-        assert Py_UNICODE_ISNUMERIC(space, u'9')
-        assert not Py_UNICODE_ISNUMERIC(space, u'@')
+            assert Py_UNICODE_ISSPACE(space, char)
+        assert not Py_UNICODE_ISSPACE(space, ord('a'))
+
+        assert Py_UNICODE_ISALPHA(space, ord('a'))
+        assert not Py_UNICODE_ISALPHA(space, ord('0'))
+        assert Py_UNICODE_ISALNUM(space, ord('a'))
+        assert Py_UNICODE_ISALNUM(space, ord('0'))
+        assert not Py_UNICODE_ISALNUM(space, ord('+'))
+
+        assert Py_UNICODE_ISDECIMAL(space, ord(u'\u0660'))
+        assert not Py_UNICODE_ISDECIMAL(space, ord('a'))
+        assert Py_UNICODE_ISDIGIT(space, ord('9'))
+        assert not Py_UNICODE_ISDIGIT(space, ord('@'))
+        assert Py_UNICODE_ISNUMERIC(space, ord('9'))
+        assert not Py_UNICODE_ISNUMERIC(space, ord('@'))
 
         for char in [0x0a, 0x0d, 0x1c, 0x1d, 0x1e, 0x85, 0x2028, 0x2029]:
-            assert Py_UNICODE_ISLINEBREAK(space, unichr(char))
+            assert Py_UNICODE_ISLINEBREAK(space, char)
 
-        assert Py_UNICODE_ISLOWER(space, u'\xdf') # sharp s
-        assert Py_UNICODE_ISUPPER(space, u'\xde') # capital thorn
-        assert Py_UNICODE_ISLOWER(space, u'a')
-        assert not Py_UNICODE_ISUPPER(space, u'a')
-        assert not Py_UNICODE_ISTITLE(space, u'\xce')
+        assert Py_UNICODE_ISLOWER(space, ord('\xdf')) # sharp s
+        assert Py_UNICODE_ISUPPER(space, ord('\xde')) # capital thorn
+        assert Py_UNICODE_ISLOWER(space, ord('a'))
+        assert not Py_UNICODE_ISUPPER(space, ord('a'))
+        assert not Py_UNICODE_ISTITLE(space, ord('\xce'))
         assert Py_UNICODE_ISTITLE(space,
-            u'\N{LATIN CAPITAL LETTER L WITH SMALL LETTER J}')
+            ord(u'\N{LATIN CAPITAL LETTER L WITH SMALL LETTER J}'))
 
     def test_TOLOWER(self, space):
-        assert Py_UNICODE_TOLOWER(space, u'�') == u'�'
-        assert Py_UNICODE_TOLOWER(space, u'�') == u'�'
+        assert Py_UNICODE_TOLOWER(space, ord(u'�')) == ord(u'�')
+        assert Py_UNICODE_TOLOWER(space, ord(u'�')) == ord(u'�')
 
     def test_TOUPPER(self, space):
-        assert Py_UNICODE_TOUPPER(space, u'�') == u'�'
-        assert Py_UNICODE_TOUPPER(space, u'�') == u'�'
+        assert Py_UNICODE_TOUPPER(space, ord(u'�')) == ord(u'�')
+        assert Py_UNICODE_TOUPPER(space, ord(u'�')) == ord(u'�')
 
     def test_TOTITLE(self, space):
-        assert Py_UNICODE_TOTITLE(space, u'/') == u'/'
-        assert Py_UNICODE_TOTITLE(space, u'�') == u'�'
-        assert Py_UNICODE_TOTITLE(space, u'�') == u'�'
+        assert Py_UNICODE_TOTITLE(space, ord('/')) == ord('/')  # maps to itself
 
     def test_TODECIMAL(self, space):
-        assert Py_UNICODE_TODECIMAL(space, u'6') == 6
-        assert Py_UNICODE_TODECIMAL(space, u'A') == -1
+        assert Py_UNICODE_TODECIMAL(space, ord('6')) == 6
+        assert Py_UNICODE_TODECIMAL(space, ord('A')) == -1  # no decimal value
 
     def test_TODIGIT(self, space):
-        assert Py_UNICODE_TODIGIT(space, u'6') == 6
-        assert Py_UNICODE_TODIGIT(space, u'A') == -1
+        assert Py_UNICODE_TODIGIT(space, ord('6')) == 6
+        assert Py_UNICODE_TODIGIT(space, ord('A')) == -1  # no digit value
 
     def test_TONUMERIC(self, space):
-        assert Py_UNICODE_TONUMERIC(space, u'6') == 6.0
-        assert Py_UNICODE_TONUMERIC(space, u'A') == -1.0
-        assert Py_UNICODE_TONUMERIC(space, u'\N{VULGAR FRACTION ONE HALF}') == .5
+        assert Py_UNICODE_TONUMERIC(space, ord('6')) == 6.0
+        assert Py_UNICODE_TONUMERIC(space, ord('A')) == -1.0  # no numeric value
 
     def test_transform_decimal(self, space):
         def transform_decimal(s):
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
index cafafd4d9e..8c474bbd12 100644
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -225,14 +225,19 @@ class StringMethods(object):
                 ovfcheck(len(splitted) * tabsize)
         except OverflowError:
             raise oefmt(space.w_OverflowError, "new string is too long")
-        expanded = oldtoken = splitted.pop(0)
-
-        for token in splitted:
-            expanded += self._multi_chr(self._chr(' ')) * self._tabindent(oldtoken,
-                                                         tabsize) + token
+        newlen = self._len() - len(splitted) + 1
+        builder = self._builder(len(value))
+        oldtoken = splitted[0]
+        builder.append(oldtoken)
+
+        for index in range(1, len(splitted)):
+            token = splitted[index]
+            dist = self._tabindent(oldtoken, tabsize)
+            builder.append_multiple_char(' ', dist)
+            builder.append(token)
+            newlen += dist
             oldtoken = token
-
-        return self._new(expanded)
+        return self._new(builder.build())
 
     def _tabindent(self, token, tabsize):
         """calculates distance behind the token to the next tabstop"""
diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py
index 3d2d1a6dc6..a39432263e 100644
--- a/pypy/objspace/std/test/test_bytesobject.py
+++ b/pypy/objspace/std/test/test_bytesobject.py
@@ -542,8 +542,6 @@ class AppTestBytesObject:
 
     def test_expandtabs_overflows_gracefully(self):
         import sys
-        if sys.maxsize > (1 << 32):
-            skip("Wrong platform")
         raises((MemoryError, OverflowError), b't\tt\t'.expandtabs, sys.maxsize)
 
     def test_expandtabs_0(self):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
index b33daa03ba..1fe0a792c1 100644
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -710,14 +710,16 @@ class AppTestUnicodeString:
 
     def test_expandtabs_overflows_gracefully(self):
         import sys
-        if sys.maxsize > (1 << 32):
-            skip("Wrong platform")
         raises((OverflowError, MemoryError), 't\tt\t'.expandtabs, sys.maxsize)
 
     def test_expandtabs_0(self):
         assert u'x\ty'.expandtabs(0) == u'xy'
         assert u'x\ty'.expandtabs(-42) == u'xy'
 
+    def test_expandtabs_bug(self):  # a non-ASCII char must count as one column
+        assert u"a\u266f\ttest".expandtabs() == u'a\u266f      test'
+        assert u"a\u266f\ttest".expandtabs(0) == u'a\u266ftest'
+
     def test_translate(self):
         import sys
         assert 'bbbc' == 'abababc'.translate({ord('a'):None})
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index b11fa1863a..9a641b952f 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -605,24 +605,57 @@ class W_UnicodeObject(W_Root):
         value = self._utf8
         if not value:
             return self._empty()
+        if tabsize == 0:
+            res, replacements = replace_count(value, '\t', '')
+            if not replacements and type(self) is W_UnicodeObject:
+                return self
+            newlength = self._length - replacements
+            assert res is not None
+            return W_UnicodeObject(res, newlength)
 
         splitted = value.split('\t')
 
         try:
-            if tabsize > 0:
-                ovfcheck(len(splitted) * tabsize)
+            ovfcheck(len(splitted) * tabsize)
         except OverflowError:
             raise oefmt(space.w_OverflowError, "new string is too long")
-        expanded = oldtoken = splitted.pop(0)
-        newlen = self._len() - len(splitted)
+        newlen = self._len() - len(splitted) + 1
+        builder = StringBuilder(len(value))
+        oldtoken = splitted[0]
+        builder.append(oldtoken)
 
-        for token in splitted:
+        for index in range(1, len(splitted)):
+            token = splitted[index]
             dist = self._tabindent(oldtoken, tabsize)
-            expanded += ' ' * dist + token
+            builder.append_multiple_char(' ', dist)
+            builder.append(token)
             newlen += dist
             oldtoken = token
 
-        return W_UnicodeObject(expanded, newlen)
+        return W_UnicodeObject(builder.build(), newlen)
+
+    def _tabindent(self, token, tabsize):  # spaces from token end to next tabstop
+        if tabsize <= 0:
+            return 0
+        distance = tabsize  # an empty token gets a full tab width
+        if token:
+            distance = 0
+            offset = len(token)  # index into token, walked backwards below
+
+            while 1:
+                if token[offset-1] == "\n" or token[offset-1] == "\r":
+                    break
+                distance += 1  # one column per codepoint stepped over
+                offset = rutf8.prev_codepoint_pos(token, offset)
+                if offset == 0:
+                    break
+
+            # distance = codepoints since the last line break (or token start)
+            distance = (tabsize - distance) % tabsize
+            if distance == 0:
+                distance = tabsize
+
+        return distance
 
     def _join_utf8_len_w(self, space, w_element, i):
         try:
diff --git a/pypy/objspace/test/test_descroperation.py b/pypy/objspace/test/test_descroperation.py
index ba4d7d3f6e..67f885ab0d 100644
--- a/pypy/objspace/test/test_descroperation.py
+++ b/pypy/objspace/test/test_descroperation.py
@@ -1,4 +1,5 @@
 # -*- encoding: utf-8 -*-
+from pytest import raises
 
 class Test_DescrOperation:
 
diff --git a/rpython/translator/backendopt/all.py b/rpython/translator/backendopt/all.py
index af0a36a171..9b96c18c67 100644
--- a/rpython/translator/backendopt/all.py
+++ b/rpython/translator/backendopt/all.py
@@ -90,6 +90,7 @@ def backend_optimizations(translator, graphs=None, secondary=False,
         constfold(config, graphs)
 
     if config.storesink:
+        remove_obvious_noops()
         for graph in graphs:
             storesink_graph(graph)
 
diff --git a/rpython/translator/backendopt/storesink.py b/rpython/translator/backendopt/storesink.py
index ab1c04565a..be59264741 100644
--- a/rpython/translator/backendopt/storesink.py
+++ b/rpython/translator/backendopt/storesink.py
@@ -1,6 +1,7 @@
 
 from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.flowspace.model import mkentrymap, Variable
+from rpython.rtyper.lltypesystem import lltype
+from rpython.flowspace.model import mkentrymap, Variable, Constant
 from rpython.translator.backendopt import removenoops
 from rpython.translator import simplify
 
@@ -75,25 +76,57 @@ def _storesink_block(block, cache, inputlink):
         for k in cache.keys():
             if k[0].concretetype == concretetype and k[1] == fieldname:
                 del cache[k]
+    replacements = {}  # op.result -> the value that op was replaced with
+    def replace(op, res):  # rewrite op as same_as(res) and remember it
+        op.opname = 'same_as'
+        op.args = [res]
+        replacements[op.result] = res
+
+    def get_rep(arg):  # the recorded replacement for arg, else arg itself
+        return replacements.get(arg, arg)
 
     added_some_same_as = False
     for op in block.operations:
         if op.opname == 'getfield':
-            tup = (op.args[0], op.args[1].value)
-            res = cache.get(tup, None)
-            if res is not None:
-                op.opname = 'same_as'
-                op.args = [res]
-                added_some_same_as = True
+            arg0 = get_rep(op.args[0])
+            field = op.args[1].value
+            if (
+                    isinstance(arg0, Constant) and
+                    arg0.concretetype.TO._immutable_field(field) and
+                    not isinstance(arg0.value._obj, int) # tagged int
+            ):
+                # reading an immutable field from a constant
+                llres = getattr(arg0.value, field)
+                concretetype = getattr(arg0.concretetype.TO, field)
+                res = Constant(llres, concretetype)
+                replace(op, res)
+            else:
+                tup = (arg0, op.args[1].value)
+                res = cache.get(tup, None)
+                if res is not None:
+                    replace(op, res)
+                else:
+                    cache[tup] = op.result
+        elif op.opname == 'cast_pointer':
+            arg0 = get_rep(op.args[0])
+            if isinstance(arg0, Constant):
+                llres = lltype.cast_pointer(op.result.concretetype, arg0.value)
+                res = Constant(llres, op.result.concretetype)
+                replace(op, res)
             else:
-                cache[tup] = op.result
+                tup = (arg0, op.result.concretetype)
+                res = cache.get(tup, None)
+                if res is not None:
+                    replace(op, res)
+                else:
+                    cache[tup] = op.result
         elif op.opname in ('setarrayitem', 'setinteriorfield', "malloc", "malloc_varsize"):
             pass
         elif op.opname == 'setfield':
-            target = op.args[0]
+            target = get_rep(op.args[0])
             field = op.args[1].value
             clear_cache_for(cache, target.concretetype, field)
             cache[target, field] = op.args[2]
         elif has_side_effects(op):
             cache.clear()
-    return added_some_same_as
+    return bool(replacements)
diff --git a/rpython/translator/backendopt/test/test_all.py b/rpython/translator/backendopt/test/test_all.py
index 0081fce223..346be88c90 100644
--- a/rpython/translator/backendopt/test/test_all.py
+++ b/rpython/translator/backendopt/test/test_all.py
@@ -305,3 +305,28 @@ class TestLLType(object):
         t = self.translateopt(f, [], replace_we_are_jitted=True)
         graph = graphof(t, f)
         assert graph.startblock.exits[0].args[0].value == 2
+
+    def test_getfield_vtable(self):  # no getfield may remain in f's graph
+        class Base(object):
+            pass
+        class A(Base):
+            def f(self):
+                return 1
+        class B(Base):
+            def f(self):
+                return 2
+        def g(i):  # returns an A or a B depending on the sign of i
+            if i > 0:
+                return A()
+            return B()
+        def f(i):
+            if i > 0:
+                a = g(5)
+                return a.f()
+            else:
+                a = g(-5)
+                return a.f()
+        t = self.translateopt(f, [int])
+        graph = graphof(t, f)
+        s = summary(graph)  # summary of the operations left in the graph
+        assert 'getfield' not in s