diff options
-rw-r--r-- | pypy/objspace/std/bytesobject.py | 22 | ||||
-rw-r--r-- | pypy/objspace/std/test/test_bytesobject.py | 4 | ||||
-rw-r--r-- | pypy/objspace/std/test/test_unicodeobject.py | 4 | ||||
-rw-r--r-- | pypy/objspace/std/unicodeobject.py | 2 | ||||
-rw-r--r-- | rpython/rlib/rstring.py | 8 | ||||
-rw-r--r-- | rpython/rlib/test/test_rstring.py | 11 |
6 files changed, 48 insertions, 3 deletions
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py index 6315c5d6cf..2316f6e513 100644 --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -690,15 +690,33 @@ class W_BytesObject(W_AbstractBytesObject): self_as_unicode._utf8.find(w_sub._utf8) >= 0) return self._StringMethods_descr_contains(space, w_sub) - _StringMethods_descr_replace = descr_replace @unwrap_spec(count=int) def descr_replace(self, space, w_old, w_new, count=-1): + from rpython.rlib.rstring import replace old_is_unicode = space.isinstance_w(w_old, space.w_unicode) new_is_unicode = space.isinstance_w(w_new, space.w_unicode) if old_is_unicode or new_is_unicode: self_as_uni = unicode_from_encoded_object(space, self, None, None) return self_as_uni.descr_replace(space, w_old, w_new, count) - return self._StringMethods_descr_replace(space, w_old, w_new, count) + + # almost copy of StringMethods.descr_replace :-( + input = self._value + + sub = self._op_val(space, w_old) + by = self._op_val(space, w_new) + # the following two lines are for being bug-to-bug compatible + # with CPython: see issue #2448 + if count >= 0 and len(input) == 0: + return self._empty() + try: + res = replace(input, sub, by, count) + except OverflowError: + raise oefmt(space.w_OverflowError, "replace string is too long") + # difference: reuse self if no replacement was done + if type(self) is W_BytesObject and res is input: + return self + + return self._new(res) _StringMethods_descr_join = descr_join def descr_join(self, space, w_list): diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py index cc15f97d54..2feca7ab5e 100644 --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -342,6 +342,10 @@ class AppTestBytesObject: assert 'one'.replace(buffer('o'), buffer('n'), 1) == 'nne' assert 'one'.replace(buffer('o'), buffer('n')) == 'nne' + def test_replace_no_occurrence(self): + x = b"xyz" + assert x.replace(b"a", b"b") is x + def test_strip(self): s = " a b " assert s.strip() == "a b" diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py index 51faff763d..6b1c7315da 100644 --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1303,3 +1303,7 @@ class AppTestUnicodeString: def test_newlist_utf8_non_ascii(self): 'ä'.split("\n")[0] # does not crash + + def test_replace_no_occurrence(self): + x = u"xyz" + assert x.replace(u"a", u"b") is x diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py index 1dcd415912..1b7a8d07b4 100644 --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -880,6 +880,8 @@ class W_UnicodeObject(W_Root): count, isutf8=True) except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") + if type(self) is W_UnicodeObject and replacements == 0: + return self newlength = self._length + replacements * (w_by._length - w_sub._length) return W_UnicodeObject(res, newlength) diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py index 29e1495381..efb9b41cfd 100644 --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -101,9 +101,13 @@ def _split_by(value, by, maxsplit): start = 0 if bylen == 1: - # fast path: uses str.rfind(character) and str.count(character) + # fast path: uses str.find(character) and str.count(character) by = by[0] # annotator hack: string -> char cnt = count(value, by, 0, len(value)) + if cnt == 0: + if isinstance(value, str): + return [value] + return [value[0:len(value)]] if 0 <= maxsplit < cnt: cnt = maxsplit res = newlist_hint(cnt + 1) @@ -250,6 +254,8 @@ def replace_count(input, sub, by, maxsplit=-1, isutf8=False): # First compute the exact result size if sub: cnt = count(input, sub, 0, len(input)) + if isinstance(input, str) and cnt == 0: + return input, 0 else: assert isutf8 from rpython.rlib import rutf8 diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py index 18b5103e54..b8b0cd8482 100644 --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -27,6 +27,11 @@ def test_split(): check_split('endcase test', 'test', res=['endcase ', '']) py.test.raises(ValueError, split, 'abc', '') +def test_split_no_occurrence(): + x = "abc" + assert x.split("d")[0] is x + assert x.rsplit("d")[0] is x + def test_split_None(): assert split("") == [] assert split(' a\ta\na b') == ['a', 'a', 'a', 'b'] @@ -164,6 +169,12 @@ def test_unicode_replace_overflow(): with py.test.raises(OverflowError): replace(s, u"a", s, len(s) - 10) +def test_replace_no_occurrence(): + s = "xyz" + assert replace(s, "a", "b") is s + s = "xyz" + assert replace(s, "abc", "b") is s + def test_startswith(): def check_startswith(value, sub, *args, **kwargs): result = kwargs['res'] |