Skip to content

Commit 0099866

Browse files
author
skip.montanaro
committed
When splitting, avoid making a copy of the string if the split doesn't find
anything (issue 1538).

git-svn-id: http://svn.python.org/projects/python/trunk@59420 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent d75e15d commit 0099866

2 files changed

Lines changed: 45 additions & 11 deletions

File tree

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 2.6 alpha 1?
1212
Core and builtins
1313
-----------------
1414

15+
- Issue #1538: Avoid copying string in split/rsplit if the split
16+
char is not found.
17+
1518
- Issue #1553: An erroneous __length_hint__ can make list() raise a
1619
SystemError.
1720

Objects/stringobject.c

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1403,8 +1403,9 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
14031403
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
14041404

14051405
Py_LOCAL_INLINE(PyObject *)
1406-
split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1406+
split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
14071407
{
1408+
const char *s = PyString_AS_STRING(self);
14081409
Py_ssize_t i, j, count=0;
14091410
PyObject *str;
14101411
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
@@ -1419,6 +1420,13 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
14191420
if (i==len) break;
14201421
j = i; i++;
14211422
SKIP_NONSPACE(s, i, len);
1423+
if (j == 0 && i == len && PyString_CheckExact(self)) {
1424+
/* No whitespace in self, so just use it as list[0] */
1425+
Py_INCREF(self);
1426+
PyList_SET_ITEM(list, 0, (PyObject *)self);
1427+
count++;
1428+
break;
1429+
}
14221430
SPLIT_ADD(s, j, i);
14231431
}
14241432

@@ -1437,8 +1445,9 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
14371445
}
14381446

14391447
Py_LOCAL_INLINE(PyObject *)
1440-
split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1448+
split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
14411449
{
1450+
const char *s = PyString_AS_STRING(self);
14421451
register Py_ssize_t i, j, count=0;
14431452
PyObject *str;
14441453
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
@@ -1457,7 +1466,13 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
14571466
}
14581467
}
14591468
}
1460-
if (i <= len) {
1469+
if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1470+
/* ch not in self, so just use self as list[0] */
1471+
Py_INCREF(self);
1472+
PyList_SET_ITEM(list, 0, (PyObject *)self);
1473+
count++;
1474+
}
1475+
else if (i <= len) {
14611476
SPLIT_ADD(s, i, len);
14621477
}
14631478
FIX_PREALLOC_SIZE(list);
@@ -1492,7 +1507,7 @@ string_split(PyStringObject *self, PyObject *args)
14921507
if (maxsplit < 0)
14931508
maxsplit = PY_SSIZE_T_MAX;
14941509
if (subobj == Py_None)
1495-
return split_whitespace(s, len, maxsplit);
1510+
return split_whitespace(self, len, maxsplit);
14961511
if (PyString_Check(subobj)) {
14971512
sub = PyString_AS_STRING(subobj);
14981513
n = PyString_GET_SIZE(subobj);
@@ -1509,7 +1524,7 @@ string_split(PyStringObject *self, PyObject *args)
15091524
return NULL;
15101525
}
15111526
else if (n == 1)
1512-
return split_char(s, len, sub[0], maxsplit);
1527+
return split_char(self, len, sub[0], maxsplit);
15131528

15141529
list = PyList_New(PREALLOC_SIZE(maxsplit));
15151530
if (list == NULL)
@@ -1609,8 +1624,9 @@ string_rpartition(PyStringObject *self, PyObject *sep_obj)
16091624
}
16101625

16111626
Py_LOCAL_INLINE(PyObject *)
1612-
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1627+
rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
16131628
{
1629+
const char *s = PyString_AS_STRING(self);
16141630
Py_ssize_t i, j, count=0;
16151631
PyObject *str;
16161632
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
@@ -1625,6 +1641,13 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
16251641
if (i<0) break;
16261642
j = i; i--;
16271643
RSKIP_NONSPACE(s, i);
1644+
if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1645+
/* No whitespace in self, so just use it as list[0] */
1646+
Py_INCREF(self);
1647+
PyList_SET_ITEM(list, 0, (PyObject *)self);
1648+
count++;
1649+
break;
1650+
}
16281651
SPLIT_ADD(s, i + 1, j + 1);
16291652
}
16301653
if (i >= 0) {
@@ -1645,8 +1668,9 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
16451668
}
16461669

16471670
Py_LOCAL_INLINE(PyObject *)
1648-
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1671+
rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
16491672
{
1673+
const char *s = PyString_AS_STRING(self);
16501674
register Py_ssize_t i, j, count=0;
16511675
PyObject *str;
16521676
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
@@ -1664,7 +1688,13 @@ rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
16641688
}
16651689
}
16661690
}
1667-
if (j >= -1) {
1691+
if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1692+
/* ch not in self, so just use self as list[0] */
1693+
Py_INCREF(self);
1694+
PyList_SET_ITEM(list, 0, (PyObject *)self);
1695+
count++;
1696+
}
1697+
else if (j >= -1) {
16681698
SPLIT_ADD(s, 0, j + 1);
16691699
}
16701700
FIX_PREALLOC_SIZE(list);
@@ -1691,15 +1721,15 @@ string_rsplit(PyStringObject *self, PyObject *args)
16911721
{
16921722
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
16931723
Py_ssize_t maxsplit = -1, count=0;
1694-
const char *s = PyString_AS_STRING(self), *sub;
1724+
const char *s, *sub;
16951725
PyObject *list, *str, *subobj = Py_None;
16961726

16971727
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
16981728
return NULL;
16991729
if (maxsplit < 0)
17001730
maxsplit = PY_SSIZE_T_MAX;
17011731
if (subobj == Py_None)
1702-
return rsplit_whitespace(s, len, maxsplit);
1732+
return rsplit_whitespace(self, len, maxsplit);
17031733
if (PyString_Check(subobj)) {
17041734
sub = PyString_AS_STRING(subobj);
17051735
n = PyString_GET_SIZE(subobj);
@@ -1716,7 +1746,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
17161746
return NULL;
17171747
}
17181748
else if (n == 1)
1719-
return rsplit_char(s, len, sub[0], maxsplit);
1749+
return rsplit_char(self, len, sub[0], maxsplit);
17201750

17211751
list = PyList_New(PREALLOC_SIZE(maxsplit));
17221752
if (list == NULL)
@@ -1725,6 +1755,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
17251755
j = len;
17261756
i = j - n;
17271757

1758+
s = PyString_AS_STRING(self);
17281759
while ( (i >= 0) && (maxsplit-- > 0) ) {
17291760
for (; i>=0; i--) {
17301761
if (Py_STRING_MATCH(s, i, sub, n)) {

0 commit comments

Comments
 (0)