Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,90 @@ def test_parse_again(self):
self.assertEqual(expat.ErrorString(cm.exception.code),
expat.errors.XML_ERROR_FINISHED)

@support.subTests("encoding", ("utf-8", "utf-16"))
def test_parse_reentrancy_with_encoding(self, encoding):
    # See https://github.com/python/cpython/issues/146169.
    #
    # The character data handler calls Parse() again on the parser that
    # invoked it; the outer Parse() call is expected to fail with a
    # RuntimeError.
    parser = expat.ParserCreate(encoding=encoding)

    def reenter_parse(data):
        return parser.Parse(data, False)

    handler = mock.Mock(wraps=reenter_parse)
    parser.CharacterDataHandler = handler

    document = "<a>x".encode(encoding)
    size = len(document)
    expected = re.escape("cannot call Parse() from within a handler")
    with self.assertRaisesRegex(RuntimeError, expected):
        # Feed the document one byte at a time; only the last chunk
        # is flagged as final.
        for pos in range(size):
            chunk = document[pos:pos + 1]
            parser.Parse(chunk, pos == size - 1)
    handler.assert_called_once_with("x")

@support.subTests("encoding", ("utf-8", "utf-16"))
def test_parse_file_reentrancy_with_encoding(self, encoding):
    # See https://github.com/python/cpython/issues/146169.
    #
    # The character data handler calls ParseFile() again on the parser
    # that invoked it; the outer ParseFile() call is expected to fail
    # with a RuntimeError.
    parser = expat.ParserCreate(encoding=encoding)

    def reenter_parse_file(data):
        stream = BytesIO(data.encode(encoding))
        return parser.ParseFile(stream)

    handler = mock.Mock(wraps=reenter_parse_file)
    parser.CharacterDataHandler = handler

    source = BytesIO("<a>x".encode(encoding))
    expected = re.escape("cannot call ParseFile() from within a handler")
    with self.assertRaisesRegex(RuntimeError, expected):
        parser.ParseFile(source)
    handler.assert_called_once_with("x")

@support.subTests("encoding", ("utf-8", "utf-16"))
def test_parse_reentrancy_allowed_for_external_parser(self, encoding):
    # The external entity handler of the parent parser drives a *separate*
    # parser object created with ExternalEntityParserCreate(); the test
    # expects this to complete without an exception.
    parser = expat.ParserCreate(encoding=encoding)
    subparser = parser.ExternalEntityParserCreate(None, encoding)
    entity_decl = '<!ENTITY ext SYSTEM "entity.file">'

    def parse_in_subparser(*args):
        subparser.Parse(entity_decl, True)
        # return a nonzero integer to indicate that parsing continues
        return 1

    handler = mock.Mock(wraps=parse_in_subparser)
    parser.ExternalEntityRefHandler = handler

    document = textwrap.dedent(f"""\
        <?xml version="1.0" standalone="no"?>
        <!DOCTYPE quotations SYSTEM "quotations.dtd" [{entity_decl}]>
        <root>&ext;</root>
    """).encode(encoding)

    # Check that external parsers can be called from parent's handlers.
    # The document is fed one byte at a time; only the last chunk is
    # flagged as final.
    size = len(document)
    for pos in range(size):
        chunk = document[pos:pos + 1]
        parser.Parse(chunk, pos == size - 1)
    handler.assert_called_once_with('ext', None, 'entity.file', None)

@support.subTests("encoding", ("utf-8", "utf-16"))
def test_parse_file_reentrancy_allowed_for_external_parser(self, encoding):
    # The external entity handler of the parent parser calls ParseFile()
    # on a *separate* parser object created with
    # ExternalEntityParserCreate(); the test expects this to complete
    # without an exception.
    parser = expat.ParserCreate(encoding=encoding)
    subparser = parser.ExternalEntityParserCreate(None, encoding)
    entity_decl = '<!ENTITY ext SYSTEM "entity.file">'

    def parse_file_in_subparser(*args):
        stream = BytesIO(entity_decl.encode(encoding))
        subparser.ParseFile(stream)
        # return a nonzero integer to indicate that parsing continues
        return 1

    handler = mock.Mock(wraps=parse_file_in_subparser)
    parser.ExternalEntityRefHandler = handler

    document = textwrap.dedent(f"""\
        <?xml version="1.0" standalone="no"?>
        <!DOCTYPE quotations SYSTEM "quotations.dtd" [{entity_decl}]>
        <root>&ext;</root>
    """).encode(encoding)

    # Check that external parsers can be called from parent's handlers.
    source = BytesIO(document)
    parser.ParseFile(source)
    handler.assert_called_once_with('ext', None, 'entity.file', None)


class NamespaceSeparatorTest(unittest.TestCase):
def test_legal(self):
# Tests that make sure we get errors when the namespace_separator value
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:mod:`xml.parsers.expat`: raise :exc:`RuntimeError` when an Expat handler
calls :meth:`parser.Parse <xml.parsers.expat.xmlparser.Parse>` or
:meth:`parser.ParseFile <xml.parsers.expat.xmlparser.ParseFile>` on the parser
that called the handler. Patch by Bénédikt Tran.
18 changes: 18 additions & 0 deletions Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,18 @@ VOID_HANDLER(StartDoctypeDecl,

VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))

/* Check that the current function is not called from within a handler.
 *
 * If PARSER->in_callback is set, set a RuntimeError whose message names
 * FUNCNAME and return NULL from the *enclosing* function, so this macro
 * may only be used in functions returning a pointer.
 *
 * PARSER is parenthesized in the expansion so that any pointer-valued
 * expression can be passed safely. FUNCNAME must be a string literal
 * because it is concatenated with the adjacent literals at compile time.
 */
#define CHECK_NOT_IN_HANDLER(PARSER, FUNCNAME)                      \
    do {                                                            \
        if ((PARSER)->in_callback) {                                \
            PyErr_SetString(PyExc_RuntimeError,                     \
                            "cannot call " FUNCNAME "() "           \
                            "from within a handler");               \
            return NULL;                                            \
        }                                                           \
    } while (0)


/* ---------------------------------------------------------------- */
/*[clinic input]
class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
Expand Down Expand Up @@ -857,6 +869,9 @@ pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
PyObject *data, int isfinal)
/*[clinic end generated code: output=8faffe07fe1f862a input=053e0f047e55c05a]*/
{
// avoid re-entrant calls to XML_Parse()
CHECK_NOT_IN_HANDLER(self, "Parse");

const char *s;
Py_ssize_t slen;
Py_buffer view;
Expand Down Expand Up @@ -956,6 +971,9 @@ pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
PyObject *file)
/*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
{
// avoid re-entrant calls to XML_GetBuffer() or XML_ParseBuffer()
CHECK_NOT_IN_HANDLER(self, "ParseFile");

int rv = 1;
PyObject *readmethod = NULL;

Expand Down
Loading