about | summary | refs | log | tree | commit | diff | stats
path: root/youtube_dl/compat.py
diff options
context:
space:
mode:
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-10-26 16:41:24 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-10-26 16:41:24 +0100
commit: f78546272cf7c4b10c8003870728ab69bec982fc (patch)
tree: 35449b6fd8c220411a30403396ef705d149650a8 /youtube_dl/compat.py
parent: 387db16a789fea25795433538d80513c18d0f699 (diff)
download: youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.zip
youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.tar.gz
youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.tar.bz2
[compat] compat_etree_fromstring: also decode the text attribute
Also deletes parse_xml from utils, because compat_etree_fromstring now does the same decoding.
Diffstat (limited to 'youtube_dl/compat.py')
-rw-r--r-- youtube_dl/compat.py 18
1 file changed, 16 insertions, 2 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index f39d4e9..2d43ec8 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -216,9 +216,19 @@ except ImportError: # Python 2.6
if sys.version_info[0] >= 3:
compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
- # on python 2.x the the attributes of a node aren't always unicode objects
+ # on python 2.x the attributes and text of a node aren't always unicode
+ # objects
etree = xml.etree.ElementTree
+ try:
+ _etree_iter = etree.Element.iter
+ except AttributeError: # Python <=2.6
+ def _etree_iter(root):
+ for el in root.findall('*'):
+ yield el
+ for sub in _etree_iter(el):
+ yield sub
+
# on 2.6 XML doesn't have a parser argument, function copied from CPython
# 2.7 source
def _XML(text, parser=None):
@@ -235,7 +245,11 @@ else:
return el
def compat_etree_fromstring(text):
- return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+ doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+ for el in _etree_iter(doc):
+ if el.text is not None and isinstance(el.text, bytes):
+ el.text = el.text.decode('utf-8')
+ return doc
try:
from urllib.parse import parse_qs as compat_parse_qs