[compat] compat_etree_fromstring: also decode the text attribute

Deletes parse_xml from utils, because compat_etree_fromstring now does the same decoding.
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-10-26 16:41:24 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-10-26 16:41:24 +0100
commit: f78546272cf7c4b10c8003870728ab69bec982fc (patch)
tree: 35449b6fd8c220411a30403396ef705d149650a8 /youtube_dl/compat.py
parent: 387db16a789fea25795433538d80513c18d0f699 (diff)
download: youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.zip
youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.tar.gz
youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.tar.bz2
1 file changed, 16 insertions, 2 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index f39d4e9..2d43ec8 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -216,9 +216,19 @@ except ImportError:  # Python 2.6
 if sys.version_info[0] >= 3:
     compat_etree_fromstring = xml.etree.ElementTree.fromstring
 else:
-    # on python 2.x the the attributes of a node aren't always unicode objects
+    # on python 2.x the attributes and text of a node aren't always unicode
+    # objects
     etree = xml.etree.ElementTree
 
+    try:
+        _etree_iter = etree.Element.iter
+    except AttributeError:  # Python <=2.6
+        def _etree_iter(root):
+            for el in root.findall('*'):
+                yield el
+                for sub in _etree_iter(el):
+                    yield sub
+
     # on 2.6 XML doesn't have a parser argument, function copied from CPython
     # 2.7 source
     def _XML(text, parser=None):
@@ -235,7 +245,11 @@ else:
         return el
 
     def compat_etree_fromstring(text):
-        return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        for el in _etree_iter(doc):
+            if el.text is not None and isinstance(el.text, bytes):
+                el.text = el.text.decode('utf-8')
+        return doc
 
 try:
     from urllib.parse import parse_qs as compat_parse_qs
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2015-10-26 16:41:24 +0100
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2015-10-26 16:41:24 +0100
commit	f78546272cf7c4b10c8003870728ab69bec982fc (patch)
tree	35449b6fd8c220411a30403396ef705d149650a8 /youtube_dl/compat.py
parent	387db16a789fea25795433538d80513c18d0f699 (diff)
download	youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.zip youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.tar.gz youtube-dl-f78546272cf7c4b10c8003870728ab69bec982fc.tar.bz2