aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor/appletrailers.py
diff options
context:
space:
mode:
authorRogério Brito <rbrito@ime.usp.br>2013-12-26 16:41:27 -0200
committerRogério Brito <rbrito@ime.usp.br>2013-12-26 16:41:27 -0200
commitca4d08063804fb264eb0ae9cc57894198f66e1fb (patch)
tree417325d8523de104149ebbf967ffc14991921072 /youtube_dl/extractor/appletrailers.py
parentb238854ce845f3796daac74edab2e8a373e8ba1a (diff)
downloadyoutube-dl-ca4d08063804fb264eb0ae9cc57894198f66e1fb.zip
youtube-dl-ca4d08063804fb264eb0ae9cc57894198f66e1fb.tar.gz
youtube-dl-ca4d08063804fb264eb0ae9cc57894198f66e1fb.tar.bz2
Imported Upstream version 2013.12.23
Diffstat (limited to 'youtube_dl/extractor/appletrailers.py')
-rw-r--r--youtube_dl/extractor/appletrailers.py25
1 files changed, 12 insertions, 13 deletions
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index 4befff3..ef5644a 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -1,5 +1,4 @@
import re
-import xml.etree.ElementTree
import json
from .common import InfoExtractor
@@ -10,7 +9,7 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = {
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
u"playlist": [
@@ -65,18 +64,18 @@ class AppleTrailersIE(InfoExtractor):
uploader_id = mobj.group('company')
playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
- playlist_snippet = self._download_webpage(playlist_url, movie)
- playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
- playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
- # The ' in the onClick attributes are not escaped, it couldn't be parsed
- # with xml.etree.ElementTree.fromstring
- # like: http://trailers.apple.com/trailers/wb/gravity/
- def _clean_json(m):
- return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
- playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
- playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+ def fix_html(s):
+ s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
+ s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+ # The ' in the onClick attributes are not escaped, it couldn't be parsed
+ # like: http://trailers.apple.com/trailers/wb/gravity/
+ def _clean_json(m):
+ return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+ s = re.sub(self._JSON_RE, _clean_json, s)
+ s = u'<html>' + s + u'</html>'
+ return s
+ doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
- doc = xml.etree.ElementTree.fromstring(playlist_html)
playlist = []
for li in doc.findall('./div/ul/li'):
on_click = li.find('.//a').attrib['onClick']