aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2016-11-08 00:29:12 +0700
committer Sergey M․ <dstftw@gmail.com> 2016-11-08 00:29:12 +0700
commit ebc7ab1e231483f189290608425a23590cae6af9 (patch)
tree 8bb913c300fb6b28373d9799579cc4ca483f00cb /youtube_dl
parent 97726317ac8e905dc72e75c7c2a823280c51af00 (diff)
download youtube-dl-ebc7ab1e231483f189290608425a23590cae6af9.zip
youtube-dl-ebc7ab1e231483f189290608425a23590cae6af9.tar.gz
youtube-dl-ebc7ab1e231483f189290608425a23590cae6af9.tar.bz2
[espn] Fix extraction (closes #11041)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/extractor/espn.py 126
1 files changed, 94 insertions, 32 deletions
diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py
index 6d10f8e..8795e0d 100644
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@@ -1,38 +1,117 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import remove_end
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ unified_timestamp,
+)
class ESPNIE(InfoExtractor):
- _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
- 'md5': '60e5d097a523e767d06479335d1bdc58',
'info_dict': {
- 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
+ 'id': '10365079',
'ext': 'mp4',
'title': '30 for 30 Shorts: Judging Jewell',
- 'description': None,
+ 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
+ 'timestamp': 1390936111,
+ 'upload_date': '20140128',
},
'params': {
'skip_download': True,
},
- 'add_ie': ['OoyalaExternal'],
}, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663',
- 'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
'info_dict': {
- 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
+ 'id': '2743663',
'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season',
+ 'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
+ 'timestamp': 1449446454,
+ 'upload_date': '20151207',
},
'params': {
'skip_download': True,
},
- 'add_ie': ['OoyalaExternal'],
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
+ 'url': 'http://www.espn.com/video/clip?id=10365079',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/video/clip/_/id/17989860',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ clip = self._download_json(
+ 'http://api-app.espn.com/v1/video/clips/%s' % video_id,
+ video_id)['videos'][0]
+
+ title = clip['headline']
+
+ format_urls = set()
+ formats = []
+
+ def traverse_source(source, base_source_id=None):
+ for source_id, source in source.items():
+ if isinstance(source, compat_str):
+ extract_source(source, base_source_id)
+ elif isinstance(source, dict):
+ traverse_source(
+ source,
+ '%s-%s' % (base_source_id, source_id)
+ if base_source_id else source_id)
+
+ def extract_source(source_url, source_id=None):
+ if source_url in format_urls:
+ return
+ format_urls.add(source_url)
+ ext = determine_ext(source_url)
+ if ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ source_url, video_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, video_id, f4m_id=source_id, fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=source_id, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': source_id,
+ })
+
+ traverse_source(clip['links']['source'])
+ self._sort_formats(formats)
+
+ description = clip.get('caption') or clip.get('description')
+ thumbnail = clip.get('thumbnail')
+ duration = int_or_none(clip.get('duration'))
+ timestamp = unified_timestamp(clip.get('originalPublishDate'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class ESPNArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+ _TESTS = [{
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
'only_matching': True,
}, {
@@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
}, {
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
'only_matching': True,
- }, {
- 'url': 'http://www.espn.com/video/clip?id=10365079',
- 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
+
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
webpage, 'video id', group='id')
- cms = 'espn'
- if 'data-source="intl"' in webpage:
- cms = 'intl'
- player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
- player = self._download_webpage(
- player_url, video_id)
-
- pcode = self._search_regex(
- r'["\']pcode=([^"\']+)["\']', player, 'pcode')
-
- title = remove_end(
- self._og_search_title(webpage),
- '- ESPN Video').strip()
-
- return {
- '_type': 'url_transparent',
- 'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
- 'ie_key': 'OoyalaExternal',
- 'title': title,
- }
+ return self.url_result(
+ 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())