about | summary | refs | log | tree | commit | diff | stats
path: root/youtube_dl/extractor/nbc.py
diff options
context:
space:
mode:
author: Yen Chi Hsuan <yan12125@gmail.com>, 2016-06-10 13:31:55 +0800
committer: Yen Chi Hsuan <yan12125@gmail.com>, 2016-06-10 13:31:55 +0800
commit: 5de008e8c3e4058c20956d19f69ac3347a2722e0 (patch)
tree: 9dbb3a05077aced27ea3afa7685a50cdb00b1b43 /youtube_dl/extractor/nbc.py
parent: 3e74b444e7324fdda956aa816240b938eabf9c93 (diff)
download: youtube-dl-5de008e8c3e4058c20956d19f69ac3347a2722e0.zip
youtube-dl-5de008e8c3e4058c20956d19f69ac3347a2722e0.tar.gz
youtube-dl-5de008e8c3e4058c20956d19f69ac3347a2722e0.tar.bz2
[nbcnews] Support embed widgets
Used in some Vulture videos
Diffstat (limited to 'youtube_dl/extractor/nbc.py')
-rw-r--r-- youtube_dl/extractor/nbc.py | 22
1 file changed, 13 insertions, 9 deletions
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index f27c7f1..6b7da11 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -266,6 +266,11 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
'only_matching': True,
},
+ {
+ # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
+ 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
@@ -289,18 +294,17 @@ class NBCNewsIE(ThePlatformIE):
webpage = self._download_webpage(url, display_id)
info = None
bootstrap_json = self._search_regex(
- r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
+ [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
+ r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
webpage, 'bootstrap json', default=None)
- if bootstrap_json:
- bootstrap = self._parse_json(bootstrap_json, display_id)
+ bootstrap = self._parse_json(
+ bootstrap_json, display_id, transform_source=unescapeHTML)
+ if 'results' in bootstrap:
info = bootstrap['results'][0]['video']
+ elif 'video' in bootstrap:
+ info = bootstrap['video']
else:
- player_instance_json = self._search_regex(
- r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None)
- if not player_instance_json:
- player_instance_json = self._html_search_regex(
- r'data-video="([^"]+)"', webpage, 'video json')
- info = self._parse_json(player_instance_json, display_id)
+ info = bootstrap
video_id = info['mpxId']
title = info['title']