aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorRogério Brito <rbrito@ime.usp.br>2017-02-24 21:07:40 -0300
committerRogério Brito <rbrito@ime.usp.br>2017-02-24 21:07:40 -0300
commit4e090bc3ceacc4e3cd464d12ea97700e3acad37d (patch)
tree899b82420538d865e697fb071a03db254653e7c6 /youtube_dl/extractor
parent575f75429e2e479957d006a26940f4a5a51347ce (diff)
downloadyoutube-dl-4e090bc3ceacc4e3cd464d12ea97700e3acad37d.zip
youtube-dl-4e090bc3ceacc4e3cd464d12ea97700e3acad37d.tar.gz
youtube-dl-4e090bc3ceacc4e3cd464d12ea97700e3acad37d.tar.bz2
New upstream version 2017.02.24.1
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/adobepass.py5
-rw-r--r--youtube_dl/extractor/aenetworks.py6
-rw-r--r--youtube_dl/extractor/amcnetworks.py30
-rw-r--r--youtube_dl/extractor/archiveorg.py4
-rw-r--r--youtube_dl/extractor/bbc.py11
-rw-r--r--youtube_dl/extractor/bellmedia.py5
-rw-r--r--youtube_dl/extractor/bloomberg.py11
-rw-r--r--youtube_dl/extractor/brightcove.py4
-rw-r--r--youtube_dl/extractor/ceskatelevize.py163
-rw-r--r--youtube_dl/extractor/common.py219
-rw-r--r--youtube_dl/extractor/commonmistakes.py8
-rw-r--r--youtube_dl/extractor/corus.py72
-rw-r--r--youtube_dl/extractor/crackle.py1
-rw-r--r--youtube_dl/extractor/crunchyroll.py29
-rw-r--r--youtube_dl/extractor/dailymotion.py3
-rw-r--r--youtube_dl/extractor/disney.py60
-rw-r--r--youtube_dl/extractor/dramafever.py6
-rw-r--r--youtube_dl/extractor/einthusan.py120
-rw-r--r--youtube_dl/extractor/ellentv.py23
-rw-r--r--youtube_dl/extractor/elpais.py23
-rw-r--r--youtube_dl/extractor/extractors.py14
-rw-r--r--youtube_dl/extractor/facebook.py45
-rw-r--r--youtube_dl/extractor/generic.py49
-rw-r--r--youtube_dl/extractor/go.py100
-rw-r--r--youtube_dl/extractor/heise.py75
-rw-r--r--youtube_dl/extractor/hgtv.py44
-rw-r--r--youtube_dl/extractor/hotstar.py46
-rw-r--r--youtube_dl/extractor/instagram.py54
-rw-r--r--youtube_dl/extractor/iprima.py20
-rw-r--r--youtube_dl/extractor/iqiyi.py5
-rw-r--r--youtube_dl/extractor/itv.py5
-rw-r--r--youtube_dl/extractor/ivi.py8
-rw-r--r--youtube_dl/extractor/jwplatform.py132
-rw-r--r--youtube_dl/extractor/kaltura.py16
-rw-r--r--youtube_dl/extractor/leeco.py7
-rw-r--r--youtube_dl/extractor/lemonde.py32
-rw-r--r--youtube_dl/extractor/limelight.py38
-rw-r--r--youtube_dl/extractor/lynda.py19
-rw-r--r--youtube_dl/extractor/metacafe.py41
-rw-r--r--youtube_dl/extractor/mgtv.py50
-rw-r--r--youtube_dl/extractor/nbc.py69
-rw-r--r--youtube_dl/extractor/ninecninemedia.py1
-rw-r--r--youtube_dl/extractor/noco.py23
-rw-r--r--youtube_dl/extractor/nrk.py46
-rw-r--r--youtube_dl/extractor/ondemandkorea.py8
-rw-r--r--youtube_dl/extractor/onet.py61
-rw-r--r--youtube_dl/extractor/openload.py21
-rw-r--r--youtube_dl/extractor/pbs.py12
-rw-r--r--youtube_dl/extractor/pinkbike.py3
-rw-r--r--youtube_dl/extractor/pluralsight.py42
-rw-r--r--youtube_dl/extractor/pornhub.py78
-rw-r--r--youtube_dl/extractor/pornoxo.py4
-rw-r--r--youtube_dl/extractor/prosiebensat1.py3
-rw-r--r--youtube_dl/extractor/rentv.py3
-rw-r--r--youtube_dl/extractor/rudo.py4
-rw-r--r--youtube_dl/extractor/screencastomatic.py4
-rw-r--r--youtube_dl/extractor/scrippsnetworks.py60
-rw-r--r--youtube_dl/extractor/sendtonews.py4
-rw-r--r--youtube_dl/extractor/sixplay.py95
-rw-r--r--youtube_dl/extractor/skylinewebcams.py42
-rw-r--r--youtube_dl/extractor/sohu.py7
-rw-r--r--youtube_dl/extractor/spankbang.py6
-rw-r--r--youtube_dl/extractor/sprout.py52
-rw-r--r--youtube_dl/extractor/srgssr.py10
-rw-r--r--youtube_dl/extractor/svt.py6
-rw-r--r--youtube_dl/extractor/telequebec.py7
-rw-r--r--youtube_dl/extractor/tfo.py6
-rw-r--r--youtube_dl/extractor/theplatform.py11
-rw-r--r--youtube_dl/extractor/thescene.py26
-rw-r--r--youtube_dl/extractor/thisav.py14
-rw-r--r--youtube_dl/extractor/tubitv.py1
-rw-r--r--youtube_dl/extractor/tv4.py11
-rw-r--r--youtube_dl/extractor/tvn24.py76
-rw-r--r--youtube_dl/extractor/tvnoe.py4
-rw-r--r--youtube_dl/extractor/tvplayer.py75
-rw-r--r--youtube_dl/extractor/twentyfourvideo.py5
-rw-r--r--youtube_dl/extractor/vbox7.py3
-rw-r--r--youtube_dl/extractor/vgtv.py4
-rw-r--r--youtube_dl/extractor/vice.py6
-rw-r--r--youtube_dl/extractor/viceland.py14
-rw-r--r--youtube_dl/extractor/vidzi.py9
-rw-r--r--youtube_dl/extractor/viewster.py4
-rw-r--r--youtube_dl/extractor/viki.py6
-rw-r--r--youtube_dl/extractor/vodpl.py32
-rw-r--r--youtube_dl/extractor/wimp.py4
-rw-r--r--youtube_dl/extractor/xtube.py25
-rw-r--r--youtube_dl/extractor/youtube.py11
-rw-r--r--youtube_dl/extractor/zdf.py14
88 files changed, 1863 insertions, 772 deletions
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py
index 12eeab2..4d655bd 100644
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dl/extractor/adobepass.py
@@ -31,6 +31,11 @@ MSO_INFO = {
'username_field': 'user',
'password_field': 'passwd',
},
+ 'TWC': {
+ 'name': 'Time Warner Cable | Spectrum',
+ 'username_field': 'Ecom_User_ID',
+ 'password_field': 'Ecom_Password',
+ },
'thr030': {
'name': '3 Rivers Communications'
},
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index c973174..dd96a47 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -23,7 +23,7 @@ class AENetworksBaseIE(ThePlatformIE):
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?)'
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
@@ -62,11 +62,15 @@ class AENetworksIE(AENetworksBaseIE):
}, {
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
'only_matching': True
+ }, {
+ 'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
+ 'only_matching': True
}]
_DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY',
'aetv.com': 'AETV',
'mylifetime.com': 'LIFETIME',
+ 'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
'fyi.tv': 'FYI',
}
diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py
index 87c803e..b71d1a0 100644
--- a/youtube_dl/extractor/amcnetworks.py
+++ b/youtube_dl/extractor/amcnetworks.py
@@ -53,20 +53,30 @@ class AMCNetworksIE(ThePlatformIE):
'mbr': 'true',
'manifest': 'm3u',
}
- media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
+ media_url = self._search_regex(
+ r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
+ webpage, 'media url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
- r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
+ r'link\.theplatform\.com/s/([^?]+)',
+ media_url, 'theplatform_path'), display_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid']
title = theplatform_metadata['title']
rating = theplatform_metadata['ratings'][0]['rating']
- auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
+ auth_required = self._search_regex(
+ r'window\.authRequired\s*=\s*(true|false);',
+ webpage, 'auth required')
if auth_required == 'true':
- requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
- resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
- query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
+ requestor_id = self._search_regex(
+ r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
+ webpage, 'requestor id')
+ resource = self._get_mvpd_resource(
+ requestor_id, title, video_id, rating)
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
media_url = update_url_query(media_url, query)
- formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
+ formats, subtitles = self._extract_theplatform_smil(
+ media_url, video_id)
self._sort_formats(formats)
info.update({
'id': video_id,
@@ -78,9 +88,11 @@ class AMCNetworksIE(ThePlatformIE):
if ns_keys:
ns = list(ns_keys)[0]
series = theplatform_metadata.get(ns + '$show')
- season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
+ season_number = int_or_none(
+ theplatform_metadata.get(ns + '$season'))
episode = theplatform_metadata.get(ns + '$episodeTitle')
- episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
+ episode_number = int_or_none(
+ theplatform_metadata.get(ns + '$episode'))
if season_number:
title = 'Season %d - %s' % (season_number, title)
if series:
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index 486dff8..e21045b 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -1,13 +1,13 @@
from __future__ import unicode_literals
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import (
unified_strdate,
clean_html,
)
-class ArchiveOrgIE(JWPlatformBaseIE):
+class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org'
IE_DESC = 'archive.org videos'
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index b179161..8a2ed0a 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -225,6 +225,8 @@ class BBCCoUkIE(InfoExtractor):
}
]
+ _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
+
class MediaSelectionError(Exception):
def __init__(self, id):
self.id = id
@@ -336,6 +338,15 @@ class BBCCoUkIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False))
+ if re.search(self._USP_RE, href):
+ usp_formats = self._extract_m3u8_formats(
+ re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
+ programme_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False)
+ for f in usp_formats:
+ if f.get('height') and f['height'] > 720:
+ continue
+ formats.append(f)
elif transfer_format == 'hds':
formats.extend(self._extract_f4m_formats(
href, programme_id, f4m_id=format_id, fatal=False))
diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py
index 32326ed..1f5b6ed 100644
--- a/youtube_dl/extractor/bellmedia.py
+++ b/youtube_dl/extractor/bellmedia.py
@@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor):
space
)\.ca|
much\.com
- )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
+ )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
@@ -55,6 +55,9 @@ class BellMediaIE(InfoExtractor):
}, {
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
'only_matching': True,
+ }, {
+ 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
+ 'only_matching': True,
}]
_DOMAINS = {
'thecomedynetwork': 'comedy',
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index c5e11e8..2fbfad1 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -34,6 +34,10 @@ class BloombergIE(InfoExtractor):
'format': 'best[format_id^=hds]',
},
}, {
+ # data-bmmrid=
+ 'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
+ 'only_matching': True,
+ }, {
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
'only_matching': True,
}, {
@@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
name = self._match_id(url)
webpage = self._download_webpage(url, name)
video_id = self._search_regex(
- (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
- webpage, 'id', group='url', default=None)
+ (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
+ webpage, 'id', group='id', default=None)
if not video_id:
bplayer_data = self._parse_json(self._search_regex(
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 5c6e99d..27685ee 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -191,6 +191,10 @@ class BrightcoveLegacyIE(InfoExtractor):
# These fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
if videoPlayer is not None:
+ if isinstance(videoPlayer, list):
+ videoPlayer = videoPlayer[0]
+ if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')):
+ return None
params['@videoPlayer'] = videoPlayer
linkBase = find_param('linkBaseURL')
if linkBase is not None:
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 4f88c31..b1dfacf 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -13,6 +13,7 @@ from ..utils import (
float_or_none,
sanitized_Request,
urlencode_postdata,
+ USER_AGENTS,
)
@@ -21,10 +22,10 @@ class CeskaTelevizeIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
- 'id': '61924494876951776',
+ 'id': '61924494877246241',
'ext': 'mp4',
- 'title': 'Hyde Park Civilizace',
- 'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
+ 'title': 'Hyde Park Civilizace: Život v Grónsku',
+ 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 3350,
},
@@ -114,70 +115,100 @@ class CeskaTelevizeIE(InfoExtractor):
'requestSource': 'iVysilani',
}
- req = sanitized_Request(
- 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
- data=urlencode_postdata(data))
-
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
- req.add_header('x-addr', '127.0.0.1')
- req.add_header('X-Requested-With', 'XMLHttpRequest')
- req.add_header('Referer', url)
-
- playlistpage = self._download_json(req, playlist_id)
-
- playlist_url = playlistpage['url']
- if playlist_url == 'error_region':
- raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
-
- req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
- req.add_header('Referer', url)
-
- playlist_title = self._og_search_title(webpage, default=None)
- playlist_description = self._og_search_description(webpage, default=None)
-
- playlist = self._download_json(req, playlist_id)['playlist']
- playlist_len = len(playlist)
-
entries = []
- for item in playlist:
- is_live = item.get('type') == 'LIVE'
- formats = []
- for format_id, stream_url in item['streamUrls'].items():
- formats.extend(self._extract_m3u8_formats(
- stream_url, playlist_id, 'mp4',
- entry_protocol='m3u8' if is_live else 'm3u8_native',
- fatal=False))
- self._sort_formats(formats)
-
- item_id = item.get('id') or item['assetId']
- title = item['title']
-
- duration = float_or_none(item.get('duration'))
- thumbnail = item.get('previewImageUrl')
-
- subtitles = {}
- if item.get('type') == 'VOD':
- subs = item.get('subtitles')
- if subs:
- subtitles = self.extract_subtitles(episode_id, subs)
-
- if playlist_len == 1:
- final_title = playlist_title or title
- if is_live:
- final_title = self._live_title(final_title)
- else:
- final_title = '%s (%s)' % (playlist_title, title)
-
- entries.append({
- 'id': item_id,
- 'title': final_title,
- 'description': playlist_description if playlist_len == 1 else None,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- 'is_live': is_live,
- })
+
+ for user_agent in (None, USER_AGENTS['Safari']):
+ req = sanitized_Request(
+ 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+ data=urlencode_postdata(data))
+
+ req.add_header('Content-type', 'application/x-www-form-urlencoded')
+ req.add_header('x-addr', '127.0.0.1')
+ req.add_header('X-Requested-With', 'XMLHttpRequest')
+ if user_agent:
+ req.add_header('User-Agent', user_agent)
+ req.add_header('Referer', url)
+
+ playlistpage = self._download_json(req, playlist_id, fatal=False)
+
+ if not playlistpage:
+ continue
+
+ playlist_url = playlistpage['url']
+ if playlist_url == 'error_region':
+ raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+ req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
+ req.add_header('Referer', url)
+
+ playlist_title = self._og_search_title(webpage, default=None)
+ playlist_description = self._og_search_description(webpage, default=None)
+
+ playlist = self._download_json(req, playlist_id, fatal=False)
+ if not playlist:
+ continue
+
+ playlist = playlist.get('playlist')
+ if not isinstance(playlist, list):
+ continue
+
+ playlist_len = len(playlist)
+
+ for num, item in enumerate(playlist):
+ is_live = item.get('type') == 'LIVE'
+ formats = []
+ for format_id, stream_url in item.get('streamUrls', {}).items():
+ if 'playerType=flash' in stream_url:
+ stream_formats = self._extract_m3u8_formats(
+ stream_url, playlist_id, 'mp4',
+ entry_protocol='m3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls-%s' % format_id, fatal=False)
+ else:
+ stream_formats = self._extract_mpd_formats(
+ stream_url, playlist_id,
+ mpd_id='dash-%s' % format_id, fatal=False)
+ # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
+ if format_id == 'audioDescription':
+ for f in stream_formats:
+ f['source_preference'] = -10
+ formats.extend(stream_formats)
+
+ if user_agent and len(entries) == playlist_len:
+ entries[num]['formats'].extend(formats)
+ continue
+
+ item_id = item.get('id') or item['assetId']
+ title = item['title']
+
+ duration = float_or_none(item.get('duration'))
+ thumbnail = item.get('previewImageUrl')
+
+ subtitles = {}
+ if item.get('type') == 'VOD':
+ subs = item.get('subtitles')
+ if subs:
+ subtitles = self.extract_subtitles(episode_id, subs)
+
+ if playlist_len == 1:
+ final_title = playlist_title or title
+ if is_live:
+ final_title = self._live_title(final_title)
+ else:
+ final_title = '%s (%s)' % (playlist_title, title)
+
+ entries.append({
+ 'id': item_id,
+ 'title': final_title,
+ 'description': playlist_description if playlist_len == 1 else None,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ })
+
+ for e in entries:
+ self._sort_formats(e['formats'])
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 0b4e2ac..4252d68 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -6,6 +6,7 @@ import hashlib
import json
import netrc
import os
+import random
import re
import socket
import sys
@@ -39,7 +40,10 @@ from ..utils import (
ExtractorError,
fix_xml_ampersands,
float_or_none,
+ GeoRestrictedError,
+ GeoUtils,
int_or_none,
+ js_to_json,
parse_iso8601,
RegexNotFoundError,
sanitize_filename,
@@ -319,17 +323,34 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
+ _GEO_BYPASS attribute may be set to False in order to disable
+ geo restriction bypass mechanisms for a particular extractor.
+ Though it won't disable explicit geo restriction bypass based on
+ country code provided with geo_bypass_country. (experimental)
+
+ _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+ countries for this extractor. One of these countries will be used by
+ geo restriction bypass mechanism right away in order to bypass
+ geo restriction, of course, if the mechanism is not disabled. (experimental)
+
+ NB: both these geo attributes are experimental and may change in future
+ or be completely removed.
+
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
"""
_ready = False
_downloader = None
+ _x_forwarded_for_ip = None
+ _GEO_BYPASS = True
+ _GEO_COUNTRIES = None
_WORKING = True
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""
self._ready = False
+ self._x_forwarded_for_ip = None
self.set_downloader(downloader)
@classmethod
@@ -358,15 +379,59 @@ class InfoExtractor(object):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
+ self._initialize_geo_bypass(self._GEO_COUNTRIES)
if not self._ready:
self._real_initialize()
self._ready = True
+ def _initialize_geo_bypass(self, countries):
+ """
+ Initialize geo restriction bypass mechanism.
+
+ This method is used to initialize geo bypass mechanism based on faking
+ X-Forwarded-For HTTP header. A random country from provided country list
+ is selected and a random IP belonging to this country is generated. This
+ IP will be passed as X-Forwarded-For HTTP header in all subsequent
+ HTTP requests.
+
+ This method will be used for initial geo bypass mechanism initialization
+ during the instance initialization with _GEO_COUNTRIES.
+
+ You may also manually call it from extractor's code if geo countries
+ information is not available beforehand (e.g. obtained during
+ extraction) or due to some another reason.
+ """
+ if not self._x_forwarded_for_ip:
+ country_code = self._downloader.params.get('geo_bypass_country', None)
+ # If there is no explicit country for geo bypass specified and
+ # the extractor is known to be geo restricted let's fake IP
+ # as X-Forwarded-For right away.
+ if (not country_code and
+ self._GEO_BYPASS and
+ self._downloader.params.get('geo_bypass', True) and
+ countries):
+ country_code = random.choice(countries)
+ if country_code:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_stdout(
+ '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country_code.upper()))
+
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
try:
- self.initialize()
- return self._real_extract(url)
+ for _ in range(2):
+ try:
+ self.initialize()
+ ie_result = self._real_extract(url)
+ if self._x_forwarded_for_ip:
+ ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+ return ie_result
+ except GeoRestrictedError as e:
+ if self.__maybe_fake_ip_and_retry(e.countries):
+ continue
+ raise
except ExtractorError:
raise
except compat_http_client.IncompleteRead as e:
@@ -374,6 +439,21 @@ class InfoExtractor(object):
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e)
+ def __maybe_fake_ip_and_retry(self, countries):
+ if (not self._downloader.params.get('geo_bypass_country', None) and
+ self._GEO_BYPASS and
+ self._downloader.params.get('geo_bypass', True) and
+ not self._x_forwarded_for_ip and
+ countries):
+ country_code = random.choice(countries)
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+ if self._x_forwarded_for_ip:
+ self.report_warning(
+ 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country_code.upper()))
+ return True
+ return False
+
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
self._downloader = downloader
@@ -433,6 +513,15 @@ class InfoExtractor(object):
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
+ # Some sites check X-Forwarded-For HTTP header in order to figure out
+ # the origin of the client behind proxy. This allows bypassing geo
+ # restriction by faking this header's value to IP that belongs to some
+ # geo unrestricted country. We will do so once we encounter any
+ # geo restriction error.
+ if self._x_forwarded_for_ip:
+ if 'X-Forwarded-For' not in headers:
+ headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
if urlh is False:
assert not fatal
@@ -608,10 +697,8 @@ class InfoExtractor(object):
expected=True)
@staticmethod
- def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
- raise ExtractorError(
- '%s. You might want to use --proxy to workaround.' % msg,
- expected=True)
+ def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
+ raise GeoRestrictedError(msg, countries=countries)
# Methods for following #608
@staticmethod
@@ -1208,6 +1295,9 @@ class InfoExtractor(object):
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
+ if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
+ return []
+
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
format_url = lambda u: (
@@ -2070,6 +2160,123 @@ class InfoExtractor(object):
})
return formats
+ @staticmethod
+ def _find_jwplayer_data(webpage):
+ mobj = re.search(
+ r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
+ webpage)
+ if mobj:
+ return mobj.group('options')
+
+ def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+ jwplayer_data = self._parse_json(
+ self._find_jwplayer_data(webpage), video_id,
+ transform_source=js_to_json)
+ return self._parse_jwplayer_data(
+ jwplayer_data, video_id, *args, **kwargs)
+
+ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ # JWPlayer backward compatibility: flattened playlists
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+ if 'playlist' not in jwplayer_data:
+ jwplayer_data = {'playlist': [jwplayer_data]}
+
+ entries = []
+
+ # JWPlayer backward compatibility: single playlist item
+ # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+ if not isinstance(jwplayer_data['playlist'], list):
+ jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
+ for video_data in jwplayer_data['playlist']:
+ # JWPlayer backward compatibility: flattened sources
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+ if 'sources' not in video_data:
+ video_data['sources'] = [video_data]
+
+ this_video_id = video_id or video_data['mediaid']
+
+ formats = []
+ for source in video_data['sources']:
+ source_url = self._proto_relative_url(source['file'])
+ if base_url:
+ source_url = compat_urlparse.urljoin(base_url, source_url)
+ source_type = source.get('type') or ''
+ ext = mimetype2ext(source_type) or determine_ext(source_url)
+ if source_type == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ source_url, this_video_id, mpd_id=mpd_id, fatal=False))
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+ elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+ formats.append({
+ 'url': source_url,
+ 'vcodec': 'none',
+ 'ext': ext,
+ })
+ else:
+ height = int_or_none(source.get('height'))
+ if height is None:
+ # Often no height is provided but there is a label in
+ # format like 1080p.
+ height = int_or_none(self._search_regex(
+ r'^(\d{3,})[pP]$', source.get('label') or '',
+ 'height', default=None))
+ a_format = {
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': height,
+ 'ext': ext,
+ }
+ if source_url.startswith('rtmp'):
+ a_format['ext'] = 'flv'
+
+ # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+ # of jwplayer.flash.swf
+ rtmp_url_parts = re.split(
+ r'((?:mp4|mp3|flv):)', source_url, 1)
+ if len(rtmp_url_parts) == 3:
+ rtmp_url, prefix, play_path = rtmp_url_parts
+ a_format.update({
+ 'url': rtmp_url,
+ 'play_path': prefix + play_path,
+ })
+ if rtmp_params:
+ a_format.update(rtmp_params)
+ formats.append(a_format)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ tracks = video_data.get('tracks')
+ if tracks and isinstance(tracks, list):
+ for track in tracks:
+ if track.get('kind') != 'captions':
+ continue
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
+
+ entries.append({
+ 'id': this_video_id,
+ 'title': video_data['title'] if require_title else video_data.get('title'),
+ 'description': video_data.get('description'),
+ 'thumbnail': self._proto_relative_url(video_data.get('image')),
+ 'timestamp': int_or_none(video_data.get('pubdate')),
+ 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ })
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ return self.playlist_result(entries)
+
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
index 2f86e23..79f7a9c 100644
--- a/youtube_dl/extractor/commonmistakes.py
+++ b/youtube_dl/extractor/commonmistakes.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import sys
+
from .common import InfoExtractor
from ..utils import ExtractorError
@@ -7,7 +9,7 @@ from ..utils import ExtractorError
class CommonMistakesIE(InfoExtractor):
IE_DESC = False # Do not list
_VALID_URL = r'''(?x)
- (?:url|URL)
+ (?:url|URL)$
'''
_TESTS = [{
@@ -33,7 +35,9 @@ class UnicodeBOMIE(InfoExtractor):
IE_DESC = False
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
- _TESTS = [{
+ # Disable test for python 3.2 since BOM is broken in re in this version
+ # (see https://github.com/rg3/youtube-dl/issues/9751)
+ _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
'only_matching': True,
}]
diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py
new file mode 100644
index 0000000..7b2f500
--- /dev/null
+++ b/youtube_dl/extractor/corus.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .theplatform import ThePlatformFeedIE
+from ..utils import int_or_none
+
+
+class CorusIE(ThePlatformFeedIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
+ 'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
+ 'info_dict': {
+ 'id': '870923331648',
+ 'ext': 'mp4',
+ 'title': 'Movie Night Popcorn with Bryan',
+ 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
+ 'uploader': 'SHWM-NEW',
+ 'upload_date': '20170206',
+ 'timestamp': 1486392197,
+ },
+ }, {
+ 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
+ 'only_matching': True,
+ }]
+
+ _TP_FEEDS = {
+ 'globaltv': {
+ 'feed_id': 'ChQqrem0lNUp',
+ 'account_id': 2269680845,
+ },
+ 'etcanada': {
+ 'feed_id': 'ChQqrem0lNUp',
+ 'account_id': 2269680845,
+ },
+ 'hgtv': {
+ 'feed_id': 'L0BMHXi2no43',
+ 'account_id': 2414428465,
+ },
+ 'foodnetwork': {
+ 'feed_id': 'ukK8o58zbRmJ',
+ 'account_id': 2414429569,
+ },
+ 'slice': {
+ 'feed_id': '5tUJLgV2YNJ5',
+ 'account_id': 2414427935,
+ },
+ }
+
+ def _real_extract(self, url):
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+ feed_info = self._TP_FEEDS[domain.split('.')[0]]
+ return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: {
+ 'episode_number': int_or_none(e.get('pl1$episode')),
+ 'season_number': int_or_none(e.get('pl1$season')),
+ 'series': e.get('pl1$show'),
+ }, {
+ 'HLS': {
+ 'manifest': 'm3u',
+ },
+ 'DesktopHLS Default': {
+ 'manifest': 'm3u',
+ },
+ 'MP4 MBR': {
+ 'manifest': 'm3u',
+ },
+ }, feed_info['account_id'])
diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py
index 377fb45..f919ed2 100644
--- a/youtube_dl/extractor/crackle.py
+++ b/youtube_dl/extractor/crackle.py
@@ -6,6 +6,7 @@ from ..utils import int_or_none
class CrackleIE(InfoExtractor):
+ _GEO_COUNTRIES = ['US']
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TEST = {
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 109d1c5..a1fc6a7 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -123,7 +123,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
'id': '645513',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
@@ -192,6 +192,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# geo-restricted (US), 18+ maturity wall, non-premium available
'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
'only_matching': True,
+ }, {
+ # A description with double quotes
+ 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
+ 'info_dict': {
+ 'id': '535080',
+ 'ext': 'mp4',
+ 'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
+ 'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
+ 'uploader': 'Marvelous AQL Inc.',
+ 'upload_date': '20091021',
+ },
+ 'params': {
+ # Just test metadata extraction
+ 'skip_download': True,
+ },
}]
_FORMAT_IDS = {
@@ -362,9 +377,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
webpage, 'video_title')
video_title = re.sub(r' {2,}', ' ', video_title)
- video_description = self._html_search_regex(
- r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id,
- webpage, 'description', default=None)
+ video_description = self._parse_json(self._html_search_regex(
+ r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
+ webpage, 'description', default='{}'), video_id).get('description')
if video_description:
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
video_upload_date = self._html_search_regex(
@@ -519,11 +534,11 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
webpage, 'title')
episode_paths = re.findall(
- r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
+ r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
webpage)
entries = [
- self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll')
- for ep in episode_paths
+ self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
+ for ep_id, ep in episode_paths
]
entries.reverse()
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 31bf5fa..b312401 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -66,7 +66,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'uploader_id': 'xijv66',
'age_limit': 0,
'view_count': int,
- 'comment_count': int,
}
},
# Vevo video
@@ -140,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
view_count = str_to_int(view_count_str)
comment_count = int_or_none(self._search_regex(
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
- webpage, 'comment count', fatal=False))
+ webpage, 'comment count', default=None))
player_v5 = self._search_regex(
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py
index 396873c..939d133 100644
--- a/youtube_dl/extractor/disney.py
+++ b/youtube_dl/extractor/disney.py
@@ -9,13 +9,15 @@ from ..utils import (
unified_strdate,
compat_str,
determine_ext,
+ ExtractorError,
)
class DisneyIE(InfoExtractor):
_VALID_URL = r'''(?x)
- https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})'''
+ https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
_TESTS = [{
+ # Disney.EmbedVideo
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
'info_dict': {
'id': '545ed1857afee5a0ec239977',
@@ -29,6 +31,20 @@ class DisneyIE(InfoExtractor):
'skip_download': True,
}
}, {
+ # Grill.burger
+ 'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette',
+ 'info_dict': {
+ 'id': '5454e9f4e9804a552e3524c8',
+ 'ext': 'mp4',
+ 'title': '"Intro" Featurette: Rogue One: A Star Wars Story',
+ 'upload_date': '20170104',
+ 'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
'only_matching': True,
}, {
@@ -43,31 +59,55 @@ class DisneyIE(InfoExtractor):
}, {
'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
'only_matching': True,
+ }, {
+ 'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- domain, video_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(
- 'http://%s/embed/%s' % (domain, video_id), video_id)
- video_data = self._parse_json(self._search_regex(
- r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video']
+ domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
+ r'Grill\.burger\s*=\s*({.+})\s*:',
+ webpage, 'grill data'))
+ page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video')
+ video_data = page_data['data'][0]
+ else:
+ webpage = self._download_webpage(
+ 'http://%s/embed/%s' % (domain, video_id), video_id)
+ page_data = self._parse_json(self._search_regex(
+ r'Disney\.EmbedVideo\s*=\s*({.+});',
+ webpage, 'embed data'), video_id)
+ video_data = page_data['video']
for external in video_data.get('externals', []):
if external.get('source') == 'vevo':
return self.url_result('vevo:' + external['data_id'], 'Vevo')
+ video_id = video_data['id']
title = video_data['title']
formats = []
for flavor in video_data.get('flavors', []):
flavor_format = flavor.get('format')
flavor_url = flavor.get('url')
- if not flavor_url or not re.match(r'https?://', flavor_url):
+ if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':
continue
tbr = int_or_none(flavor.get('bitrate'))
if tbr == 99999:
formats.extend(self._extract_m3u8_formats(
- flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False))
+ flavor_url, video_id, 'mp4',
+ m3u8_id=flavor_format, fatal=False))
continue
format_id = []
if flavor_format:
@@ -88,6 +128,10 @@ class DisneyIE(InfoExtractor):
'ext': ext,
'vcodec': 'none' if (width == 0 and height == 0) else None,
})
+ if not formats and video_data.get('expired'):
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
+ expected=True)
self._sort_formats(formats)
subtitles = {}
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index bcd9fe2..e7abc88 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -20,6 +20,7 @@ from ..utils import (
class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
+ _GEO_COUNTRIES = ['US', 'CA']
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
@@ -116,8 +117,9 @@ class DramaFeverIE(DramaFeverBaseIE):
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
- raise ExtractorError(
- 'Currently unavailable in your country.', expected=True)
+ self.raise_geo_restricted(
+ msg='Currently unavailable in your country',
+ countries=self._GEO_COUNTRIES)
raise
series_id, episode_number = video_id.split('.')
diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py
index 6ca07a1..3f62686 100644
--- a/youtube_dl/extractor/einthusan.py
+++ b/youtube_dl/extractor/einthusan.py
@@ -1,67 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals
+import base64
+import json
+
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_urlparse,
+ compat_str,
+)
from ..utils import (
- remove_start,
- sanitized_Request,
+ extract_attributes,
+ ExtractorError,
+ get_elements_by_class,
+ urlencode_postdata,
)
class EinthusanIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
- _TESTS = [
- {
- 'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
- 'md5': 'd71379996ff5b7f217eca034c34e3461',
- 'info_dict': {
- 'id': '2447',
- 'ext': 'mp4',
- 'title': 'Ek Villain',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
- }
- },
- {
- 'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
- 'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
- 'info_dict': {
- 'id': '1671',
- 'ext': 'mp4',
- 'title': 'Soodhu Kavvuum',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
- }
- },
- ]
+ _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://einthusan.tv/movie/watch/9097/',
+ 'md5': 'ff0f7f2065031b8a2cf13a933731c035',
+ 'info_dict': {
+ 'id': '9097',
+ 'ext': 'mp4',
+ 'title': 'Ae Dil Hai Mushkil',
+ 'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
+ 'only_matching': True,
+ }]
+
+ # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
+ def _decrypt(self, encrypted_data, video_id):
+ return self._parse_json(base64.b64decode((
+ encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
+ ).encode('ascii')).decode('utf-8'), video_id)
def _real_extract(self, url):
video_id = self._match_id(url)
- request = sanitized_Request(url)
- request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0')
- webpage = self._download_webpage(request, video_id)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
+
+ player_params = extract_attributes(self._search_regex(
+ r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
+
+ page_id = self._html_search_regex(
+ '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
+ video_data = self._download_json(
+ 'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
+ data=urlencode_postdata({
+ 'xEvent': 'UIVideoPlayer.PingOutcome',
+ 'xJson': json.dumps({
+ 'EJOutcomes': player_params['data-ejpingables'],
+ 'NativeHLS': False
+ }),
+ 'arcVersion': 3,
+ 'appVersion': 59,
+ 'gorilla.csrf.Token': page_id,
+ }))['Data']
+
+ if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
+ raise ExtractorError(
+ 'Download rate reached. Please try again later.', expected=True)
+
+ ej_links = self._decrypt(video_data['EJLinks'], video_id)
+
+ formats = []
- title = self._html_search_regex(
- r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>',
- webpage, 'title')
+ m3u8_url = ej_links.get('HLSLink')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
- video_id = self._search_regex(
- r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
+ mp4_url = ej_links.get('MP4Link')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ })
- m3u8_url = self._download_webpage(
- 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
- % video_id, video_id, headers={'Referer': url})
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
+ self._sort_formats(formats)
- description = self._html_search_meta('description', webpage)
+ description = get_elements_by_class('synopsis', webpage)[0]
thumbnail = self._html_search_regex(
- r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
- webpage, "thumbnail url", fatal=False)
+ r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
+ webpage, 'thumbnail url', fatal=False, group='url')
if thumbnail is not None:
- thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..'))
+ thumbnail = compat_urlparse.urljoin(url, thumbnail)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
index 74bbc5c..e0a13dd 100644
--- a/youtube_dl/extractor/ellentv.py
+++ b/youtube_dl/extractor/ellentv.py
@@ -1,13 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
-
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- NO_DEFAULT,
-)
+from .kaltura import KalturaIE
+from ..utils import NO_DEFAULT
class EllenTVIE(InfoExtractor):
@@ -65,7 +61,7 @@ class EllenTVIE(InfoExtractor):
if partner_id and kaltura_id:
break
- return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
+ return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
class EllenTVClipsIE(InfoExtractor):
@@ -77,14 +73,14 @@ class EllenTVClipsIE(InfoExtractor):
'id': 'meryl-streep-vanessa-hudgens',
'title': 'Meryl Streep, Vanessa Hudgens',
},
- 'playlist_mincount': 7,
+ 'playlist_mincount': 5,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
- playlist = self._extract_playlist(webpage)
+ playlist = self._extract_playlist(webpage, playlist_id)
return {
'_type': 'playlist',
@@ -93,16 +89,13 @@ class EllenTVClipsIE(InfoExtractor):
'entries': self._extract_entries(playlist)
}
- def _extract_playlist(self, webpage):
+ def _extract_playlist(self, webpage, playlist_id):
json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
- try:
- return json.loads('[{' + json_string + '}]')
- except ValueError as ve:
- raise ExtractorError('Failed to download JSON', cause=ve)
+ return self._parse_json('[{' + json_string + '}]', playlist_id)
def _extract_entries(self, playlist):
return [
self.url_result(
'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
- 'Kaltura')
+ KalturaIE.ie_key(), video_id=item['kaltura_entry_id'])
for item in playlist]
diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py
index 99e00cf..b89f6db 100644
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dl/extractor/elpais.py
@@ -39,6 +39,18 @@ class ElPaisIE(InfoExtractor):
'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
'upload_date': '20170127',
},
+ }, {
+ 'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html',
+ 'info_dict': {
+ 'id': '1487062137_075943',
+ 'ext': 'mp4',
+ 'title': 'Disyuntivas',
+ 'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218',
+ 'upload_date': '20170214',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
@@ -59,14 +71,15 @@ class ElPaisIE(InfoExtractor):
video_url = prefix + video_suffix
thumbnail_suffix = self._search_regex(
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
- webpage, 'thumbnail URL', fatal=False)
+ webpage, 'thumbnail URL', default=None)
thumbnail = (
None if thumbnail_suffix is None
- else prefix + thumbnail_suffix)
+ else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage)
title = self._html_search_regex(
- (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
- r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
- webpage, 'title')
+ (r"tituloVideo\s*=\s*'([^']+)'",
+ r'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+ r'<h1[^>]+class="titulo"[^>]*>([^<]+)'),
+ webpage, 'title', default=None) or self._og_search_title(webpage)
upload_date = unified_strdate(self._search_regex(
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
webpage, 'upload date', default=None) or self._html_search_meta(
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 12cda36..83a170f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -202,6 +202,7 @@ from .commonprotocols import (
RtmpIE,
)
from .condenast import CondeNastIE
+from .corus import CorusIE
from .cracked import CrackedIE
from .crackle import CrackleIE
from .criterion import CriterionIE
@@ -381,10 +382,7 @@ from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
-from .hgtv import (
- HGTVIE,
- HGTVComShowIE,
-)
+from .hgtv import HGTVComShowIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
@@ -696,6 +694,8 @@ from .ondemandkorea import OnDemandKoreaIE
from .onet import (
OnetIE,
OnetChannelIE,
+ OnetMVPIE,
+ OnetPlIE,
)
from .onionstudios import OnionStudiosIE
from .ooyala import (
@@ -838,6 +838,7 @@ from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
+from .scrippsnetworks import ScrippsNetworksWatchIE
from .seeker import SeekerIE
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
@@ -851,6 +852,7 @@ from .shared import (
from .showroomlive import ShowRoomLiveIE
from .sina import SinaIE
from .sixplay import SixPlayIE
+from .skylinewebcams import SkylineWebcamsIE
from .skynewsarabia import (
SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
@@ -895,6 +897,7 @@ from .sport5 import Sport5IE
from .sportbox import SportBoxEmbedIE
from .sportdeutschland import SportDeutschlandIE
from .sportschau import SportschauIE
+from .sprout import SproutIE
from .srgssr import (
SRGSSRIE,
SRGSSRPlayIE,
@@ -1007,6 +1010,7 @@ from .tvc import (
)
from .tvigle import TvigleIE
from .tvland import TVLandIE
+from .tvn24 import TVN24IE
from .tvnoe import TVNoeIE
from .tvp import (
TVPEmbedIE,
@@ -1017,6 +1021,7 @@ from .tvplay import (
TVPlayIE,
ViafreeIE,
)
+from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
@@ -1146,6 +1151,7 @@ from .vlive import (
VLiveChannelIE
)
from .vodlocker import VodlockerIE
+from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE
from .voxmedia import VoxMediaIE
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index b325c82..70b8c95 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -135,6 +136,46 @@ class FacebookIE(InfoExtractor):
'uploader': 'CNN',
},
}, {
+ # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
+ 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
+ 'info_dict': {
+ 'id': '1417995061575415',
+ 'ext': 'mp4',
+ 'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
+ 'timestamp': 1486648217,
+ 'upload_date': '20170209',
+ 'uploader': 'Yaroslav Korpan',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
+ 'info_dict': {
+ 'id': '1072691702860471',
+ 'ext': 'mp4',
+ 'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
+ 'timestamp': 1477305000,
+ 'upload_date': '20161024',
+ 'uploader': 'La Guía Del Varón',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
+ 'info_dict': {
+ 'id': '1396382447100162',
+ 'ext': 'mp4',
+ 'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
+ 'timestamp': 1486035494,
+ 'upload_date': '20170202',
+ 'uploader': 'Elisabeth Ahtn',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
}, {
@@ -249,7 +290,7 @@ class FacebookIE(InfoExtractor):
for item in instances:
if item[1][0] == 'VideoConfig':
video_item = item[2][0]
- if video_item.get('video_id') == video_id:
+ if video_item.get('video_id'):
return video_item['videoData']
server_js_data = self._parse_json(self._search_regex(
@@ -262,7 +303,7 @@ class FacebookIE(InfoExtractor):
if not video_data:
server_js_data = self._parse_json(
self._search_regex(
- r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
+ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if server_js_data:
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 1c233f0..9868ca6 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -20,6 +20,7 @@ from ..utils import (
float_or_none,
HEADRequest,
is_html,
+ js_to_json,
orderedSet,
sanitized_Request,
smuggle_url,
@@ -961,6 +962,16 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
+ # Complex jwplayer
+ {
+ 'url': 'http://www.indiedb.com/games/king-machine/videos',
+ 'info_dict': {
+ 'id': 'videos',
+ 'ext': 'mp4',
+ 'title': 'king machine trailer 1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -991,19 +1002,6 @@ class GenericIE(InfoExtractor):
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
},
},
- # Kaltura embed protected with referrer
- {
- 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
- 'info_dict': {
- 'id': '1_g4fbemnq',
- 'ext': 'mp4',
- 'title': 'Violetta - Achter De Schermen - Ruggero',
- 'description': 'Achter de schermen met Ruggero',
- 'timestamp': 1435133761,
- 'upload_date': '20150624',
- 'uploader_id': 'echojecka',
- },
- },
# Kaltura embed with single quotes
{
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
@@ -1503,7 +1501,12 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
'add_ie': [VideoPressIE.ie_key()],
- }
+ },
+ {
+ # ThePlatform embedded with whitespaces in URLs
+ 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+ 'only_matching': True,
+ },
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -2350,8 +2353,9 @@ class GenericIE(InfoExtractor):
'Channel': 'channel',
'ChannelList': 'channel_list',
}
- return self.url_result('limelight:%s:%s' % (
- lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
+ return self.url_result(smuggle_url('limelight:%s:%s' % (
+ lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
+ 'Limelight%s' % mobj.group(1), mobj.group(2))
mobj = re.search(
r'''(?sx)
@@ -2361,7 +2365,9 @@ class GenericIE(InfoExtractor):
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
''', webpage)
if mobj:
- return self.url_result('limelight:media:%s' % mobj.group('id'))
+ return self.url_result(smuggle_url(
+ 'limelight:media:%s' % mobj.group('id'),
+ {'source_url': url}), 'LimelightMedia', mobj.group('id'))
# Look for AdobeTVVideo embeds
mobj = re.search(
@@ -2498,6 +2504,15 @@ class GenericIE(InfoExtractor):
self._sort_formats(entry['formats'])
return self.playlist_result(entries)
+ jwplayer_data_str = self._find_jwplayer_data(webpage)
+ if jwplayer_data_str:
+ try:
+ jwplayer_data = self._parse_json(
+ jwplayer_data_str, video_id, transform_source=js_to_json)
+ return self._parse_jwplayer_data(jwplayer_data, video_id)
+ except ExtractorError:
+ pass
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index a34779b..21ed846 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
+from .adobepass import AdobePassIE
from ..utils import (
int_or_none,
determine_ext,
@@ -13,15 +13,30 @@ from ..utils import (
)
-class GoIE(InfoExtractor):
- _BRANDS = {
- 'abc': '001',
- 'freeform': '002',
- 'watchdisneychannel': '004',
- 'watchdisneyjunior': '008',
- 'watchdisneyxd': '009',
+class GoIE(AdobePassIE):
+ _SITE_INFO = {
+ 'abc': {
+ 'brand': '001',
+ 'requestor_id': 'ABC',
+ },
+ 'freeform': {
+ 'brand': '002',
+ 'requestor_id': 'ABCFamily',
+ },
+ 'watchdisneychannel': {
+ 'brand': '004',
+ 'requestor_id': 'Disney',
+ },
+ 'watchdisneyjunior': {
+ 'brand': '008',
+ 'requestor_id': 'DisneyJunior',
+ },
+ 'watchdisneyxd': {
+ 'brand': '009',
+ 'requestor_id': 'DisneyXD',
+ }
}
- _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys())
+ _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
_TESTS = [{
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
'info_dict': {
@@ -47,7 +62,8 @@ class GoIE(InfoExtractor):
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
- brand = self._BRANDS[sub_domain]
+ site_info = self._SITE_INFO[sub_domain]
+ brand = site_info['brand']
video_data = self._download_json(
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
video_id)['video'][0]
@@ -62,28 +78,60 @@ class GoIE(InfoExtractor):
ext = determine_ext(asset_url)
if ext == 'm3u8':
video_type = video_data.get('type')
- if video_type == 'lf':
- entitlement = self._download_json(
- 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
- video_id, data=urlencode_postdata({
- 'video_id': video_data['id'],
- 'video_type': video_type,
- 'brand': brand,
- 'device': '001',
- }))
- errors = entitlement.get('errors', {}).get('errors', [])
- if errors:
- error_message = ', '.join([error['message'] for error in errors])
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
- asset_url += '?' + entitlement['uplynkData']['sessionKey']
+ data = {
+ 'video_id': video_data['id'],
+ 'video_type': video_type,
+ 'brand': brand,
+ 'device': '001',
+ }
+ if video_data.get('accesslevel') == '1':
+ requestor_id = site_info['requestor_id']
+ resource = self._get_mvpd_resource(
+ requestor_id, title, video_id, None)
+ auth = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ data.update({
+ 'token': auth,
+ 'token_type': 'ap',
+ 'adobe_requestor_id': requestor_id,
+ })
+ else:
+ self._initialize_geo_bypass(['US'])
+ entitlement = self._download_json(
+ 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
+ video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
+ errors = entitlement.get('errors', {}).get('errors', [])
+ if errors:
+ for error in errors:
+ if error.get('code') == 1002:
+ self.raise_geo_restricted(
+ error['message'], countries=['US'])
+ error_message = ', '.join([error['message'] for error in errors])
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+ asset_url += '?' + entitlement['uplynkData']['sessionKey']
formats.extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
else:
- formats.append({
+ f = {
'format_id': format_id,
'url': asset_url,
'ext': ext,
- })
+ }
+ if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
+ f.update({
+ 'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
+ 'preference': 1,
+ })
+ else:
+ mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
+ if mobj:
+ height = int(mobj.group(2))
+ f.update({
+ 'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
+ 'width': int(mobj.group(1)),
+ 'height': height,
+ })
+ formats.append(f)
self._sort_formats(formats)
subtitles = {}
diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py
index 1629cdb..382f327 100644
--- a/youtube_dl/extractor/heise.py
+++ b/youtube_dl/extractor/heise.py
@@ -6,59 +6,58 @@ from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
+ xpath_text,
)
class HeiseIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://(?:www\.)?heise\.de/video/artikel/
- .+?(?P<id>[0-9]+)\.html(?:$|[?#])
- '''
- _TEST = {
- 'url': (
- 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
- ),
+ _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
'md5': 'ffed432483e922e88545ad9f2f15d30e',
'info_dict': {
'id': '2404147',
'ext': 'mp4',
- 'title': (
- "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
- ),
+ 'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
'format_id': 'mp4_720p',
'timestamp': 1411812600,
'upload_date': '20140927',
- 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
- 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
+ 'thumbnail': r're:^https?://.*/gallery/$',
}
- }
+ }, {
+ 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
container_id = self._search_regex(
- r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
+ r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
webpage, 'container ID')
sequenz_id = self._search_regex(
- r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
+ r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
webpage, 'sequenz ID')
- data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id)
- doc = self._download_xml(data_url, video_id)
- info = {
- 'id': video_id,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'timestamp': parse_iso8601(
- self._html_search_meta('date', webpage)),
- 'description': self._og_search_description(webpage),
- }
+ title = self._html_search_meta('fulltitle', webpage, default=None)
+ if not title or title == "c't":
+ title = self._search_regex(
+ r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
+ webpage, 'title')
- title = self._html_search_meta('fulltitle', webpage)
- if title:
- info['title'] = title
- else:
- info['title'] = self._og_search_title(webpage)
+ doc = self._download_xml(
+ 'http://www.heise.de/videout/feed', video_id, query={
+ 'container': container_id,
+ 'sequenz': sequenz_id,
+ })
formats = []
for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
@@ -74,6 +73,18 @@ class HeiseIE(InfoExtractor):
'height': height,
})
self._sort_formats(formats)
- info['formats'] = formats
- return info
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or
+ self._og_search_thumbnail(webpage)),
+ 'timestamp': parse_iso8601(
+ self._html_search_meta('date', webpage)),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/hgtv.py b/youtube_dl/extractor/hgtv.py
index 69543bf..e854300 100644
--- a/youtube_dl/extractor/hgtv.py
+++ b/youtube_dl/extractor/hgtv.py
@@ -2,50 +2,6 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- js_to_json,
- smuggle_url,
-)
-
-
-class HGTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video.html'
- _TEST = {
- 'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
- 'md5': '',
- 'info_dict': {
- 'id': 'aFH__I_5FBOX',
- 'ext': 'mp4',
- 'title': 'Overnight Success',
- 'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
- 'uploader': 'SHWM-NEW',
- 'timestamp': 1470320034,
- 'upload_date': '20160804',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- embed_vars = self._parse_json(self._search_regex(
- r'(?s)embed_vars\s*=\s*({.*?});',
- webpage, 'embed vars'), display_id, js_to_json)
- return {
- '_type': 'url_transparent',
- 'url': smuggle_url(
- 'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
- 'force_smil_url': True
- }),
- 'series': embed_vars.get('show'),
- 'season_number': int_or_none(embed_vars.get('season')),
- 'episode_number': int_or_none(embed_vars.get('episode')),
- 'ie_key': 'ThePlatform',
- }
class HGTVComShowIE(InfoExtractor):
diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py
index f05d765..3a7a66a 100644
--- a/youtube_dl/extractor/hotstar.py
+++ b/youtube_dl/extractor/hotstar.py
@@ -34,11 +34,9 @@ class HotStarIE(InfoExtractor):
'only_matching': True,
}]
- _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
- _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
-
- def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True):
- json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal)
+ def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
+ json_data = super(HotStarIE, self)._download_json(
+ url_or_request, video_id, note, fatal=fatal, query=query)
if json_data['resultCode'] != 'OK':
if fatal:
raise ExtractorError(json_data['errorDescription'])
@@ -48,20 +46,37 @@ class HotStarIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
- self._GET_CONTENT_TEMPLATE % video_id,
- video_id)['contentInfo'][0]
+ 'http://account.hotstar.com/AVS/besc', video_id, query={
+ 'action': 'GetAggregatedContentDetails',
+ 'channel': 'PCTV',
+ 'contentId': video_id,
+ })['contentInfo'][0]
+ title = video_data['episodeTitle']
+
+ if video_data.get('encrypted') == 'Y':
+ raise ExtractorError('This video is DRM protected.', expected=True)
formats = []
- # PCTV for extracting f4m manifest
- for f in ('TABLET',):
+ for f in ('JIO',):
format_data = self._download_json(
- self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'),
- video_id, 'Downloading %s JSON metadata' % f, fatal=False)
+ 'http://getcdn.hotstar.com/AVS/besc',
+ video_id, 'Downloading %s JSON metadata' % f,
+ fatal=False, query={
+ 'action': 'GetCDN',
+ 'asJson': 'Y',
+ 'channel': f,
+ 'id': video_id,
+ 'type': 'VOD',
+ })
if format_data:
- format_url = format_data['src']
+ format_url = format_data.get('src')
+ if not format_url:
+ continue
ext = determine_ext(format_url)
if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
elif ext == 'f4m':
# produce broken files
continue
@@ -75,9 +90,12 @@ class HotStarIE(InfoExtractor):
return {
'id': video_id,
- 'title': video_data['episodeTitle'],
+ 'title': title,
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'timestamp': int_or_none(video_data.get('broadcastDate')),
'formats': formats,
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ 'series': video_data.get('contentTitle'),
}
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 98f408c..c1921cb 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
get_element_by_attribute,
int_or_none,
@@ -51,6 +52,33 @@ class InstagramIE(InfoExtractor):
'skip_download': True,
},
}, {
+ # multi video post
+ 'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BQ0dSaohpPW',
+ 'ext': 'mp4',
+ 'title': 'Video 1',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'BQ0dTpOhuHT',
+ 'ext': 'mp4',
+ 'title': 'Video 2',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'BQ0dT7RBFeF',
+ 'ext': 'mp4',
+ 'title': 'Video 3',
+ },
+ }],
+ 'info_dict': {
+ 'id': 'BQ0eAlwhDrw',
+ 'title': 'Post by instagram',
+ 'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
+ },
+ }, {
'url': 'https://instagram.com/p/-Cmh1cukG2/',
'only_matching': True,
}, {
@@ -113,6 +141,32 @@ class InstagramIE(InfoExtractor):
'timestamp': int_or_none(comment.get('created_at')),
} for comment in media.get(
'comments', {}).get('nodes', []) if comment.get('text')]
+ if not video_url:
+ edges = try_get(
+ media, lambda x: x['edge_sidecar_to_children']['edges'],
+ list) or []
+ if edges:
+ entries = []
+ for edge_num, edge in enumerate(edges, start=1):
+ node = try_get(edge, lambda x: x['node'], dict)
+ if not node:
+ continue
+ node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
+ if not node_video_url:
+ continue
+ entries.append({
+ 'id': node.get('shortcode') or node['id'],
+ 'title': 'Video %d' % edge_num,
+ 'url': node_video_url,
+ 'thumbnail': node.get('display_url'),
+ 'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
+ 'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
+ 'view_count': int_or_none(node.get('video_view_count')),
+ })
+ return self.playlist_result(
+ entries, video_id,
+ 'Post by %s' % uploader_id if uploader_id else None,
+ description)
if not video_url:
video_url = self._og_search_video_url(webpage, secure=False)
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
index 0fe5768..a29e6a5 100644
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -8,12 +8,12 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
js_to_json,
- sanitized_Request,
)
class IPrimaIE(InfoExtractor):
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
+ _GEO_BYPASS = False
_TESTS = [{
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
@@ -29,6 +29,10 @@ class IPrimaIE(InfoExtractor):
}, {
'url': 'http://play.iprima.cz/particka/particka-92',
'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -38,11 +42,13 @@ class IPrimaIE(InfoExtractor):
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
- req = sanitized_Request(
- 'http://play.iprima.cz/prehravac/init?_infuse=1'
- '&_ts=%s&productId=%s' % (round(time.time()), video_id))
- req.add_header('Referer', url)
- playerpage = self._download_webpage(req, video_id, note='Downloading player')
+ playerpage = self._download_webpage(
+ 'http://play.iprima.cz/prehravac/init',
+ video_id, note='Downloading player', query={
+ '_infuse': 1,
+ '_ts': round(time.time()),
+ 'productId': video_id,
+ }, headers={'Referer': url})
formats = []
@@ -82,7 +88,7 @@ class IPrimaIE(InfoExtractor):
extract_formats(src)
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
- self.raise_geo_restricted()
+ self.raise_geo_restricted(countries=['CZ'])
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py
index 01c7b30..2af6a6d 100644
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -173,11 +173,12 @@ class IqiyiIE(InfoExtractor):
}
}, {
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
- 'md5': '667171934041350c5de3f5015f7f1152',
+ 'md5': 'b7dc800a4004b1b57749d9abae0472da',
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb',
'ext': 'mp4',
- 'title': '名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇',
+ # This can be either Simplified Chinese or Traditional Chinese
+ 'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
},
'skip': 'Geo-restricted to China',
}, {
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index b0d8604..021c6b2 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -24,6 +24,7 @@ from ..utils import (
class ITVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+ _GEO_COUNTRIES = ['GB']
_TEST = {
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
'info_dict': {
@@ -98,7 +99,11 @@ class ITVIE(InfoExtractor):
headers=headers, data=etree.tostring(req_env))
playlist = xpath_element(resp_env, './/Playlist')
if playlist is None:
+ fault_code = xpath_text(resp_env, './/faultcode')
fault_string = xpath_text(resp_env, './/faultstring')
+ if fault_code == 'InvalidGeoRegion':
+ self.raise_geo_restricted(
+ msg=fault_string, countries=self._GEO_COUNTRIES)
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index 3d3c150..cb51cef 100644
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -16,6 +16,8 @@ class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['RU']
_TESTS = [
# Single movie
@@ -91,7 +93,11 @@ class IviIE(InfoExtractor):
if 'error' in video_json:
error = video_json['error']
- if error['origin'] == 'NoRedisValidData':
+ origin = error['origin']
+ if origin == 'NotAllowedForLocation':
+ self.raise_geo_restricted(
+ msg=error['message'], countries=self._GEO_COUNTRIES)
+ elif origin == 'NoRedisValidData':
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
raise ExtractorError(
'Unable to download video %s: %s' % (video_id, error['message']),
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index aff7ab4..33d55f7 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -4,139 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- determine_ext,
- float_or_none,
- int_or_none,
- js_to_json,
- mimetype2ext,
- urljoin,
-)
-class JWPlatformBaseIE(InfoExtractor):
- @staticmethod
- def _find_jwplayer_data(webpage):
- # TODO: Merge this with JWPlayer-related codes in generic.py
-
- mobj = re.search(
- r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
- webpage)
- if mobj:
- return mobj.group('options')
-
- def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
- jwplayer_data = self._parse_json(
- self._find_jwplayer_data(webpage), video_id,
- transform_source=js_to_json)
- return self._parse_jwplayer_data(
- jwplayer_data, video_id, *args, **kwargs)
-
- def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
- m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
- # JWPlayer backward compatibility: flattened playlists
- # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
- if 'playlist' not in jwplayer_data:
- jwplayer_data = {'playlist': [jwplayer_data]}
-
- entries = []
-
- # JWPlayer backward compatibility: single playlist item
- # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
- if not isinstance(jwplayer_data['playlist'], list):
- jwplayer_data['playlist'] = [jwplayer_data['playlist']]
-
- for video_data in jwplayer_data['playlist']:
- # JWPlayer backward compatibility: flattened sources
- # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
- if 'sources' not in video_data:
- video_data['sources'] = [video_data]
-
- this_video_id = video_id or video_data['mediaid']
-
- formats = []
- for source in video_data['sources']:
- source_url = self._proto_relative_url(source['file'])
- if base_url:
- source_url = compat_urlparse.urljoin(base_url, source_url)
- source_type = source.get('type') or ''
- ext = mimetype2ext(source_type) or determine_ext(source_url)
- if source_type == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
- elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- source_url, this_video_id, mpd_id=mpd_id, fatal=False))
- # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
- elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
- formats.append({
- 'url': source_url,
- 'vcodec': 'none',
- 'ext': ext,
- })
- else:
- height = int_or_none(source.get('height'))
- if height is None:
- # Often no height is provided but there is a label in
- # format like 1080p.
- height = int_or_none(self._search_regex(
- r'^(\d{3,})[pP]$', source.get('label') or '',
- 'height', default=None))
- a_format = {
- 'url': source_url,
- 'width': int_or_none(source.get('width')),
- 'height': height,
- 'ext': ext,
- }
- if source_url.startswith('rtmp'):
- a_format['ext'] = 'flv'
-
- # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
- # of jwplayer.flash.swf
- rtmp_url_parts = re.split(
- r'((?:mp4|mp3|flv):)', source_url, 1)
- if len(rtmp_url_parts) == 3:
- rtmp_url, prefix, play_path = rtmp_url_parts
- a_format.update({
- 'url': rtmp_url,
- 'play_path': prefix + play_path,
- })
- if rtmp_params:
- a_format.update(rtmp_params)
- formats.append(a_format)
- self._sort_formats(formats)
-
- subtitles = {}
- tracks = video_data.get('tracks')
- if tracks and isinstance(tracks, list):
- for track in tracks:
- if track.get('kind') != 'captions':
- continue
- track_url = urljoin(base_url, track.get('file'))
- if not track_url:
- continue
- subtitles.setdefault(track.get('label') or 'en', []).append({
- 'url': self._proto_relative_url(track_url)
- })
-
- entries.append({
- 'id': this_video_id,
- 'title': video_data['title'] if require_title else video_data.get('title'),
- 'description': video_data.get('description'),
- 'thumbnail': self._proto_relative_url(video_data.get('image')),
- 'timestamp': int_or_none(video_data.get('pubdate')),
- 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
- 'subtitles': subtitles,
- 'formats': formats,
- })
- if len(entries) == 1:
- return entries[0]
- else:
- return self.playlist_result(entries)
-
-
-class JWPlatformIE(JWPlatformBaseIE):
+class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TEST = {
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 5ef382f..54374ea 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -23,11 +23,11 @@ class KalturaIE(InfoExtractor):
(?:
kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
https?://
- (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
+ (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?:
(?:
# flash player
- index\.php/kwidget|
+ index\.php/(?:kwidget|extwidget/preview)|
# html5 player
html5/html5lib/[^/]+/mwEmbedFrame\.php
)
@@ -94,6 +94,14 @@ class KalturaIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ },
+ {
+ 'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+ 'only_matching': True,
}
]
@@ -112,7 +120,7 @@ class KalturaIE(InfoExtractor):
re.search(
r'''(?xs)
(?P<q1>["\'])
- (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
+ (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
(?P=q1).*?
(?:
entry_?[Ii]d|
@@ -209,6 +217,8 @@ class KalturaIE(InfoExtractor):
partner_id = params['wid'][0][1:]
elif 'p' in params:
partner_id = params['p'][0]
+ elif 'partner_id' in params:
+ partner_id = params['partner_id'][0]
else:
raise ExtractorError('Invalid URL', expected=True)
if 'entry_id' in params:
diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py
index 4321f90..9eda956 100644
--- a/youtube_dl/extractor/leeco.py
+++ b/youtube_dl/extractor/leeco.py
@@ -30,7 +30,7 @@ from ..utils import (
class LeIE(InfoExtractor):
IE_DESC = '乐视网'
_VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
-
+ _GEO_COUNTRIES = ['CN']
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
_TESTS = [{
@@ -126,10 +126,9 @@ class LeIE(InfoExtractor):
if playstatus['status'] == 0:
flag = playstatus['flag']
if flag == 1:
- msg = 'Country %s auth error' % playstatus['country']
+ self.raise_geo_restricted()
else:
- msg = 'Generic error. flag = %d' % flag
- raise ExtractorError(msg, expected=True)
+ raise ExtractorError('Generic error. flag = %d' % flag, expected=True)
def _real_extract(self, url):
media_id = self._match_id(url)
diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dl/extractor/lemonde.py
index 42568f3..3306892 100644
--- a/youtube_dl/extractor/lemonde.py
+++ b/youtube_dl/extractor/lemonde.py
@@ -7,20 +7,40 @@ class LemondeIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
_TESTS = [{
'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
- 'md5': '01fb3c92de4c12c573343d63e163d302',
+ 'md5': 'da120c8722d8632eec6ced937536cc98',
'info_dict': {
'id': 'lqm3kl',
'ext': 'mp4',
'title': "Comprendre l'affaire Bygmalion en 5 minutes",
'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 320,
+ 'duration': 309,
'upload_date': '20160119',
'timestamp': 1453194778,
'uploader_id': '3pmkp',
},
}, {
+ # standard iframe embed
+ 'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html',
+ 'info_dict': {
+ 'id': 'uzsxms',
+ 'ext': 'mp4',
+ 'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?",
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 325,
+ 'upload_date': '20161021',
+ 'timestamp': 1477044540,
+ 'uploader_id': '3pmkp',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
'only_matching': True,
+ }, {
+ # YouTube embeds
+ 'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -30,5 +50,9 @@ class LemondeIE(InfoExtractor):
digiteka_url = self._proto_relative_url(self._search_regex(
r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
- webpage, 'digiteka url', group='url'))
- return self.url_result(digiteka_url, 'Digiteka')
+ webpage, 'digiteka url', group='url', default=None))
+
+ if digiteka_url:
+ return self.url_result(digiteka_url, 'Digiteka')
+
+ return self.url_result(url, 'Generic')
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index e635f3c..422be25 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -4,10 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
+ unsmuggle_url,
+ ExtractorError,
)
@@ -15,20 +18,31 @@ class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
- def _call_playlist_service(self, item_id, method, fatal=True):
- return self._download_json(
- self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
- item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)
+ def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
+ try:
+ return self._download_json(
+ self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
+ item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
+ if error == 'CountryDisabled':
+ self.raise_geo_restricted()
+ raise ExtractorError(error, expected=True)
+ raise
def _call_api(self, organization_id, item_id, method):
return self._download_json(
self._API_URL % (organization_id, self._API_PATH, item_id, method),
item_id, 'Downloading API %s JSON' % method)
- def _extract(self, item_id, pc_method, mobile_method, meta_method):
- pc = self._call_playlist_service(item_id, pc_method)
+ def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
+ pc = self._call_playlist_service(item_id, pc_method, referer=referer)
metadata = self._call_api(pc['orgId'], item_id, meta_method)
- mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
+ mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
return pc, mobile, metadata
def _extract_info(self, streams, mobile_urls, properties):
@@ -207,10 +221,14 @@ class LimelightMediaIE(LimelightBaseIE):
_API_PATH = 'media'
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
+ self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
pc, mobile, metadata = self._extract(
- video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')
+ video_id, 'getPlaylistByMediaId',
+ 'getMobilePlaylistByMediaId', 'properties',
+ smuggled_data.get('source_url'))
return self._extract_info(
pc['playlistItems'][0].get('streams', []),
@@ -247,11 +265,13 @@ class LimelightChannelIE(LimelightBaseIE):
_API_PATH = 'channels'
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
channel_id = self._match_id(url)
pc, mobile, medias = self._extract(
channel_id, 'getPlaylistByChannelId',
- 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')
+ 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
+ 'media', smuggled_data.get('source_url'))
entries = [
self._extract_info(
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index da94eab..d2f7529 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -260,9 +260,24 @@ class LyndaCourseIE(LyndaBaseIE):
course_path = mobj.group('coursepath')
course_id = mobj.group('courseid')
+ item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path
+
course = self._download_json(
'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
- course_id, 'Downloading course JSON')
+ course_id, 'Downloading course JSON', fatal=False)
+
+ if not course:
+ webpage = self._download_webpage(url, course_id)
+ entries = [
+ self.url_result(
+ item_template % video_id, ie=LyndaIE.ie_key(),
+ video_id=video_id)
+ for video_id in re.findall(
+ r'data-video-id=["\'](\d+)', webpage)]
+ return self.playlist_result(
+ entries, course_id,
+ self._og_search_title(webpage, fatal=False),
+ self._og_search_description(webpage))
if course.get('Status') == 'NotFound':
raise ExtractorError(
@@ -283,7 +298,7 @@ class LyndaCourseIE(LyndaBaseIE):
if video_id:
entries.append({
'_type': 'url_transparent',
- 'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+ 'url': item_template % video_id,
'ie_key': LyndaIE.ie_key(),
'chapter': chapter.get('Title'),
'chapter_number': int_or_none(chapter.get('ChapterIndex')),
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index 9880924..28f59f6 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -6,12 +6,12 @@ from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
+ compat_urllib_parse_urlencode,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
- urlencode_postdata,
get_element_by_attribute,
mimetype2ext,
)
@@ -50,6 +50,21 @@ class MetacafeIE(InfoExtractor):
},
'skip': 'Page is temporarily unavailable.',
},
+ # metacafe video with family filter
+ {
+ 'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/',
+ 'md5': 'b06082c5079bbdcde677a6291fbdf376',
+ 'info_dict': {
+ 'id': '2155630',
+ 'ext': 'mp4',
+ 'title': 'Adult Art By David Hart 156',
+ 'uploader': '63346',
+ 'description': 'md5:9afac8fc885252201ad14563694040fc',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# AnyClip video
{
'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
@@ -112,22 +127,6 @@ class MetacafeIE(InfoExtractor):
def report_disclaimer(self):
self.to_screen('Retrieving disclaimer')
- def _confirm_age(self):
- # Retrieve disclaimer
- self.report_disclaimer()
- self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
-
- # Confirm age
- self.report_age_confirmation()
- self._download_webpage(
- self._FILTER_POST, None, False, 'Unable to confirm age',
- data=urlencode_postdata({
- 'filters': '0',
- 'submit': "Continue - I'm over 18",
- }), headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
-
def _real_extract(self, url):
# Extract id and simplified title from URL
video_id, display_id = re.match(self._VALID_URL, url).groups()
@@ -143,13 +142,15 @@ class MetacafeIE(InfoExtractor):
if prefix == 'cb':
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
- # self._confirm_age()
+ headers = {
+ # Disable family filter
+ 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
+ }
# AnyClip videos require the flashversion cookie so that we get the link
# to the mp4 file
- headers = {}
if video_id.startswith('an-'):
- headers['Cookie'] = 'flashVersion=0;'
+ headers['Cookie'] += 'flashVersion=0; '
# Retrieve video webpage to extract further information
webpage = self._download_webpage(url, video_id, headers=headers)
diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py
index 659ede8..d53d96a 100644
--- a/youtube_dl/extractor/mgtv.py
+++ b/youtube_dl/extractor/mgtv.py
@@ -2,16 +2,17 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import int_or_none
class MGTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
_TESTS = [{
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
- 'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
+ 'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',
'info_dict': {
'id': '3116640',
'ext': 'mp4',
@@ -21,48 +22,45 @@ class MGTVIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
- # no tbr extracted from stream_url
- 'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
+ 'url': 'http://www.mgtv.com/b/301817/3826653.html',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
api_data = self._download_json(
- 'http://v.api.mgtv.com/player/video', video_id,
+ 'http://pcweb.api.mgtv.com/player/video', video_id,
query={'video_id': video_id},
headers=self.geo_verification_headers())['data']
info = api_data['info']
+ title = info['title'].strip()
+ stream_domain = api_data['stream_domain'][0]
formats = []
for idx, stream in enumerate(api_data['stream']):
- stream_url = stream.get('url')
- if not stream_url:
+ stream_path = stream.get('url')
+ if not stream_path:
+ continue
+ format_data = self._download_json(
+ stream_domain + stream_path, video_id,
+ note='Download video info for format #%d' % idx)
+ format_url = format_data.get('info')
+ if not format_url:
continue
tbr = int_or_none(self._search_regex(
- r'(\d+)\.mp4', stream_url, 'tbr', default=None))
-
- def extract_format(stream_url, format_id, idx, query={}):
- format_info = self._download_json(
- stream_url, video_id,
- note='Download video info for format %s' % (format_id or '#%d' % idx),
- query=query)
- return {
- 'format_id': format_id,
- 'url': format_info['info'],
- 'ext': 'mp4',
- 'tbr': tbr,
- }
-
- formats.append(extract_format(
- stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
- formats.append(extract_format(stream_url.replace(
- '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
+ r'_(\d+)_mp4/', format_url, 'tbr', default=None))
+ formats.append({
+ 'format_id': compat_str(tbr or idx),
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'tbr': tbr,
+ 'protocol': 'm3u8_native',
+ })
self._sort_formats(formats)
return {
'id': video_id,
- 'title': info['title'].strip(),
+ 'title': title,
'formats': formats,
'description': info.get('desc'),
'duration': int_or_none(info.get('duration')),
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 434a94d..d2a44d0 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -4,23 +4,26 @@ import re
from .common import InfoExtractor
from .theplatform import ThePlatformIE
+from .adobepass import AdobePassIE
+from ..compat import compat_urllib_parse_urlparse
from ..utils import (
find_xpath_attr,
lowercase_escape,
smuggle_url,
unescapeHTML,
update_url_query,
+ int_or_none,
)
-class NBCIE(InfoExtractor):
+class NBCIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_TESTS = [
{
- 'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
+ 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
'info_dict': {
- 'id': '112966',
+ 'id': '2848237',
'ext': 'mp4',
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
@@ -69,7 +72,7 @@ class NBCIE(InfoExtractor):
# HLS streams requires the 'hdnea3' cookie
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
'info_dict': {
- 'id': 'n1806',
+ 'id': '101528f5a9e8127b107e98c5e6ce4638',
'ext': 'mp4',
'title': 'Goliath',
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
@@ -87,21 +90,57 @@ class NBCIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
- [
- r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
- r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
- r'"embedURL"\s*:\s*"([^"]+)"'
- ],
- webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
- if theplatform_url.startswith('//'):
- theplatform_url = 'http:' + theplatform_url
- return {
+ info = {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
- 'url': smuggle_url(theplatform_url, {'source_url': url}),
'id': video_id,
}
+ video_data = None
+ preload = self._search_regex(
+ r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
+ if preload:
+ preload_data = self._parse_json(preload, video_id)
+ path = compat_urllib_parse_urlparse(url).path.rstrip('/')
+ entity_id = preload_data.get('xref', {}).get(path)
+ video_data = preload_data.get('entities', {}).get(entity_id)
+ if video_data:
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ video_id = video_data['guid']
+ title = video_data['title']
+ if video_data.get('entitlement') == 'auth':
+ resource = self._get_mvpd_resource(
+ 'nbcentertainment', title, video_id,
+ video_data.get('vChipRating'))
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, 'nbcentertainment', resource)
+ theplatform_url = smuggle_url(update_url_query(
+ 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
+ query), {'force_smil_url': True})
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'url': theplatform_url,
+ 'description': video_data.get('description'),
+ 'keywords': video_data.get('keywords'),
+ 'season_number': int_or_none(video_data.get('seasonNumber')),
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ 'series': video_data.get('showName'),
+ })
+ else:
+ theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
+ [
+ r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+ r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
+ r'"embedURL"\s*:\s*"([^"]+)"'
+ ],
+ webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
+ if theplatform_url.startswith('//'):
+ theplatform_url = 'http:' + theplatform_url
+ info['url'] = smuggle_url(theplatform_url, {'source_url': url})
+ return info
class NBCSportsVPlayerIE(InfoExtractor):
diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py
index ec4d675..d9943fc 100644
--- a/youtube_dl/extractor/ninecninemedia.py
+++ b/youtube_dl/extractor/ninecninemedia.py
@@ -19,6 +19,7 @@ class NineCNineMediaBaseIE(InfoExtractor):
class NineCNineMediaStackIE(NineCNineMediaBaseIE):
IE_NAME = '9c9media:stack'
+ _GEO_COUNTRIES = ['CA']
_VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py
index 70ff2ab..8b83e1f 100644
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -23,7 +23,7 @@ from ..utils import (
class NocoIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
- _LOGIN_URL = 'http://noco.tv/do.php'
+ _LOGIN_URL = 'https://noco.tv/do.php'
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
_NETRC_MACHINE = 'noco'
@@ -69,16 +69,17 @@ class NocoIE(InfoExtractor):
if username is None:
return
- login_form = {
- 'a': 'login',
- 'cookie': '1',
- 'username': username,
- 'password': password,
- }
- request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
- request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
-
- login = self._download_json(request, None, 'Logging in as %s' % username)
+ login = self._download_json(
+ self._LOGIN_URL, None, 'Logging in as %s' % username,
+ data=urlencode_postdata({
+ 'a': 'login',
+ 'cookie': '1',
+ 'username': username,
+ 'password': password,
+ }),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ })
if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index fc3c0cd..7fe79cb 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import random
import re
from .common import InfoExtractor
@@ -15,24 +14,7 @@ from ..utils import (
class NRKBaseIE(InfoExtractor):
- _faked_ip = None
-
- def _download_webpage_handle(self, *args, **kwargs):
- # NRK checks X-Forwarded-For HTTP header in order to figure out the
- # origin of the client behind proxy. This allows to bypass geo
- # restriction by faking this header's value to some Norway IP.
- # We will do so once we encounter any geo restriction error.
- if self._faked_ip:
- # NB: str is intentional
- kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
- return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
-
- def _fake_ip(self):
- # Use fake IP from 37.191.128.0/17 in order to workaround geo
- # restriction
- def octet(lb=0, ub=255):
- return random.randint(lb, ub)
- self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
+ _GEO_COUNTRIES = ['NO']
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -44,8 +26,6 @@ class NRKBaseIE(InfoExtractor):
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
- http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
-
entries = []
conviva = data.get('convivaStatistics') or {}
@@ -90,7 +70,6 @@ class NRKBaseIE(InfoExtractor):
'duration': duration,
'subtitles': subtitles,
'formats': formats,
- 'http_headers': http_headers,
})
if not entries:
@@ -107,19 +86,17 @@ class NRKBaseIE(InfoExtractor):
}]
if not entries:
- message_type = data.get('messageType', '')
- # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
- if 'IsGeoBlocked' in message_type and not self._faked_ip:
- self.report_warning(
- 'Video is geo restricted, trying to fake IP')
- self._fake_ip()
- return self._real_extract(url)
-
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
+ message_type = data.get('messageType', '')
+ # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+ if 'IsGeoBlocked' in message_type:
+ self.raise_geo_restricted(
+ msg=MESSAGES.get('ProgramIsGeoBlocked'),
+ countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
@@ -188,12 +165,12 @@ class NRKIE(NRKBaseIE):
https?://
(?:
(?:www\.)?nrk\.no/video/PS\*|
- v8-psapi\.nrk\.no/mediaelement/
+ v8[-.]psapi\.nrk\.no/mediaelement/
)
)
- (?P<id>[^/?#&]+)
+ (?P<id>[^?#&]+)
'''
- _API_HOST = 'v8.psapi.nrk.no'
+ _API_HOST = 'v8-psapi.nrk.no'
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
@@ -220,6 +197,9 @@ class NRKIE(NRKBaseIE):
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
'only_matching': True,
}, {
+ 'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
+ 'only_matching': True,
+ }, {
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
'only_matching': True,
}]
diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py
index de1d6b0..df1ce3c 100644
--- a/youtube_dl/extractor/ondemandkorea.py
+++ b/youtube_dl/extractor/ondemandkorea.py
@@ -1,15 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
js_to_json,
)
-class OnDemandKoreaIE(JWPlatformBaseIE):
+class OnDemandKoreaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+ _GEO_COUNTRIES = ['US', 'CA']
_TEST = {
'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
'info_dict': {
@@ -35,7 +36,8 @@ class OnDemandKoreaIE(JWPlatformBaseIE):
if 'msg_block_01.png' in webpage:
self.raise_geo_restricted(
- 'This content is not available in your region')
+ msg='This content is not available in your region',
+ countries=self._GEO_COUNTRIES)
if 'This video is only available to ODK PLUS members.' in webpage:
raise ExtractorError(
diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py
index 0a501b3..94f5799 100644
--- a/youtube_dl/extractor/onet.py
+++ b/youtube_dl/extractor/onet.py
@@ -23,7 +23,7 @@ class OnetBaseIE(InfoExtractor):
return self._search_regex(
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
- def _extract_from_id(self, video_id, webpage):
+ def _extract_from_id(self, video_id, webpage=None):
response = self._download_json(
'http://qi.ckm.onetapi.pl/', video_id,
query={
@@ -74,8 +74,10 @@ class OnetBaseIE(InfoExtractor):
meta = video.get('meta', {})
- title = self._og_search_title(webpage, default=None) or meta['title']
- description = self._og_search_description(webpage, default=None) or meta.get('description')
+ title = (self._og_search_title(
+ webpage, default=None) if webpage else None) or meta['title']
+ description = (self._og_search_description(
+ webpage, default=None) if webpage else None) or meta.get('description')
duration = meta.get('length') or meta.get('lenght')
timestamp = parse_iso8601(meta.get('addDate'), ' ')
@@ -89,6 +91,18 @@ class OnetBaseIE(InfoExtractor):
}
+class OnetMVPIE(OnetBaseIE):
+ _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'
+
+ _TEST = {
+ 'url': 'onetmvp:381027.1509591944',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ return self._extract_from_id(self._match_id(url))
+
+
class OnetIE(OnetBaseIE):
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
IE_NAME = 'onet.tv'
@@ -167,3 +181,44 @@ class OnetChannelIE(OnetBaseIE):
channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
return self.playlist_result(entries, channel_id, channel_title, channel_description)
+
+
+class OnetPlIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
+ IE_NAME = 'onet.pl'
+
+ _TESTS = [{
+ 'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
+ 'md5': 'b94021eb56214c3969380388b6e73cb0',
+ 'info_dict': {
+ 'id': '1561707.1685479',
+ 'ext': 'mp4',
+ 'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
+ 'description': 'md5:61fb0740084d2d702ea96512a03585b4',
+ 'upload_date': '20170214',
+ 'timestamp': 1487078046,
+ },
+ }, {
+ 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ mvp_id = self._search_regex(
+ r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
+
+ return self.url_result(
+ 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index 32289d8..fc7ff43 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -72,20 +72,25 @@ class OpenloadIE(InfoExtractor):
raise ExtractorError('File not found', expected=True)
ol_id = self._search_regex(
- '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>',
+ '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
webpage, 'openload ID')
- first_three_chars = int(float(ol_id[0:][:3]))
- fifth_char = int(float(ol_id[3:5]))
- urlcode = ''
- num = 5
+ first_char = int(ol_id[0])
+ urlcode = []
+ num = 1
while num < len(ol_id):
- urlcode += compat_chr(int(float(ol_id[num:][:3])) +
- first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2])))
+ i = ord(ol_id[num])
+ key = 0
+ if i <= 90:
+ key = i - 65
+ elif i >= 97:
+ key = 25 + i - 97
+ urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char)))
num += 5
- video_url = 'https://openload.co/stream/' + urlcode
+ video_url = 'https://openload.co/stream/' + ''.join(
+ [value for _, value in sorted(urlcode, key=lambda x: x[0])])
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 6baed77..3e51b4d 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
)
''' % '|'.join(list(zip(*_STATIONS))[0])
+ _GEO_COUNTRIES = ['US']
+
_TESTS = [
{
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@@ -489,11 +491,13 @@ class PBSIE(InfoExtractor):
headers=self.geo_verification_headers())
if redirect_info['status'] == 'error':
+ message = self._ERRORS.get(
+ redirect_info['http_code'], redirect_info['message'])
+ if redirect_info['http_code'] == 403:
+ self.raise_geo_restricted(
+ msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError(
- '%s said: %s' % (
- self.IE_NAME,
- self._ERRORS.get(redirect_info['http_code'], redirect_info['message'])),
- expected=True)
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
format_url = redirect_info.get('url')
if not format_url:
diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py
index 6a4580d..9f3501f 100644
--- a/youtube_dl/extractor/pinkbike.py
+++ b/youtube_dl/extractor/pinkbike.py
@@ -64,7 +64,8 @@ class PinkbikeIE(InfoExtractor):
'video:duration', webpage, 'duration'))
uploader = self._search_regex(
- r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
+ r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
+ 'uploader', fatal=False)
upload_date = unified_strdate(self._search_regex(
r'class="fullTime"[^>]+title="([^"]+)"',
webpage, 'upload date', fatal=False))
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
index 5c798e8..e0cbd04 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -18,6 +18,7 @@ from ..utils import (
parse_duration,
qualities,
srt_subtitles_timecode,
+ update_url_query,
urlencode_postdata,
)
@@ -92,6 +93,10 @@ class PluralsightIE(PluralsightBaseIE):
raise ExtractorError('Unable to login: %s' % error, expected=True)
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
+ BLOCKED = 'Your account has been blocked due to suspicious activity'
+ if BLOCKED in response:
+ raise ExtractorError(
+ 'Unable to login: %s' % BLOCKED, expected=True)
raise ExtractorError('Unable to log in')
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
@@ -327,25 +332,44 @@ class PluralsightCourseIE(PluralsightBaseIE):
# TODO: PSM cookie
course = self._download_json(
- '%s/data/course/%s' % (self._API_BASE, course_id),
- course_id, 'Downloading course JSON')
+ '%s/player/functions/rpc' % self._API_BASE, course_id,
+ 'Downloading course JSON',
+ data=json.dumps({
+ 'fn': 'bootstrapPlayer',
+ 'payload': {
+ 'courseId': course_id,
+ }
+ }).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json;charset=utf-8'
+ })['payload']['course']
title = course['title']
+ course_name = course['name']
+ course_data = course['modules']
description = course.get('description') or course.get('shortDescription')
- course_data = self._download_json(
- '%s/data/course/content/%s' % (self._API_BASE, course_id),
- course_id, 'Downloading course data JSON')
-
entries = []
for num, module in enumerate(course_data, 1):
+ author = module.get('author')
+ module_name = module.get('name')
+ if not author or not module_name:
+ continue
for clip in module.get('clips', []):
- player_parameters = clip.get('playerParameters')
- if not player_parameters:
+ clip_index = int_or_none(clip.get('index'))
+ if clip_index is None:
continue
+ clip_url = update_url_query(
+ '%s/player' % self._API_BASE, query={
+ 'mode': 'live',
+ 'course': course_name,
+ 'author': author,
+ 'name': module_name,
+ 'clip': clip_index,
+ })
entries.append({
'_type': 'url_transparent',
- 'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
+ 'url': clip_url,
'ie_key': PluralsightIE.ie_key(),
'chapter': module.get('title'),
'chapter_number': num,
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 017f6c5..9b41359 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -2,27 +2,27 @@
from __future__ import unicode_literals
import itertools
-import os
+# import os
import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlparse,
+ # compat_urllib_parse_unquote,
+ # compat_urllib_parse_unquote_plus,
+ # compat_urllib_parse_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
js_to_json,
orderedSet,
- sanitized_Request,
+ # sanitized_Request,
str_to_int,
)
-from ..aes import (
- aes_decrypt_text
-)
+# from ..aes import (
+# aes_decrypt_text
+# )
class PornHubIE(InfoExtractor):
@@ -109,10 +109,14 @@ class PornHubIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- req = sanitized_Request(
- 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
- req.add_header('Cookie', 'age_verified=1')
- webpage = self._download_webpage(req, video_id)
+ def dl_webpage(platform):
+ return self._download_webpage(
+ 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
+ video_id, headers={
+ 'Cookie': 'age_verified=1; platform=%s' % platform,
+ })
+
+ webpage = dl_webpage('pc')
error_msg = self._html_search_regex(
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
@@ -123,10 +127,19 @@ class PornHubIE(InfoExtractor):
'PornHub said: %s' % error_msg,
expected=True, video_id=video_id)
+ tv_webpage = dl_webpage('tv')
+
+ video_url = self._search_regex(
+ r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
+ 'video url', group='url')
+
+ title = self._search_regex(
+ r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
+
# video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
- title = self._html_search_meta(
+ title = title or self._html_search_meta(
'twitter:title', webpage, default=None) or self._search_regex(
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
@@ -156,42 +169,6 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
- video_urls = []
- for quote, video_url in re.findall(
- r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage):
- video_urls.append(compat_urllib_parse_unquote(re.sub(
- r'{0}\s*\+\s*{0}'.format(quote), '', video_url)))
-
- if webpage.find('"encrypted":true') != -1:
- password = compat_urllib_parse_unquote_plus(
- self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
- video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
-
- formats = []
- for video_url in video_urls:
- path = compat_urllib_parse_urlparse(video_url).path
- extension = os.path.splitext(path)[1][1:]
- format = path.split('/')[5].split('_')[:2]
- format = '-'.join(format)
-
- m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format)
- if m is None:
- height = None
- tbr = None
- else:
- height = int(m.group('height'))
- tbr = int(m.group('tbr'))
-
- formats.append({
- 'url': video_url,
- 'ext': extension,
- 'format': format,
- 'format_id': format,
- 'tbr': tbr,
- 'height': height,
- })
- self._sort_formats(formats)
-
page_params = self._parse_json(self._search_regex(
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
webpage, 'page parameters', group='data', default='{}'),
@@ -203,6 +180,7 @@ class PornHubIE(InfoExtractor):
return {
'id': video_id,
+ 'url': video_url,
'uploader': video_uploader,
'title': title,
'thumbnail': thumbnail,
@@ -211,7 +189,7 @@ class PornHubIE(InfoExtractor):
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
- 'formats': formats,
+ # 'formats': formats,
'age_limit': 18,
'tags': tags,
'categories': categories,
diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py
index 1a0cce7..2831368 100644
--- a/youtube_dl/extractor/pornoxo.py
+++ b/youtube_dl/extractor/pornoxo.py
@@ -2,13 +2,13 @@ from __future__ import unicode_literals
import re
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import (
str_to_int,
)
-class PornoXOIE(JWPlatformBaseIE):
+class PornoXOIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
_TEST = {
'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 5091d84..1245309 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -424,3 +424,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
return self._extract_clip(url, webpage)
elif page_type == 'playlist':
return self._extract_playlist(url, webpage)
+ else:
+ raise ExtractorError(
+ 'Unsupported page type %s' % page_type, expected=True)
diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py
index 422c02c..d338b3a 100644
--- a/youtube_dl/extractor/rentv.py
+++ b/youtube_dl/extractor/rentv.py
@@ -2,11 +2,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from .jwplatform import JWPlatformBaseIE
from ..compat import compat_str
-class RENTVIE(JWPlatformBaseIE):
+class RENTVIE(InfoExtractor):
_VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://ren.tv/video/epizod/118577',
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py
index 3bfe934..5164401 100644
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import (
js_to_json,
get_element_by_class,
@@ -11,7 +11,7 @@ from ..utils import (
)
-class RudoIE(JWPlatformBaseIE):
+class RudoIE(InfoExtractor):
_VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
_TEST = {
diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py
index 94a2a37..b5e76c9 100644
--- a/youtube_dl/extractor/screencastomatic.py
+++ b/youtube_dl/extractor/screencastomatic.py
@@ -1,11 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import js_to_json
-class ScreencastOMaticIE(JWPlatformBaseIE):
+class ScreencastOMaticIE(InfoExtractor):
_VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
_TEST = {
'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py
new file mode 100644
index 0000000..597d6f5
--- /dev/null
+++ b/youtube_dl/extractor/scrippsnetworks.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .adobepass import AdobePassIE
+from ..utils import (
+ int_or_none,
+ smuggle_url,
+ update_url_query,
+)
+
+
+class ScrippsNetworksWatchIE(AdobePassIE):
+ IE_NAME = 'scrippsnetworks:watch'
+ _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
+ 'md5': '26545fd676d939954c6808274bdb905a',
+ 'info_dict': {
+ 'id': '0256538',
+ 'ext': 'mp4',
+ 'title': 'Seeking a Wow House',
+ 'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
+ 'uploader': 'SCNI',
+ 'upload_date': '20170207',
+ 'timestamp': 1486450493,
+ },
+ 'skip': 'requires TV provider authentication',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ channel = self._parse_json(self._search_regex(
+ r'"channels"\s*:\s*(\[.+\])',
+ webpage, 'channels'), video_id)[0]
+ video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
+ title = video_data['title']
+ release_url = video_data['releaseUrl']
+ if video_data.get('restricted'):
+ requestor_id = self._search_regex(
+ r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
+ resource = self._get_mvpd_resource(
+ requestor_id, title, video_id,
+ video_data.get('ratings', [{}])[0].get('rating'))
+ auth = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ release_url = update_url_query(release_url, {'auth': auth})
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'url': smuggle_url(release_url, {'force_smil_url': True}),
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnailUrl'),
+ 'series': video_data.get('showTitle'),
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ 'ie_key': 'ThePlatform',
+ }
diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py
index 9880a5a..9d96529 100644
--- a/youtube_dl/extractor/sendtonews.py
+++ b/youtube_dl/extractor/sendtonews.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import (
float_or_none,
parse_iso8601,
@@ -14,7 +14,7 @@ from ..utils import (
)
-class SendtoNewsIE(JWPlatformBaseIE):
+class SendtoNewsIE(InfoExtractor):
_VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
_TEST = {
diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py
index d3aba58..547be8f 100644
--- a/youtube_dl/extractor/sixplay.py
+++ b/youtube_dl/extractor/sixplay.py
@@ -1,64 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- qualities,
- int_or_none,
- mimetype2ext,
determine_ext,
+ int_or_none,
+ try_get,
+ qualities,
)
class SixPlayIE(InfoExtractor):
+ IE_NAME = '6play'
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
_TEST = {
- 'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
+ 'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
'md5': '42310bffe4ba3982db112b9cd3467328',
'info_dict': {
- 'id': '11495320',
+ 'id': '11638450',
'ext': 'mp4',
- 'title': 'Jamel et ses amis au Marrakech du rire 2015',
- 'description': 'md5:ba2149d5c321d5201b78070ee839d872',
+ 'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
+ 'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
+ 'duration': 39,
+ 'series': 'Le meilleur pâtissier',
+ },
+ 'params': {
+ 'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
- clip_data = self._download_json(
- 'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
- video_id)
- video_data = clip_data['videoInfo']
+ data = self._download_json(
+ 'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
+ video_id, query={
+ 'csa': 5,
+ 'with': 'clips',
+ })
+
+ clip_data = data['clips'][0]
+ title = clip_data['title']
+
+ urls = []
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = []
- for source in clip_data['sources']:
- source_type, source_url = source.get('type'), source.get('src')
- if not source_url or source_type == 'hls/primetime':
+ for asset in clip_data['assets']:
+ asset_url = asset.get('full_physical_path')
+ protocol = asset.get('protocol')
+ if not asset_url or protocol == 'primetime' or asset_url in urls:
continue
- ext = mimetype2ext(source_type) or determine_ext(source_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- source_url.replace('.m3u8', '.f4m'),
- video_id, f4m_id='hds', fatal=False))
- elif ext == 'mp4':
- quality = source.get('quality')
+ urls.append(asset_url)
+ container = asset.get('video_container')
+ ext = determine_ext(asset_url)
+ if container == 'm3u8' or ext == 'm3u8':
+ if protocol == 'usp':
+ asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
+ formats.extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ asset_url.replace('.m3u8', '.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ asset_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
+ video_id, ism_id='mss', fatal=False))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif container == 'mp4' or ext == 'mp4':
+ quality = asset.get('video_quality')
formats.append({
- 'url': source_url,
+ 'url': asset_url,
'format_id': quality,
'quality': quality_key(quality),
'ext': ext,
})
self._sort_formats(formats)
+ def get(getter):
+ for src in (data, clip_data):
+ v = try_get(src, getter, compat_str)
+ if v:
+ return v
+
return {
'id': video_id,
- 'title': video_data['title'].strip(),
- 'description': video_data.get('description'),
- 'duration': int_or_none(video_data.get('duration')),
- 'series': video_data.get('titlePgm'),
+ 'title': title,
+ 'description': get(lambda x: x['description']),
+ 'duration': int_or_none(clip_data.get('duration')),
+ 'series': get(lambda x: x['program']['title']),
'formats': formats,
}
diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py
new file mode 100644
index 0000000..5b4aaac
--- /dev/null
+++ b/youtube_dl/extractor/skylinewebcams.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class SkylineWebcamsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html'
+ _TEST = {
+ 'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html',
+ 'info_dict': {
+ 'id': 'scalinata-piazza-di-spagna-barcaccia',
+ 'ext': 'mp4',
+ 'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ stream_url = self._search_regex(
+ r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
+ 'stream url', group='url')
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+
+ return {
+ 'id': video_id,
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'title': self._live_title(title),
+ 'description': description,
+ 'is_live': True,
+ }
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 30760ca..7da12ce 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -108,12 +108,11 @@ class SohuIE(InfoExtractor):
if vid_data['play'] != 1:
if vid_data.get('status') == 12:
raise ExtractorError(
- 'Sohu said: There\'s something wrong in the video.',
+ '%s said: There\'s something wrong in the video.' % self.IE_NAME,
expected=True)
else:
- raise ExtractorError(
- 'Sohu said: The video is only licensed to users in Mainland China.',
- expected=True)
+ self.raise_geo_restricted(
+ '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME)
formats_json = {}
for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index 123c33a..3394c7e 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -23,6 +23,10 @@ class SpankBangIE(InfoExtractor):
# 480p only
'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
'only_matching': True,
+ }, {
+ # no uploader
+ 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -48,7 +52,7 @@ class SpankBangIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._search_regex(
r'class="user"[^>]*><img[^>]+>([^<]+)',
- webpage, 'uploader', fatal=False)
+ webpage, 'uploader', default=None)
age_limit = self._rta_search(webpage)
diff --git a/youtube_dl/extractor/sprout.py b/youtube_dl/extractor/sprout.py
new file mode 100644
index 0000000..8467bf4
--- /dev/null
+++ b/youtube_dl/extractor/sprout.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .adobepass import AdobePassIE
+from ..utils import (
+ extract_attributes,
+ update_url_query,
+ smuggle_url,
+)
+
+
+class SproutIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
+ 'md5': '74bf14128578d1e040c3ebc82088f45f',
+ 'info_dict': {
+ 'id': '9dexnwtmh8_X',
+ 'ext': 'mp4',
+ 'title': 'A Cowboy Adventure',
+ 'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
+ 'timestamp': 1437758640,
+ 'upload_date': '20150724',
+ 'uploader': 'NBCU-SPROUT-NEW',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_component = self._search_regex(
+ r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
+ webpage, 'video component', default=None)
+ if video_component:
+ options = self._parse_json(extract_attributes(
+ video_component)['data-options'], video_id)
+ theplatform_url = options['video']
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ if options.get('protected'):
+ query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
+ theplatform_url = smuggle_url(update_url_query(
+ theplatform_url, query), {'force_smil_url': True})
+ else:
+ iframe = self._search_regex(
+ r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
+ webpage, 'iframe')
+ theplatform_url = extract_attributes(iframe)['src']
+
+ return self.url_result(theplatform_url, 'ThePlatform')
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
index 319a48a..bb73eb1 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@@ -14,6 +14,8 @@ from ..utils import (
class SRGSSRIE(InfoExtractor):
_VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['CH']
_ERRORS = {
'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@@ -40,8 +42,12 @@ class SRGSSRIE(InfoExtractor):
media_id)[media_type.capitalize()]
if media_data.get('block') and media_data['block'] in self._ERRORS:
- raise ExtractorError('%s said: %s' % (
- self.IE_NAME, self._ERRORS[media_data['block']]), expected=True)
+ message = self._ERRORS[media_data['block']]
+ if media_data['block'] == 'GEOBLOCK':
+ self.raise_geo_restricted(
+ msg=message, countries=self._GEO_COUNTRIES)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
return media_data
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index 10cf808..1b5afb7 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -13,6 +13,8 @@ from ..utils import (
class SVTBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['SE']
+
def _extract_video(self, video_info, video_id):
formats = []
for vr in video_info['videoReferences']:
@@ -38,7 +40,9 @@ class SVTBaseIE(InfoExtractor):
'url': vurl,
})
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
- self.raise_geo_restricted('This video is only available in Sweden')
+ self.raise_geo_restricted(
+ 'This video is only available in Sweden',
+ countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
subtitles = {}
diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py
index 4043fcb..82d73c3 100644
--- a/youtube_dl/extractor/telequebec.py
+++ b/youtube_dl/extractor/telequebec.py
@@ -2,7 +2,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ smuggle_url,
+)
class TeleQuebecIE(InfoExtractor):
@@ -28,7 +31,7 @@ class TeleQuebecIE(InfoExtractor):
return {
'_type': 'url_transparent',
'id': media_id,
- 'url': 'limelight:media:' + media_data['streamInfo']['sourceId'],
+ 'url': smuggle_url('limelight:media:' + media_data['streamInfo']['sourceId'], {'geo_countries': ['CA']}),
'title': media_data['title'],
'description': media_data.get('descriptions', [{'text': None}])[0].get('text'),
'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000),
diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py
index 6f1eeac..0e2370c 100644
--- a/youtube_dl/extractor/tfo.py
+++ b/youtube_dl/extractor/tfo.py
@@ -8,10 +8,12 @@ from ..utils import (
HEADRequest,
ExtractorError,
int_or_none,
+ clean_html,
)
class TFOIE(InfoExtractor):
+ _GEO_COUNTRIES = ['CA']
_VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
_TEST = {
'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
@@ -36,7 +38,9 @@ class TFOIE(InfoExtractor):
'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value,
})
if infos.get('success') == 0:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True)
+ if infos.get('code') == 'ErrGeoBlocked':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True)
video_data = infos['data']
return {
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 192d8fa..9a424b1 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -179,10 +179,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
if m:
return [m.group('url')]
+ # Are whitespaces ignored in URLs?
+ # https://github.com/rg3/youtube-dl/issues/12044
matches = re.findall(
- r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+ r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
- return list(zip(*matches))[1]
+ return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -306,9 +308,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
},
}]
- def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
+ def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
entry = self._download_json(real_url, video_id)['entries'][0]
+ main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
formats = []
subtitles = {}
@@ -333,7 +336,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
if asset_type in asset_types_query:
query.update(asset_types_query[asset_type])
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
- smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+ main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py
index ce1326c..b8504f0 100644
--- a/youtube_dl/extractor/thescene.py
+++ b/youtube_dl/extractor/thescene.py
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
-from ..utils import qualities
+from ..utils import (
+ int_or_none,
+ qualities,
+)
class TheSceneIE(InfoExtractor):
@@ -16,6 +19,11 @@ class TheSceneIE(InfoExtractor):
'ext': 'mp4',
'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear',
'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear',
+ 'duration': 127,
+ 'series': 'Style.com Fashion Shows',
+ 'season': 'Ready To Wear Spring 2013',
+ 'tags': list,
+ 'categories': list,
},
}
@@ -32,21 +40,29 @@ class TheSceneIE(InfoExtractor):
player = self._download_webpage(player_url, display_id)
info = self._parse_json(
self._search_regex(
- r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'),
+ r'(?m)video\s*:\s*({.+?}),$', player, 'info json'),
display_id)
+ video_id = info['id']
+ title = info['title']
+
qualities_order = qualities(('low', 'high'))
formats = [{
'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']),
'url': f['src'],
'quality': qualities_order(f['quality']),
- } for f in info['sources'][0]]
+ } for f in info['sources']]
self._sort_formats(formats)
return {
- 'id': info['id'],
+ 'id': video_id,
'display_id': display_id,
- 'title': info['title'],
+ 'title': title,
'formats': formats,
'thumbnail': info.get('poster_frame'),
+ 'duration': int_or_none(info.get('duration')),
+ 'series': info.get('series_title'),
+ 'season': info.get('season_title'),
+ 'tags': info.get('tags'),
+ 'categories': info.get('categories'),
}
diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py
index 4473a3c..33683b1 100644
--- a/youtube_dl/extractor/thisav.py
+++ b/youtube_dl/extractor/thisav.py
@@ -3,13 +3,14 @@ from __future__ import unicode_literals
import re
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import remove_end
-class ThisAVIE(JWPlatformBaseIE):
+class ThisAVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
_TESTS = [{
+ # jwplayer
'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
'md5': '0480f1ef3932d901f0e0e719f188f19b',
'info_dict': {
@@ -20,6 +21,7 @@ class ThisAVIE(JWPlatformBaseIE):
'uploader_id': 'dj7970'
}
}, {
+ # html5 media
'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html',
'md5': 'ba90c076bd0f80203679e5b60bf523ee',
'info_dict': {
@@ -48,8 +50,12 @@ class ThisAVIE(JWPlatformBaseIE):
}],
}
else:
- info_dict = self._extract_jwplayer_data(
- webpage, video_id, require_title=False)
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+ if entries:
+ info_dict = entries[0]
+ else:
+ info_dict = self._extract_jwplayer_data(
+ webpage, video_id, require_title=False)
uploader = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
webpage, 'uploader name', fatal=False)
diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py
index 3a37df2..c44018a 100644
--- a/youtube_dl/extractor/tubitv.py
+++ b/youtube_dl/extractor/tubitv.py
@@ -16,6 +16,7 @@ class TubiTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
_LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv'
+ _GEO_COUNTRIES = ['US']
_TEST = {
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'md5': '43ac06be9326f41912dc64ccf7a80320',
diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py
index ad79db9..7aeb2c6 100644
--- a/youtube_dl/extractor/tv4.py
+++ b/youtube_dl/extractor/tv4.py
@@ -24,6 +24,7 @@ class TV4IE(InfoExtractor):
sport/|
)
)(?P<id>[0-9]+)'''
+ _GEO_COUNTRIES = ['SE']
_TESTS = [
{
'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
@@ -71,16 +72,12 @@ class TV4IE(InfoExtractor):
'http://www.tv4play.se/player/assets/%s.json' % video_id,
video_id, 'Downloading video info JSON')
- # If is_geo_restricted is true, it doesn't necessarily mean we can't download it
- if info.get('is_geo_restricted'):
- self.report_warning('This content might not be available in your country due to licensing restrictions.')
-
title = info['title']
subtitles = {}
formats = []
# http formats are linked with unresolvable host
- for kind in ('hls', ''):
+ for kind in ('hls3', ''):
data = self._download_json(
'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
video_id, 'Downloading sources JSON', query={
@@ -113,6 +110,10 @@ class TV4IE(InfoExtractor):
'url': manifest_url,
'ext': 'vtt',
}]})
+
+ if not formats and info.get('is_geo_restricted'):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py
new file mode 100644
index 0000000..12ed603
--- /dev/null
+++ b/youtube_dl/extractor/tvn24.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class TVN24IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
+ 'md5': 'fbdec753d7bc29d96036808275f2130c',
+ 'info_dict': {
+ 'id': '1584444',
+ 'ext': 'mp4',
+ 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
+ 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
+ 'thumbnail': 're:http://.*[.]jpeg',
+ }
+ }, {
+ 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+
+ def extract_json(attr, name, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
+ name, group='json', fatal=fatal) or '{}',
+ video_id, transform_source=unescapeHTML, fatal=fatal)
+
+ quality_data = extract_json('data-quality', 'formats')
+
+ formats = []
+ for format_id, url in quality_data.items():
+ formats.append({
+ 'url': url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id.rstrip('p')),
+ })
+ self._sort_formats(formats)
+
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_regex(
+ r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
+ 'thumbnail', group='url')
+
+ share_params = extract_json(
+ 'data-share-params', 'share params', fatal=False)
+ if isinstance(share_params, dict):
+ video_id = share_params.get('id') or video_id
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py
index 6d5c748..1a5b76b 100644
--- a/youtube_dl/extractor/tvnoe.py
+++ b/youtube_dl/extractor/tvnoe.py
@@ -1,7 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
@@ -9,7 +9,7 @@ from ..utils import (
)
-class TVNoeIE(JWPlatformBaseIE):
+class TVNoeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.tvnoe.cz/video/10362',
diff --git a/youtube_dl/extractor/tvplayer.py b/youtube_dl/extractor/tvplayer.py
new file mode 100644
index 0000000..b653714
--- /dev/null
+++ b/youtube_dl/extractor/tvplayer.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ extract_attributes,
+ urlencode_postdata,
+ ExtractorError,
+)
+
+
+class TVPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'http://tvplayer.com/watch/bbcone',
+ 'info_dict': {
+ 'id': '89',
+ 'ext': 'mp4',
+ 'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ current_channel = extract_attributes(self._search_regex(
+ r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
+ webpage, 'channel element'))
+ title = current_channel['data-name']
+
+ resource_id = self._search_regex(
+ r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
+ platform = self._search_regex(
+ r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
+ token = self._search_regex(
+ r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
+ validate = self._search_regex(
+ r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')
+
+ try:
+ response = self._download_json(
+ 'http://api.tvplayer.com/api/v2/stream/live',
+ resource_id, headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ }, data=urlencode_postdata({
+ 'service': 1,
+ 'platform': platform,
+ 'id': resource_id,
+ 'token': token,
+ 'validate': validate,
+ }))['tvplayer']['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ response = self._parse_json(
+ e.cause.read().decode(), resource_id)['tvplayer']['response']
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
+ raise
+
+ formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': resource_id,
+ 'display_id': display_id,
+ 'title': self._live_title(title),
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
index a983ebf..f3541b6 100644
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -12,7 +12,7 @@ from ..utils import (
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
- _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.24video.net/video/view/1044982',
@@ -37,6 +37,9 @@ class TwentyFourVideoIE(InfoExtractor):
}, {
'url': 'http://www.24video.me/video/view/1044982',
'only_matching': True,
+ }, {
+ 'url': 'http://www.24video.tube/video/view/2363750',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index bef6394..8152ace 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
)
(?P<id>[\da-fA-F]+)
'''
+ _GEO_COUNTRIES = ['BG']
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
video_url = video['src']
if '/na.mp4' in video_url:
- self.raise_geo_restricted()
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
uploader = video.get('uploader')
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 8a574bc..0f8c156 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -14,6 +14,7 @@ from ..utils import (
class VGTVIE(XstreamIE):
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
+ _GEO_BYPASS = False
_HOST_TO_APPNAME = {
'vgtv.no': 'vgtv',
@@ -217,7 +218,8 @@ class VGTVIE(XstreamIE):
properties = try_get(
data, lambda x: x['streamConfiguration']['properties'], list)
if properties and 'geoblocked' in properties:
- raise self.raise_geo_restricted()
+ raise self.raise_geo_restricted(
+ countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
self._sort_formats(info['formats'])
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index 8a00c8f..f0a7fd7 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -70,10 +70,10 @@ class ViceBaseIE(AdobePassIE):
'url': uplynk_preplay_url,
'id': video_id,
'title': title,
- 'description': base.get('body'),
+ 'description': base.get('body') or base.get('display_body'),
'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
- 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
- 'timestamp': int_or_none(video_data.get('created_at')),
+ 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')),
+ 'timestamp': int_or_none(video_data.get('created_at'), 1000),
'age_limit': parse_age_limit(video_data.get('video_rating')),
'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py
index 0eff055..87f9216 100644
--- a/youtube_dl/extractor/viceland.py
+++ b/youtube_dl/extractor/viceland.py
@@ -7,16 +7,16 @@ from .vice import ViceBaseIE
class VicelandIE(ViceBaseIE):
_VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
_TEST = {
- 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
+ 'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316',
'info_dict': {
- 'id': '57608447973ee7705f6fbd4e',
+ 'id': '588a70d0dba8a16007de7316',
'ext': 'mp4',
- 'title': 'CYBERWAR (Trailer)',
- 'description': 'Tapping into the geopolitics of hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.',
+ 'title': 'TRAPPED (Series Trailer)',
+ 'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf',
'age_limit': 14,
- 'timestamp': 1466008539,
- 'upload_date': '20160615',
- 'uploader_id': '11',
+ 'timestamp': 1485474122,
+ 'upload_date': '20170126',
+ 'uploader_id': '57a204098cb727dec794c6a3',
'uploader': 'Viceland',
},
'params': {
diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py
index 9950c62..d055629 100644
--- a/youtube_dl/extractor/vidzi.py
+++ b/youtube_dl/extractor/vidzi.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
from ..utils import (
decode_packed_codes,
js_to_json,
@@ -12,8 +12,8 @@ from ..utils import (
)
-class VidziIE(JWPlatformBaseIE):
- _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+class VidziIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://vidzi.tv/cghql9yq6emu.html',
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
@@ -29,6 +29,9 @@ class VidziIE(JWPlatformBaseIE):
}, {
'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
'skip_download': True,
+ }, {
+ 'url': 'http://vidzi.cc/cghql9yq6emu.html',
+ 'skip_download': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
index 52dd95e..fcf0cb1 100644
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -86,7 +86,9 @@ class ViewsterIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
# Get 'api_token' cookie
- self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id)
+ self._request_webpage(
+ HEADRequest('http://www.viewster.com/'),
+ video_id, headers=self.geo_verification_headers())
cookies = self._get_cookies('http://www.viewster.com/')
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 9c48701..e9c8bf8 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -27,6 +27,7 @@ class VikiBaseIE(InfoExtractor):
_APP_VERSION = '2.2.5.1428709186'
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
+ _GEO_BYPASS = False
_NETRC_MACHINE = 'viki'
_token = None
@@ -77,8 +78,11 @@ class VikiBaseIE(InfoExtractor):
def _check_errors(self, data):
for reason, status in data.get('blocking', {}).items():
if status and reason in self._ERRORS:
+ message = self._ERRORS[reason]
+ if reason == 'geo':
+ self.raise_geo_restricted(msg=message)
raise ExtractorError('%s said: %s' % (
- self.IE_NAME, self._ERRORS[reason]), expected=True)
+ self.IE_NAME, message), expected=True)
def _real_initialize(self):
self._login()
diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dl/extractor/vodpl.py
new file mode 100644
index 0000000..9e91970
--- /dev/null
+++ b/youtube_dl/extractor/vodpl.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .onet import OnetBaseIE
+
+
+class VODPlIE(OnetBaseIE):
+ _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
+
+ _TESTS = [{
+ 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
+ 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
+ 'info_dict': {
+ 'id': '3ep3jns',
+ 'ext': 'mp4',
+ 'title': 'Chłopaki nie płaczą',
+ 'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
+ 'timestamp': 1463415154,
+ 'duration': 5765,
+ 'upload_date': '20160516',
+ },
+ }, {
+ 'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage)
+ info_dict['id'] = video_id
+ return info_dict
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index 54eb514..c022fb3 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -1,10 +1,10 @@
from __future__ import unicode_literals
+from .common import InfoExtractor
from .youtube import YoutubeIE
-from .jwplatform import JWPlatformBaseIE
-class WimpIE(JWPlatformBaseIE):
+class WimpIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.wimp.com/maru-is-exhausted/',
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index 83bc1fe..5584674 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -44,6 +44,9 @@ class XTubeIE(InfoExtractor):
}, {
'url': 'xtube:625837',
'only_matching': True,
+ }, {
+ 'url': 'xtube:kVTUy_G222_',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -53,14 +56,20 @@ class XTubeIE(InfoExtractor):
if not display_id:
display_id = video_id
- url = 'http://www.xtube.com/watch.php?v=%s' % video_id
- req = sanitized_Request(url)
- req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1')
- webpage = self._download_webpage(req, display_id)
+ if video_id.isdigit() and len(video_id) < 11:
+ url_pattern = 'http://www.xtube.com/video-watch/-%s'
+ else:
+ url_pattern = 'http://www.xtube.com/watch.php?v=%s'
+
+ webpage = self._download_webpage(
+ url_pattern % video_id, display_id, headers={
+ 'Cookie': 'age_verified=1; cookiesAccepted=1',
+ })
sources = self._parse_json(self._search_regex(
- r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id)
+ r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),',
+ webpage, 'sources', group='sources'), video_id)
formats = []
for format_id, format_url in sources.items():
@@ -72,7 +81,7 @@ class XTubeIE(InfoExtractor):
self._sort_formats(formats)
title = self._search_regex(
- (r'<h1>(?P<title>[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+ (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
webpage, 'title', group='title')
description = self._search_regex(
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
@@ -81,10 +90,10 @@ class XTubeIE(InfoExtractor):
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
webpage, 'uploader', fatal=False)
duration = parse_duration(self._search_regex(
- r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>',
+ r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
- r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>',
+ r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r'>Comments? \(([\d,\.]+)\)<',
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7671093..dec0280 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -34,6 +34,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
orderedSet,
+ parse_codecs,
parse_duration,
remove_quotes,
remove_start,
@@ -1696,15 +1697,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
codecs = mobj.group('val')
break
if codecs:
- codecs = codecs.split(',')
- if len(codecs) == 2:
- acodec, vcodec = codecs[1], codecs[0]
- else:
- acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
- dct.update({
- 'acodec': acodec,
- 'vcodec': vcodec,
- })
+ dct.update(parse_codecs(codecs))
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index a365923..523bb5c 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -20,9 +20,9 @@ from ..utils import (
class ZDFBaseIE(InfoExtractor):
- def _call_api(self, url, player, referrer, video_id):
+ def _call_api(self, url, player, referrer, video_id, item):
return self._download_json(
- url, video_id, 'Downloading JSON content',
+ url, video_id, 'Downloading JSON %s' % item,
headers={
'Referer': referrer,
'Api-Auth': 'Bearer %s' % player['apiToken'],
@@ -104,7 +104,7 @@ class ZDFIE(ZDFBaseIE):
})
formats.append(f)
- def _extract_entry(self, url, content, video_id):
+ def _extract_entry(self, url, player, content, video_id):
title = content.get('title') or content['teaserHeadline']
t = content['mainVideoContent']['http://zdf.de/rels/target']
@@ -116,7 +116,8 @@ class ZDFIE(ZDFBaseIE):
'http://zdf.de/rels/streams/ptmd-template'].replace(
'{playerId}', 'portal')
- ptmd = self._download_json(urljoin(url, ptmd_path), video_id)
+ ptmd = self._call_api(
+ urljoin(url, ptmd_path), player, url, video_id, 'metadata')
formats = []
track_uris = set()
@@ -174,8 +175,9 @@ class ZDFIE(ZDFBaseIE):
}
def _extract_regular(self, url, player, video_id):
- content = self._call_api(player['content'], player, url, video_id)
- return self._extract_entry(player['content'], content, video_id)
+ content = self._call_api(
+ player['content'], player, url, video_id, 'content')
+ return self._extract_entry(player['content'], player, content, video_id)
def _extract_mobile(self, video_id):
document = self._download_json(