aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRogério Brito <rbrito@ime.usp.br>2017-10-19 15:24:13 -0200
committerRogério Brito <rbrito@ime.usp.br>2017-10-19 15:24:13 -0200
commit4231113a7be907539052ce59df44e23c54d21d38 (patch)
tree60ac9873daf0428612155884c512f7934612bf78
parentfd552b246272b2ccc3051ce4a112669cf19b81d8 (diff)
parent4eb7d8923b3bad26edab01df0fd2650b0563ea8b (diff)
downloadyoutube-dl-4231113a7be907539052ce59df44e23c54d21d38.zip
youtube-dl-4231113a7be907539052ce59df44e23c54d21d38.tar.gz
youtube-dl-4231113a7be907539052ce59df44e23c54d21d38.tar.bz2
Updated version 2017.10.15.1 from 'upstream/2017.10.15.1'
with Debian dir e04fc0593bd02862ee48ad1d1a4629d9f3e1df96
-rw-r--r--ChangeLog100
-rw-r--r--README.md2
-rw-r--r--docs/supportedsites.md8
-rw-r--r--test/test_YoutubeDL.py16
-rwxr-xr-xyoutube-dlbin1582485 -> 1588299 bytes
-rw-r--r--youtube-dl.12
-rwxr-xr-xyoutube_dl/YoutubeDL.py63
-rw-r--r--youtube_dl/downloader/fragment.py10
-rw-r--r--youtube_dl/downloader/hls.py28
-rw-r--r--youtube_dl/extractor/aenetworks.py2
-rw-r--r--youtube_dl/extractor/afreecatv.py131
-rw-r--r--youtube_dl/extractor/anvato.py20
-rw-r--r--youtube_dl/extractor/appletrailers.py4
-rw-r--r--youtube_dl/extractor/ard.py2
-rw-r--r--youtube_dl/extractor/bbc.py2
-rw-r--r--youtube_dl/extractor/beeg.py10
-rw-r--r--youtube_dl/extractor/canvas.py142
-rw-r--r--youtube_dl/extractor/channel9.py6
-rw-r--r--youtube_dl/extractor/comedycentral.py6
-rw-r--r--youtube_dl/extractor/common.py20
-rw-r--r--youtube_dl/extractor/dailymotion.py2
-rw-r--r--youtube_dl/extractor/deezer.py2
-rw-r--r--youtube_dl/extractor/extractors.py18
-rw-r--r--youtube_dl/extractor/facebook.py18
-rw-r--r--youtube_dl/extractor/fox.py33
-rw-r--r--youtube_dl/extractor/freespeech.py2
-rw-r--r--youtube_dl/extractor/funk.py43
-rw-r--r--youtube_dl/extractor/gamespot.py2
-rw-r--r--youtube_dl/extractor/generic.py133
-rw-r--r--youtube_dl/extractor/gfycat.py5
-rw-r--r--youtube_dl/extractor/googleplus.py2
-rw-r--r--youtube_dl/extractor/howstuffworks.py39
-rw-r--r--youtube_dl/extractor/hrti.py10
-rw-r--r--youtube_dl/extractor/ign.py2
-rw-r--r--youtube_dl/extractor/infoq.py30
-rw-r--r--youtube_dl/extractor/jeuxvideo.py2
-rw-r--r--youtube_dl/extractor/kaltura.py3
-rw-r--r--youtube_dl/extractor/ketnet.py23
-rw-r--r--youtube_dl/extractor/livestream.py2
-rw-r--r--youtube_dl/extractor/lnkgo.py5
-rw-r--r--youtube_dl/extractor/makertv.py2
-rw-r--r--youtube_dl/extractor/mangomolo.py2
-rw-r--r--youtube_dl/extractor/meipai.py2
-rw-r--r--youtube_dl/extractor/mixcloud.py2
-rw-r--r--youtube_dl/extractor/mtv.py2
-rw-r--r--youtube_dl/extractor/myvideo.py2
-rw-r--r--youtube_dl/extractor/nationalgeographic.py2
-rw-r--r--youtube_dl/extractor/naver.py2
-rw-r--r--youtube_dl/extractor/nba.py2
-rw-r--r--youtube_dl/extractor/nbc.py3
-rw-r--r--youtube_dl/extractor/nexx.py28
-rw-r--r--youtube_dl/extractor/npo.py2
-rw-r--r--youtube_dl/extractor/once.py2
-rw-r--r--youtube_dl/extractor/onionstudios.py9
-rw-r--r--youtube_dl/extractor/pornflip.py8
-rw-r--r--youtube_dl/extractor/reddit.py11
-rw-r--r--youtube_dl/extractor/rtlnl.py7
-rw-r--r--youtube_dl/extractor/rtve.py36
-rw-r--r--youtube_dl/extractor/ruhd.py2
-rw-r--r--youtube_dl/extractor/scrippsnetworks.py216
-rw-r--r--youtube_dl/extractor/shahid.py61
-rw-r--r--youtube_dl/extractor/slideslive.py34
-rw-r--r--youtube_dl/extractor/spike.py1
-rw-r--r--youtube_dl/extractor/stanfordoc.py4
-rw-r--r--youtube_dl/extractor/steam.py129
-rw-r--r--youtube_dl/extractor/theplatform.py2
-rw-r--r--youtube_dl/extractor/thisav.py4
-rw-r--r--youtube_dl/extractor/tubitv.py12
-rw-r--r--youtube_dl/extractor/tva.py48
-rw-r--r--youtube_dl/extractor/tvn24.py7
-rw-r--r--youtube_dl/extractor/tvp.py15
-rw-r--r--youtube_dl/extractor/twitter.py2
-rw-r--r--youtube_dl/extractor/udn.py32
-rw-r--r--youtube_dl/extractor/vh1.py138
-rw-r--r--youtube_dl/extractor/vice.py2
-rw-r--r--youtube_dl/extractor/videopremium.py2
-rw-r--r--youtube_dl/extractor/voxmedia.py66
-rw-r--r--youtube_dl/extractor/vvvvid.py2
-rw-r--r--youtube_dl/extractor/wdr.py9
-rw-r--r--youtube_dl/extractor/xhamster.py2
-rw-r--r--youtube_dl/extractor/xtube.py5
-rw-r--r--youtube_dl/extractor/xvideos.py42
-rw-r--r--youtube_dl/extractor/yahoo.py58
-rw-r--r--youtube_dl/extractor/youtube.py38
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py2
-rw-r--r--youtube_dl/utils.py2
-rw-r--r--youtube_dl/version.py2
87 files changed, 1295 insertions, 718 deletions
diff --git a/ChangeLog b/ChangeLog
index da60c1b..d728e4d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,103 @@
+version 2017.10.15.1
+
+Core
+* [downloader/hls] Ignore anvato ad fragments (#14496)
+* [downloader/fragment] Output ad fragment count
+
+Extractors
+* [scrippsnetworks:watch] Bypass geo restriction
++ [anvato] Add ability to bypass geo restriction
+* [redditr] Fix extraction for URLs with query (#14495)
+
+
+version 2017.10.15
+
+Core
++ [common] Add support for jwplayer youtube embeds
+
+Extractors
+* [scrippsnetworks:watch] Fix extraction (#14389)
+* [anvato] Process master m3u8 manifests
+* [youtube] Fix relative URLs in description
+* [spike] Bypass geo restriction
++ [howstuffworks] Add support for more domains
+* [infoq] Fix http format downloading
++ [rtlnl] Add support for another type of embeds
++ [onionstudios] Add support for bulbs-video embeds
+* [udn] Fix extraction
+* [shahid] Fix extraction (#14448)
+* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
+* [vh1] Fix extraction (#9613)
+
+
+version 2017.10.12
+
+Core
+* [YoutubeDL] Improve _default_format_spec (#14461)
+
+Extractors
+* [steam] Fix extraction (#14067)
++ [funk] Add support for funk.net (#14464)
++ [nexx] Add support for shortcuts and relax domain id extraction
++ [voxmedia] Add support for recode.net (#14173)
++ [once] Add support for vmap URLs
++ [generic] Add support for channel9 embeds (#14469)
+* [tva] Fix extraction (#14328)
++ [tubitv] Add support for new URL format (#14460)
+- [afreecatv:global] Remove extractor
+- [youtube:shared] Removed extractor (#14420)
++ [slideslive] Add support for slideslive.com (#2680)
++ [facebook] Support thumbnails (#14416)
+* [vvvvid] Fix episode number extraction (#14456)
+* [hrti:playlist] Relax URL regular expression
+* [wdr] Relax media link regular expression (#14447)
+* [hrti] Relax URL regular expression (#14443)
+* [fox] Delegate extraction to uplynk:preplay (#14147)
++ [youtube] Add support for hooktube.com (#14437)
+
+
+version 2017.10.07
+
+Core
+* [YoutubeDL] Ignore duplicates in --playlist-items
+* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
+ reduce code duplication (#14425)
++ [utils] Use cache in OnDemandPagedList by default
+* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
+
+Extractors
+* [reddit] Sort formats (#14430)
+* [lnkgo] Relax URL regular expression (#14423)
+* [pornflip] Extend URL regular expression (#14405, #14406)
++ [xtube] Add support for embed URLs (#14417)
++ [xvideos] Add support for embed URLs and improve extraction (#14409)
+* [beeg] Fix extraction (#14403)
+* [tvn24] Relax URL regular expression (#14395)
+* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
+ #14392, #14414, #14419, #14431)
++ [ketnet] Add support for videos without direct sources (#14377)
+* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
++ [afreecatv] Add support for adult videos (#14376)
+
+
+version 2017.10.01
+
+Core
+* [YoutubeDL] Document youtube_include_dash_manifest
+
+Extractors
++ [tvp] Add support for new URL schema (#14368)
++ [generic] Add support for single format Video.js embeds (#14371)
+* [yahoo] Bypass geo restriction for brightcove (#14210)
+* [yahoo] Use extracted brightcove account id (#14210)
+* [rtve:alacarta] Fix extraction (#14290)
++ [yahoo] Add support for custom brightcove embeds (#14210)
++ [generic] Add support for Video.js embeds
++ [gfycat] Add support for /gifs/detail URLs (#14322)
+* [generic] Fix infinite recursion for twitter:player URLs (#14339)
+* [xhamsterembed] Fix extraction (#14308)
+
+
version 2017.09.24
Core
diff --git a/README.md b/README.md
index 7818e58..2879aad 100644
--- a/README.md
+++ b/README.md
@@ -1167,7 +1167,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
-Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index d36a07c..7071450 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -36,7 +36,6 @@
- **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
- **afreecatv**: afreecatv.com
- - **afreecatv:global**: afreecatv.com
- **AirMozilla**
- **AliExpressLive**
- **AlJazeera**
@@ -130,7 +129,8 @@
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- - **Canvas**: canvas.be and een.be
+ - **Canvas**
+ - **CanvasEen**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
@@ -295,6 +295,7 @@
- **freespeech.org**
- **FreshLive**
- **Funimation**
+ - **Funk**
- **FunnyOrDie**
- **Fusion**
- **Fux**
@@ -737,6 +738,7 @@
- **skynewsarabia:video**
- **SkySports**
- **Slideshare**
+ - **SlidesLive**
- **Slutload**
- **smotri**: Smotri.com
- **smotri:broadcast**: Smotri.com broadcasts
@@ -968,6 +970,7 @@
- **VoiceRepublic**
- **Voot**
- **VoxMedia**
+ - **VoxMediaVolume**
- **Vporn**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
@@ -1043,7 +1046,6 @@
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- - **youtube:shared**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index e70cbcd..4af92fb 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+ ydl = YDL({'is_live': True})
+ self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({'simulate': True, 'is_live': True})
+ self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+
ydl = YDL({'outtmpl': '-'})
- self.assertEqual(ydl._default_format_spec({}), 'best')
+ self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
- self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
class TestYoutubeDL(unittest.TestCase):
@@ -770,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
result = get_ids({'playlist_items': '10'})
self.assertEqual(result, [])
+ result = get_ids({'playlist_items': '3-10'})
+ self.assertEqual(result, [3, 4])
+
+ result = get_ids({'playlist_items': '2-4,3-4,3'})
+ self.assertEqual(result, [2, 3, 4])
+
def test_urlopen_no_file_protocol(self):
# see https://github.com/rg3/youtube-dl/issues/8227
ydl = YDL()
diff --git a/youtube-dl b/youtube-dl
index b87f23e..15c016a 100755
--- a/youtube-dl
+++ b/youtube-dl
Binary files differ
diff --git a/youtube-dl.1 b/youtube-dl.1
index 6c8c7bc..9ab22b0 100644
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -2323,7 +2323,7 @@ with\ youtube_dl.YoutubeDL(ydl_opts)\ as\ ydl:
.PP
Most likely, you\[aq]ll want to use various options.
For a list of options available, have a look at
-\f[C]youtube_dl/YoutubeDL.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279).
+\f[C]youtube_dl/YoutubeDL.py\f[] (https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312).
For a start, if you want to intercept youtube\-dl\[aq]s output, set a
\f[C]logger\f[] object.
.PP
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 0a7f36c..342d6b4 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -65,6 +65,7 @@ from .utils import (
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
+ orderedSet,
PagedList,
parse_filesize,
PerRequestProxyHandler,
@@ -304,6 +305,12 @@ class YoutubeDL(object):
otherwise prefer avconv.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.
+
+ The following options are used by the Youtube extractor:
+ youtube_include_dash_manifest: If True (default), DASH manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH.
"""
_NUMERIC_FIELDS = set((
@@ -902,15 +909,25 @@ class YoutubeDL(object):
yield int(item)
else:
yield int(string_segment)
- playlistitems = iter_playlistitems(playlistitems_str)
+ playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
ie_entries = ie_result['entries']
+
+ def make_playlistitems_entries(list_ie_entries):
+ num_entries = len(list_ie_entries)
+ return [
+ list_ie_entries[i - 1] for i in playlistitems
+ if -num_entries <= i - 1 < num_entries]
+
+ def report_download(num_entries):
+ self.to_screen(
+ '[%s] playlist %s: Downloading %d videos' %
+ (ie_result['extractor'], playlist, num_entries))
+
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
if playlistitems:
- entries = [
- ie_entries[i - 1] for i in playlistitems
- if -n_all_entries <= i - 1 < n_all_entries]
+ entries = make_playlistitems_entries(ie_entries)
else:
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
@@ -928,20 +945,15 @@ class YoutubeDL(object):
entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, n_entries))
+ report_download(n_entries)
else: # iterable
if playlistitems:
- entry_list = list(ie_entries)
- entries = [entry_list[i - 1] for i in playlistitems]
+ entries = make_playlistitems_entries(list(ie_entries))
else:
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, n_entries))
+ report_download(n_entries)
if self.params.get('playlistreverse', False):
entries = entries[::-1]
@@ -1066,22 +1078,27 @@ class YoutubeDL(object):
return _filter
def _default_format_spec(self, info_dict, download=True):
- req_format_list = []
- def can_have_partial_formats():
+ def can_merge():
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+
+ def prefer_best():
if self.params.get('simulate', False):
- return True
+ return False
if not download:
- return True
- if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return False
+ if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+ return True
if info_dict.get('is_live'):
- return False
- merger = FFmpegMergerPP(self)
- return merger.available and merger.can_merge()
- if can_have_partial_formats():
- req_format_list.append('bestvideo+bestaudio')
- req_format_list.append('best')
+ return True
+ if not can_merge():
+ return True
+ return False
+
+ req_format_list = ['bestvideo+bestaudio', 'best']
+ if prefer_best():
+ req_format_list.reverse()
return '/'.join(req_format_list)
def build_format_selector(self, format_spec):
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 6f6fb4a..7e891b9 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -117,9 +117,15 @@ class FragmentFD(FileDownloader):
def _prepare_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
+ if not ctx['live']:
+ total_frags_str = '%d' % ctx['total_frags']
+ ad_frags = ctx.get('ad_frags', 0)
+ if ad_frags:
+ total_frags_str += ' (not including %d ad)' % ad_frags
+ else:
+ total_frags_str = 'unknown (live)'
self.to_screen(
- '[%s] Total fragments: %s'
- % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
+ '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.report_destination(ctx['filename'])
dl = HttpQuietDownloader(
self.ydl,
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 46308cf..7955ca5 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -75,15 +75,29 @@ class HlsFD(FragmentFD):
fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
- total_frags = 0
+ def anvato_ad(s):
+ return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+
+ media_frags = 0
+ ad_frags = 0
+ ad_frag_next = False
for line in s.splitlines():
line = line.strip()
- if line and not line.startswith('#'):
- total_frags += 1
+ if not line:
+ continue
+ if line.startswith('#'):
+ if anvato_ad(line):
+ ad_frags += 1
+ continue
+ if ad_frag_next:
+ ad_frag_next = False
+ continue
+ media_frags += 1
ctx = {
'filename': filename,
- 'total_frags': total_frags,
+ 'total_frags': media_frags,
+ 'ad_frags': ad_frags,
}
self._prepare_and_start_frag_download(ctx)
@@ -101,10 +115,14 @@ class HlsFD(FragmentFD):
decrypt_info = {'METHOD': 'NONE'}
byte_range = {}
frag_index = 0
+ ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line:
if not line.startswith('#'):
+ if ad_frag_next:
+ ad_frag_next = False
+ continue
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
@@ -175,6 +193,8 @@ class HlsFD(FragmentFD):
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
+ elif anvato_ad(line):
+ ad_frag_next = True
self._finish_frag_download(ctx)
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index 2dcdba9..da1b566 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE):
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
- r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
+ r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py
index c8cb91d..e6513c7 100644
--- a/youtube_dl/extractor/afreecatv.py
+++ b/youtube_dl/extractor/afreecatv.py
@@ -139,6 +139,23 @@ class AfreecaTVIE(InfoExtractor):
'skip_download': True,
},
}, {
+ # adult video
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
+ 'info_dict': {
+ 'id': '20171001_F1AE1711_196617479_1',
+ 'ext': 'mp4',
+ 'title': '[생]서아 초심 찾기 방송 (part 1)',
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': 'BJ서아',
+ 'uploader_id': 'bjdyrksu',
+ 'upload_date': '20171001',
+ 'duration': 3600,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
@@ -160,7 +177,15 @@ class AfreecaTVIE(InfoExtractor):
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
- video_id, query={'nTitleNo': video_id})
+ video_id, query={
+ 'nTitleNo': video_id,
+ 'partialView': 'SKIP_ADULT',
+ })
+
+ flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+ if flag and flag != 'SUCCEED':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, flag), expected=True)
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
if video_element is None or video_element.text is None:
@@ -246,107 +271,3 @@ class AfreecaTVIE(InfoExtractor):
})
return info
-
-
-class AfreecaTVGlobalIE(AfreecaTVIE):
- IE_NAME = 'afreecatv:global'
- _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
- _TESTS = [{
- 'url': 'http://afreeca.tv/36853014/v/58301',
- 'info_dict': {
- 'id': '58301',
- 'title': 'tryhard top100',
- 'uploader_id': '36853014',
- 'uploader': 'makgi Hearthstone Live!',
- },
- 'playlist_count': 3,
- }]
-
- def _real_extract(self, url):
- channel_id, video_id = re.match(self._VALID_URL, url).groups()
- video_type = 'video' if video_id else 'live'
- query = {
- 'pt': 'view',
- 'bid': channel_id,
- }
- if video_id:
- query['vno'] = video_id
- video_data = self._download_json(
- 'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
- video_id or channel_id, query=query)['channel']
-
- if video_data.get('result') != 1:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
-
- title = video_data['title']
-
- info = {
- 'thumbnail': video_data.get('thumb'),
- 'view_count': int_or_none(video_data.get('vcnt')),
- 'age_limit': int_or_none(video_data.get('grade')),
- 'uploader_id': channel_id,
- 'uploader': video_data.get('cname'),
- }
-
- if video_id:
- entries = []
- for i, f in enumerate(video_data.get('flist', [])):
- video_key = self.parse_video_key(f.get('key', ''))
- f_url = f.get('file')
- if not video_key or not f_url:
- continue
- entries.append({
- 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
- 'title': title,
- 'upload_date': video_key.get('upload_date'),
- 'duration': int_or_none(f.get('length')),
- 'url': f_url,
- 'protocol': 'm3u8_native',
- 'ext': 'mp4',
- })
-
- info.update({
- 'id': video_id,
- 'title': title,
- 'duration': int_or_none(video_data.get('length')),
- })
- if len(entries) > 1:
- info['_type'] = 'multi_video'
- info['entries'] = entries
- elif len(entries) == 1:
- i = entries[0].copy()
- i.update(info)
- info = i
- else:
- formats = []
- for s in video_data.get('strm', []):
- s_url = s.get('purl')
- if not s_url:
- continue
- stype = s.get('stype')
- if stype == 'HLS':
- formats.extend(self._extract_m3u8_formats(
- s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
- elif stype == 'RTMP':
- format_id = [stype]
- label = s.get('label')
- if label:
- format_id.append(label)
- formats.append({
- 'format_id': '-'.join(format_id),
- 'url': s_url,
- 'tbr': int_or_none(s.get('bps')),
- 'height': int_or_none(s.get('brt')),
- 'ext': 'flv',
- 'rtmp_live': True,
- })
- self._sort_formats(formats)
-
- info.update({
- 'id': channel_id,
- 'title': self._live_title(title),
- 'is_live': True,
- 'formats': formats,
- })
-
- return info
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index 8023da7..7a29cd2 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -18,6 +18,7 @@ from ..utils import (
int_or_none,
strip_jsonp,
unescapeHTML,
+ unsmuggle_url,
)
@@ -197,12 +198,16 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr if tbr != 0 else None,
}
- if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
- if tbr is not None:
- a_format.update({
- 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
- 'ext': 'mp4',
- })
+ if media_format == 'm3u8' and tbr is not None:
+ a_format.update({
+ 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
+ 'ext': 'mp4',
+ })
+ elif media_format == 'm3u8-variant' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
else:
@@ -271,6 +276,9 @@ class AnvatoIE(InfoExtractor):
anvplayer_data['accessKey'], anvplayer_data['video'])
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index b45b431..a9ef733 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
continue
formats.append({
'format_id': '%s-%s' % (version, size),
- 'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
+ 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
'language': version[:2],
@@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
formats = []
for format in settings['metadata']['sizes']:
# The src is a file pointing to the real video file
- format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
+ format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
formats.append({
'url': format_url,
'format': format['type'],
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 3f248b1..915f886 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor):
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
- r'<meta name="dcterms.title" content="(.*?)"/>',
+ r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title')
description = self._html_search_meta(
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 8b20c03..5525f7c 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor):
m3u8_id=format_id, fatal=False))
if re.search(self._USP_RE, href):
usp_formats = self._extract_m3u8_formats(
- re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
+ re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
for f in usp_formats:
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index bbeae4b..bf22a41 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -60,9 +60,13 @@ class BeegIE(InfoExtractor):
beeg_version = beeg_version or '2185'
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
- video = self._download_json(
- 'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
- video_id)
+ for api_path in ('', 'api.'):
+ video = self._download_json(
+ 'https://%sbeeg.com/api/v6/%s/video/%s'
+ % (api_path, beeg_version, video_id), video_id,
+ fatal=api_path == 'api.')
+ if video:
+ break
def split(o, e):
def cut(s, x):
diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py
index aada029..6899f84 100644
--- a/youtube_dl/extractor/canvas.py
+++ b/youtube_dl/extractor/canvas.py
@@ -3,24 +3,104 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+ float_or_none,
+ strip_or_none,
+)
class CanvasIE(InfoExtractor):
+ _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'md5': '90139b746a0a9bd7bb631283f6e2a64e',
+ 'info_dict': {
+ 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'ext': 'flv',
+ 'title': 'Nachtwacht: De Greystook',
+ 'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1468.03,
+ },
+ 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
+ }, {
+ 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site_id, video_id = mobj.group('site_id'), mobj.group('id')
+
+ data = self._download_json(
+ 'https://mediazone.vrt.be/api/v1/%s/assets/%s'
+ % (site_id, video_id), video_id)
+
+ title = data['title']
+ description = data.get('description')
+
+ formats = []
+ for target in data['targetUrls']:
+ format_url, format_type = target.get('url'), target.get('type')
+ if not format_url or not format_type:
+ continue
+ if format_type == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_type, fatal=False))
+ elif format_type == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_type, fatal=False))
+ elif format_type == 'MPEG_DASH':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id=format_type, fatal=False))
+ elif format_type == 'HSS':
+ formats.extend(self._extract_ism_formats(
+ format_url, video_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_type,
+ 'url': format_url,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitle_urls = data.get('subtitleUrls')
+ if isinstance(subtitle_urls, list):
+ for subtitle in subtitle_urls:
+ subtitle_url = subtitle.get('url')
+ if subtitle_url and subtitle.get('type') == 'CLOSED':
+ subtitles.setdefault('nl', []).append({'url': subtitle_url})
+
+ return {
+ 'id': video_id,
+ 'display_id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'duration': float_or_none(data.get('duration'), 1000),
+ 'thumbnail': data.get('posterImageUrl'),
+ 'subtitles': subtitles,
+ }
+
+
+class CanvasEenIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
- 'md5': 'ea838375a547ac787d4064d8c7860a6c',
+ 'md5': 'ed66976748d12350b118455979cca293',
'info_dict': {
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'De afspraak veilt voor de Warmste Week',
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 49.02,
- }
+ },
+ 'expected_warnings': ['is not a supported codec'],
}, {
# with subtitles
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
@@ -40,7 +120,8 @@ class CanvasIE(InfoExtractor):
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': 'Pagina niet gevonden',
}, {
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
'info_dict': {
@@ -54,7 +135,8 @@ class CanvasIE(InfoExtractor):
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': 'Episode no longer available',
}, {
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
'only_matching': True,
@@ -66,55 +148,21 @@ class CanvasIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
- title = (self._search_regex(
+ title = strip_or_none(self._search_regex(
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
webpage, 'title', default=None) or self._og_search_title(
- webpage)).strip()
+ webpage, default=None))
video_id = self._html_search_regex(
- r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
-
- data = self._download_json(
- 'https://mediazone.vrt.be/api/v1/%s/assets/%s'
- % (site_id, video_id), display_id)
-
- formats = []
- for target in data['targetUrls']:
- format_url, format_type = target.get('url'), target.get('type')
- if not format_url or not format_type:
- continue
- if format_type == 'HLS':
- formats.extend(self._extract_m3u8_formats(
- format_url, display_id, entry_protocol='m3u8_native',
- ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
- elif format_type == 'HDS':
- formats.extend(self._extract_f4m_formats(
- format_url, display_id, f4m_id=format_type, fatal=False))
- elif format_type == 'MPEG_DASH':
- formats.extend(self._extract_mpd_formats(
- format_url, display_id, mpd_id=format_type, fatal=False))
- else:
- formats.append({
- 'format_id': format_type,
- 'url': format_url,
- })
- self._sort_formats(formats)
-
- subtitles = {}
- subtitle_urls = data.get('subtitleUrls')
- if isinstance(subtitle_urls, list):
- for subtitle in subtitle_urls:
- subtitle_url = subtitle.get('url')
- if subtitle_url and subtitle.get('type') == 'CLOSED':
- subtitles.setdefault('nl', []).append({'url': subtitle_url})
+ r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
+ group='id')
return {
+ '_type': 'url_transparent',
+ 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
+ 'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
- 'formats': formats,
- 'duration': float_or_none(data.get('duration'), 1000),
- 'thumbnail': data.get('posterImageUrl'),
- 'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index e928942..81108e7 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor):
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
+ webpage)
+
def _extract_list(self, video_id, rss_url=None):
if not rss_url:
rss_url = self._RSS_URL % video_id
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 4cac294..d08b909 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
class ComedyCentralShortnameIE(InfoExtractor):
- _VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
+ _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
_TESTS = [{
'url': ':tds',
'only_matching': True,
}, {
'url': ':thedailyshow',
'only_matching': True,
+ }, {
+ 'url': ':theopposition',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor):
shortcut_map = {
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
+ 'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
}
return self.url_result(shortcut_map[video_id])
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2bbbf8f..a692406 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1920,7 +1920,7 @@ class InfoExtractor(object):
# can't be used at the same time
if '%(Number' in media_template and 's' not in representation_ms_info:
segment_duration = None
- if 'total_number' not in representation_ms_info and 'segment_duration':
+ if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
@@ -2322,7 +2322,6 @@ class InfoExtractor(object):
formats = self._parse_jwplayer_formats(
video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
- self._sort_formats(formats)
subtitles = {}
tracks = video_data.get('tracks')
@@ -2339,16 +2338,25 @@ class InfoExtractor(object):
'url': self._proto_relative_url(track_url)
})
- entries.append({
+ entry = {
'id': this_video_id,
- 'title': video_data['title'] if require_title else video_data.get('title'),
+ 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
- 'formats': formats,
- })
+ }
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
+ if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
+ entry.update({
+ '_type': 'url_transparent',
+ 'url': formats[0]['url'],
+ })
+ else:
+ self._sort_formats(formats)
+ entry['formats'] = formats
+ entries.append(entry)
if len(entries) == 1:
return entries[0]
else:
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index e9d0dd19..21a2d02 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
# vevo embed
vevo_id = self._search_regex(
- r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
+ r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
webpage, 'vevo embed', default=None)
if vevo_id:
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py
index ec87b94..a38b268 100644
--- a/youtube_dl/extractor/deezer.py
+++ b/youtube_dl/extractor/deezer.py
@@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor):
'id': '176747451',
'title': 'Best!',
'uploader': 'Anonymous',
- 'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
+ 'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
},
'playlist_count': 30,
'skip': 'Only available in .de',
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4232a4f..ecb33bc 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -31,10 +31,7 @@ from .aenetworks import (
AENetworksIE,
HistoryTopicIE,
)
-from .afreecatv import (
- AfreecaTVIE,
- AfreecaTVGlobalIE,
-)
+from .afreecatv import AfreecaTVIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
@@ -150,7 +147,10 @@ from .camdemy import (
from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
-from .canvas import CanvasIE
+from .canvas import (
+ CanvasIE,
+ CanvasEenIE,
+)
from .carambatv import (
CarambaTVIE,
CarambaTVPageIE,
@@ -381,6 +381,7 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freshlive import FreshLiveIE
from .funimation import FunimationIE
+from .funk import FunkIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
@@ -940,6 +941,7 @@ from .skynewsarabia import (
)
from .skysports import SkySportsIE
from .slideshare import SlideshareIE
+from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .smotri import (
SmotriIE,
@@ -1243,7 +1245,10 @@ from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE
from .voot import VootIE
-from .voxmedia import VoxMediaIE
+from .voxmedia import (
+ VoxMediaVolumeIE,
+ VoxMediaIE,
+)
from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
@@ -1342,7 +1347,6 @@ from .youtube import (
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
- YoutubeSharedVideoIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE,
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 4b3f6cc..220ada3 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -67,9 +67,9 @@ class FacebookIE(InfoExtractor):
'uploader': 'Tennis on Facebook',
'upload_date': '20140908',
'timestamp': 1410199200,
- }
+ },
+ 'skip': 'Requires logging in',
}, {
- 'note': 'Video without discernible title',
'url': 'https://www.facebook.com/video.php?v=274175099429670',
'info_dict': {
'id': '274175099429670',
@@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'Asif Nawab Butt',
'upload_date': '20140506',
'timestamp': 1399398998,
+ 'thumbnail': r're:^https?://.*',
},
'expected_warnings': [
'title'
@@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20160110',
'timestamp': 1452431627,
},
+ 'skip': 'Requires logging in',
}, {
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
@@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '10153664894881749',
'ext': 'mp4',
- 'title': 'Facebook video #10153664894881749',
+ 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
+ 'thumbnail': r're:^https?://.*',
+ 'timestamp': 1456259628,
+ 'upload_date': '20160223',
+ 'uploader': 'Barack Obama',
},
}, {
# have 1080P, but only up to 720p in swf params
@@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
- 'title': 'Holocaust survivor becomes US citizen',
+ 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...',
'timestamp': 1477818095,
'upload_date': '20161030',
'uploader': 'CNN',
+ 'thumbnail': r're:^https?://.*',
},
}, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1477305000,
'upload_date': '20161024',
'uploader': 'La Guía Del Varón',
+ 'thumbnail': r're:^https?://.*',
},
'params': {
'skip_download': True,
@@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor):
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
+ thumbnail = self._og_search_thumbnail(webpage)
info_dict = {
'id': video_id,
@@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor):
'formats': formats,
'uploader': uploader,
'timestamp': timestamp,
+ 'thumbnail': thumbnail,
}
return webpage, info_dict
diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py
index facc665..5f98d01 100644
--- a/youtube_dl/extractor/fox.py
+++ b/youtube_dl/extractor/fox.py
@@ -2,7 +2,10 @@
from __future__ import unicode_literals
from .adobepass import AdobePassIE
+from .uplynk import UplynkPreplayIE
+from ..compat import compat_str
from ..utils import (
+ HEADRequest,
int_or_none,
parse_age_limit,
parse_duration,
@@ -53,14 +56,7 @@ class FOXIE(AdobePassIE):
})
title = video['name']
-
- m3u8_url = self._download_json(
- video['videoRelease']['url'], video_id)['playURL']
-
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
+ release_url = video['videoRelease']['url']
description = video.get('description')
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
@@ -84,7 +80,7 @@ class FOXIE(AdobePassIE):
# TODO: AP
pass
- return {
+ info = {
'id': video_id,
'title': title,
'description': description,
@@ -97,5 +93,22 @@ class FOXIE(AdobePassIE):
'episode': episode,
'episode_number': episode_number,
'release_year': release_year,
- 'formats': formats,
}
+
+ urlh = self._request_webpage(HEADRequest(release_url), video_id)
+ video_url = compat_str(urlh.geturl())
+
+ if UplynkPreplayIE.suitable(video_url):
+ info.update({
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'ie_key': UplynkPreplayIE.ie_key(),
+ })
+ else:
+ m3u8_url = self._download_json(release_url, video_id)['playURL']
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+ info['formats'] = formats
+ return info
diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py
index 0a70ca7..7fa271b 100644
--- a/youtube_dl/extractor/freespeech.py
+++ b/youtube_dl/extractor/freespeech.py
@@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
- info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info')
+ info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info')
info = json.loads(info_json)
return {
diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py
new file mode 100644
index 0000000..ce5c67f
--- /dev/null
+++ b/youtube_dl/extractor/funk.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .nexx import NexxIE
+from ..utils import extract_attributes
+
+
+class FunkIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
+ _TESTS = [{
+ 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
+ 'md5': '4d40974481fa3475f8bccfd20c5361f8',
+ 'info_dict': {
+ 'id': '716599',
+ 'ext': 'mp4',
+ 'title': 'Neue Rechte Welle',
+ 'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
+ 'timestamp': 1501337639,
+ 'upload_date': '20170729',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ domain_id = NexxIE._extract_domain_id(webpage) or '741'
+ nexx_id = extract_attributes(self._search_regex(
+ r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
+ webpage, 'media player'))['data-id']
+
+ return self.url_result(
+ 'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
+ video_id=nexx_id)
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 00d3111..02804d2 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -105,7 +105,7 @@ class GameSpotIE(OnceIE):
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
if onceux_url:
formats.extend(self._extract_once_formats(re.sub(
- r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
+ r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
if not formats:
for quality in ['sd', 'hd']:
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7d0edf0..2a9c3e2 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -22,6 +22,8 @@ from ..utils import (
HEADRequest,
is_html,
js_to_json,
+ KNOWN_EXTENSIONS,
+ mimetype2ext,
orderedSet,
sanitized_Request,
smuggle_url,
@@ -99,6 +101,7 @@ from .mediaset import MediasetIE
from .joj import JojIE
from .megaphone import MegaphoneIE
from .vzaar import VzaarIE
+from .channel9 import Channel9IE
class GenericIE(InfoExtractor):
@@ -1088,7 +1091,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20150212',
'uploader': 'The National Archives UK',
- 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
+ 'description': 'md5:8078af856dca76edc42910b61273dbbf',
'uploader_id': 'NationalArchives08',
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
},
@@ -1104,7 +1107,8 @@ class GenericIE(InfoExtractor):
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': 'does not contain a video anymore',
},
# Complex jwplayer
{
@@ -1113,6 +1117,7 @@ class GenericIE(InfoExtractor):
'id': 'videos',
'ext': 'mp4',
'title': 'king machine trailer 1',
+ 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
@@ -1130,13 +1135,42 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
+ {
+ # Video.js embed, multiple formats
+ 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+ 'info_dict': {
+ 'id': 'yygqldloqIk',
+ 'ext': 'mp4',
+ 'title': 'SolidWorks. Урок 6 Настройка чертежа',
+ 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+ 'upload_date': '20130314',
+ 'uploader': 'PROстое3D',
+ 'uploader_id': 'PROstoe3D',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Video.js embed, single format
+ 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+ 'info_dict': {
+ 'id': 'watch',
+ 'ext': 'mp4',
+ 'title': 'Step 1 - Good Foundation',
+ 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
'playlist_mincount': 5,
'info_dict': {
'id': 'aanslagen-kopenhagen',
- 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
+ 'title': 'Aanslagen Kopenhagen',
}
},
# Zapiks embed
@@ -1268,6 +1302,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ 'skip': 'This video is unavailable.',
},
# Pladform embed
{
@@ -1281,6 +1316,7 @@ class GenericIE(InfoExtractor):
'duration': 694,
'age_limit': 0,
},
+ 'skip': 'HTTP Error 404: Not Found',
},
# Playwire embed
{
@@ -1301,6 +1337,14 @@ class GenericIE(InfoExtractor):
'id': '518726732',
'ext': 'mp4',
'title': 'Facebook Creates "On This Day" | Crunch Report',
+ 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
+ 'timestamp': 1427237531,
+ 'uploader': 'Crunch Report',
+ 'upload_date': '20150324',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
},
},
# SVT embed
@@ -1352,16 +1396,20 @@ class GenericIE(InfoExtractor):
'upload_date': '20140107',
'timestamp': 1389118457,
},
+ 'skip': 'Invalid Page URL',
},
# NBC News embed
{
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
'md5': '1aa589c675898ae6d37a17913cf68d66',
'info_dict': {
- 'id': '701714499682',
+ 'id': 'x_dtl_oa_LettermanliftPR_160608',
'ext': 'mp4',
- 'title': 'PREVIEW: On Assignment: David Letterman',
+ 'title': 'David Letterman: A Preview',
'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
+ 'upload_date': '20160609',
+ 'timestamp': 1465431544,
+ 'uploader': 'NBCU-NEWS',
},
},
# UDN embed
@@ -1378,6 +1426,7 @@ class GenericIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
},
# Ooyala embed
{
@@ -1385,7 +1434,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
- 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
+ 'description': 'Index/Match versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes',
'duration': 191.933,
},
@@ -1423,7 +1472,8 @@ class GenericIE(InfoExtractor):
'upload_date': '20150622',
'uploader': 'Public Sénat',
'uploader_id': 'xa9gza',
- }
+ },
+ 'skip': 'File not found.',
},
# OnionStudios embed
{
@@ -1581,22 +1631,6 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['BrightcoveLegacy'],
},
- # Nexx embed
- {
- 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
- 'info_dict': {
- 'id': '247746',
- 'ext': 'mp4',
- 'title': "Yesterday's Jam (OV)",
- 'description': 'md5:09bc0984723fed34e2581624a84e05f0',
- 'timestamp': 1492594816,
- 'upload_date': '20170419',
- },
- 'params': {
- 'format': 'bestvideo',
- 'skip_download': True,
- },
- },
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
@@ -2175,7 +2209,7 @@ class GenericIE(InfoExtractor):
# And then there are the jokers who advertise that they use RTA,
# but actually don't.
AGE_LIMIT_MARKERS = [
- r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
+ r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
]
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
age_limit = 18
@@ -2237,7 +2271,7 @@ class GenericIE(InfoExtractor):
# Look for embedded rtl.nl player
matches = re.findall(
- r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
+ r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
webpage)
if matches:
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
@@ -2636,7 +2670,7 @@ class GenericIE(InfoExtractor):
# Look for UDN embeds
mobj = re.search(
- r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
+ r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -2840,6 +2874,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+ channel9_urls = Channel9IE._extract_urls(webpage)
+ if channel9_urls:
+ return self.playlist_from_matches(
+ channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
+
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
@@ -2880,6 +2919,46 @@ class GenericIE(InfoExtractor):
jwplayer_data, video_id, require_title=False, base_url=url)
return merge_dicts(info, info_dict)
+ # Video.js embed
+ mobj = re.search(
+ r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+ webpage)
+ if mobj is not None:
+ sources = self._parse_json(
+ mobj.group(1), video_id, transform_source=js_to_json,
+ fatal=False) or []
+ if not isinstance(sources, list):
+ sources = [sources]
+ formats = []
+ for source in sources:
+ src = source.get('src')
+ if not src or not isinstance(src, compat_str):
+ continue
+ src = compat_urlparse.urljoin(url, src)
+ src_type = source.get('type')
+ if isinstance(src_type, compat_str):
+ src_type = src_type.lower()
+ ext = determine_ext(src).lower()
+ if src_type == 'video/youtube':
+ return self.url_result(src, YoutubeIE.ie_key())
+ if src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ 'ext': (mimetype2ext(src_type) or
+ ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+ })
+ if formats:
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+ return info_dict
+
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
@@ -2973,7 +3052,7 @@ class GenericIE(InfoExtractor):
# be supported by youtube-dl thus this is checked the very last (see
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
- if embed_url:
+ if embed_url and embed_url != url:
return self.url_result(embed_url)
if not found:
diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py
index 45ccc11..a0670b6 100644
--- a/youtube_dl/extractor/gfycat.py
+++ b/youtube_dl/extractor/gfycat.py
@@ -11,7 +11,7 @@ from ..utils import (
class GfycatIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
@@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor):
'categories': list,
'age_limit': 0,
}
+ }, {
+ 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
+ 'only_matching': True
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py
index 427499b..6b927bb 100644
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor):
'width': int(width),
'height': int(height),
} for width, height, video_url in re.findall(
- r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)]
+ r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)]
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
index 2be68ab..cf90ab3 100644
--- a/youtube_dl/extractor/howstuffworks.py
+++ b/youtube_dl/extractor/howstuffworks.py
@@ -11,45 +11,20 @@ from ..utils import (
class HowStuffWorksIE(InfoExtractor):
- _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
+ _VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
_TESTS = [
{
- 'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
+ 'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
+ 'md5': '76646a5acc0c92bf7cd66751ca5db94d',
'info_dict': {
- 'id': '450221',
- 'ext': 'flv',
- 'title': 'Cool Jobs - Iditarod Musher',
- 'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
- 'display_id': 'cool-jobs-iditarod-musher',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 161,
- },
- 'skip': 'Video broken',
- },
- {
- 'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
- 'info_dict': {
- 'id': '453464',
- 'ext': 'mp4',
- 'title': 'Survival Zone: Food and Water In the Savanna',
- 'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
- 'display_id': 'survival-zone-food-and-water-in-the-savanna',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- },
- {
- 'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
- 'info_dict': {
- 'id': '440011',
+ 'id': '855410',
'ext': 'mp4',
- 'title': 'Sword Swallowing #1 by Dan Meyer',
- 'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
- 'display_id': 'sword-swallowing-1-by-dan-meyer',
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
+ 'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
},
},
{
- 'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
+ 'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
'only_matching': True,
}
]
diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py
index 656ce6d..6424d34 100644
--- a/youtube_dl/extractor/hrti.py
+++ b/youtube_dl/extractor/hrti.py
@@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE):
(?:
hrti:(?P<short_id>[0-9]+)|
https?://
- hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
+ hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
)
'''
_TESTS = [{
@@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE):
}, {
'url': 'hrti:2181385',
'only_matching': True,
+ }, {
+ 'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -170,7 +173,7 @@ class HRTiIE(HRTiBaseIE):
class HRTiPlaylistIE(HRTiBaseIE):
- _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
+ _VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
_TESTS = [{
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
'info_dict': {
@@ -182,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE):
}, {
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
'only_matching': True,
+ }, {
+ 'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py
index c1367cf..a96ea80 100644
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -203,7 +203,7 @@ class PCMagIE(IGNIE):
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = 'pcmag'
- _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'
+ _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
_TESTS = [{
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index fe425e7..c3e892f 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -8,7 +8,10 @@ from ..compat import (
compat_urllib_parse_unquote,
compat_urlparse,
)
-from ..utils import determine_ext
+from ..utils import (
+ determine_ext,
+ update_url_query,
+)
from .bokecc import BokeCCBaseIE
@@ -68,21 +71,22 @@ class InfoQIE(BokeCCBaseIE):
'play_path': playpath,
}]
- def _extract_cookies(self, webpage):
- policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
- signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
- key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
- return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
- policy, signature, key_pair_id)
+ def _extract_cf_auth(self, webpage):
+ policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
+ signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
+ key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
+ return {
+ 'Policy': policy,
+ 'Signature': signature,
+ 'Key-Pair-Id': key_pair_id,
+ }
def _extract_http_video(self, webpage):
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
+ http_video_url = update_url_query(http_video_url, self._extract_cf_auth(webpage))
return [{
'format_id': 'http_video',
'url': http_video_url,
- 'http_headers': {
- 'Cookie': self._extract_cookies(webpage)
- },
}]
def _extract_http_audio(self, webpage, video_id):
@@ -91,22 +95,20 @@ class InfoQIE(BokeCCBaseIE):
if not http_audio_url:
return []
- cookies_header = {'Cookie': self._extract_cookies(webpage)}
-
# base URL is found in the Location header in the response returned by
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
+ http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
# audio file seem to be missing some times even if there is a download link
# so probe URL to make sure
- if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
+ if not self._is_valid_url(http_audio_url, video_id):
return []
return [{
'format_id': 'http_audio',
'url': http_audio_url,
'vcodec': 'none',
- 'http_headers': cookies_header,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index 1a4227f..e9f4ed7 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
webpage = self._download_webpage(url, title)
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
config_url = self._html_search_regex(
- r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
+ r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
webpage, 'config URL')
config_url = 'http://www.jeuxvideo.com' + config_url
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 138d484..bdac2df 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -287,6 +287,9 @@ class KalturaIE(InfoExtractor):
# skip for now.
if f.get('fileExt') == 'chun':
continue
+ # DRM-protected video, cannot be decrypted
+ if f.get('fileExt') == 'wvm':
+ continue
if not f.get('fileExt'):
# QT indicates QuickTime; some videos have broken fileExt
if f.get('containerFormat') == 'qt':
diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py
index fb9c2db..93a98e1 100644
--- a/youtube_dl/extractor/ketnet.py
+++ b/youtube_dl/extractor/ketnet.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+from .canvas import CanvasIE
from .common import InfoExtractor
@@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
- 'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
+ 'md5': '6bdeb65998930251bbd1c510750edba9',
'info_dict': {
'id': 'zomerse-filmpjes',
'ext': 'mp4',
@@ -16,6 +17,20 @@ class KetnetIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
+ # mzid in playerConfig instead of sources
+ 'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
+ 'md5': '90139b746a0a9bd7bb631283f6e2a64e',
+ 'info_dict': {
+ 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'ext': 'flv',
+ 'title': 'Nachtwacht: De Greystook',
+ 'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1468.03,
+ },
+ 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
+ }, {
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
'only_matching': True,
}, {
@@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor):
'player config'),
video_id)
+ mzid = config.get('mzid')
+ if mzid:
+ return self.url_result(
+ 'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
+ CanvasIE.ie_key(), video_id=mzid)
+
title = config['title']
formats = []
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 7f946c6..317ebbc 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor):
info = {
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
- 'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
+ 'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
}
video_data = self._download_json(stream_url, content_id)
is_live = video_data.get('isLive')
diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py
index 068378c..cfec0d3 100644
--- a/youtube_dl/extractor/lnkgo.py
+++ b/youtube_dl/extractor/lnkgo.py
@@ -11,7 +11,7 @@ from ..utils import (
class LnkGoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
+ _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
_TESTS = [{
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
'info_dict': {
@@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor):
'params': {
'skip_download': True, # HLS download
},
+ }, {
+ 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
+ 'only_matching': True,
}]
_AGE_LIMITS = {
'N-7': 7,
diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py
index 3c34d46..8eda69c 100644
--- a/youtube_dl/extractor/makertv.py
+++ b/youtube_dl/extractor/makertv.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
+ _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py
index 1885ac7..dbd761a 100644
--- a/youtube_dl/extractor/mangomolo.py
+++ b/youtube_dl/extractor/mangomolo.py
@@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor):
format_url = self._html_search_regex(
[
- r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
+ r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
r'<a[^>]+href="(rtsp://[^"]+)"'
], webpage, 'format url')
formats = self._extract_wowza_formats(
diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py
index c8eacb4..2445b8b 100644
--- a/youtube_dl/extractor/meipai.py
+++ b/youtube_dl/extractor/meipai.py
@@ -11,7 +11,7 @@ from ..utils import (
class MeipaiIE(InfoExtractor):
IE_DESC = '美拍'
- _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
_TESTS = [{
# regular uploaded video
'url': 'http://www.meipai.com/media/531697625',
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index f331db8..7b2bb6e 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -291,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
functools.partial(
self._tracks_page_func,
'%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
- self._PAGE_SIZE, use_cache=True)
+ self._PAGE_SIZE)
return self.playlist_result(
entries, video_id, '%s (%s)' % (username, list_type), description)
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 25af5dd..1154a35 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
if mgid is None or ':' not in mgid:
mgid = self._search_regex(
- [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
+ [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
webpage, 'mgid', default=None)
if not mgid:
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
index 6bb64eb..367e811 100644
--- a/youtube_dl/extractor/myvideo.py
+++ b/youtube_dl/extractor/myvideo.py
@@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor):
else:
video_playpath = ''
- video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
+ video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py
index b91d865..9e8d28f 100644
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
release_url = self._search_regex(
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
webpage, 'release url')
- theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path')
+ theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
video_id = theplatform_path.split('/')[-1]
query = {
'mbr': 'true',
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index e813133..2047d44 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -43,7 +43,7 @@ class NaverIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
+ m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage)
if m_id is None:
error = self._html_search_regex(
diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py
index 5356196..be295a7 100644
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE):
playlist_title = self._og_search_title(webpage, fatal=False)
entries = OnDemandPagedList(
functools.partial(self._fetch_page, team, video_id),
- self._PAGE_SIZE, use_cache=True)
+ self._PAGE_SIZE)
return self.playlist_result(entries, team, playlist_title)
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 836a41f..35151f5 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -15,7 +15,7 @@ from ..utils import (
class NBCIE(AdobePassIE):
- _VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
+ _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
{
@@ -72,6 +72,7 @@ class NBCIE(AdobePassIE):
def _real_extract(self, url):
permalink, video_id = re.match(self._VALID_URL, url).groups()
+ permalink = 'http' + permalink
video_data = self._download_json(
'https://api.nbc.com/v3/videos', video_id, query={
'filter[permalink]': permalink,
diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py
index d0235fd..071879b 100644
--- a/youtube_dl/extractor/nexx.py
+++ b/youtube_dl/extractor/nexx.py
@@ -18,7 +18,13 @@ from ..utils import (
class NexxIE(InfoExtractor):
- _VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
+ nexx:(?P<domain_id_s>\d+):
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
# movie
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
@@ -62,9 +68,19 @@ class NexxIE(InfoExtractor):
}, {
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
'only_matching': True,
+ }, {
+ 'url': 'nexx:748:128907',
+ 'only_matching': True,
}]
@staticmethod
+ def _extract_domain_id(webpage):
+ mobj = re.search(
+ r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
+ webpage)
+ return mobj.group('id') if mobj else None
+
+ @staticmethod
def _extract_urls(webpage):
# Reference:
# 1. https://nx-s.akamaized.net/files/201510/44.pdf
@@ -72,11 +88,8 @@ class NexxIE(InfoExtractor):
entries = []
# JavaScript Integration
- mobj = re.search(
- r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
- webpage)
- if mobj:
- domain_id = mobj.group('id')
+ domain_id = NexxIE._extract_domain_id(webpage)
+ if domain_id:
for video_id in re.findall(
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
webpage):
@@ -112,7 +125,8 @@ class NexxIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- domain_id, video_id = mobj.group('domain_id', 'id')
+ domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
+ video_id = mobj.group('id')
# Reverse engineered from JS code (see getDeviceID function)
device_id = '%d:%d:%d%d' % (
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index fa4ef20..b8fe244 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE):
class HetKlokhuisIE(NPODataMidEmbedIE):
IE_NAME = 'hetklokhuis'
- _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py
index 1bf96ea..a637c8e 100644
--- a/youtube_dl/extractor/once.py
+++ b/youtube_dl/extractor/once.py
@@ -7,7 +7,7 @@ from .common import InfoExtractor
class OnceIE(InfoExtractor):
- _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
+ _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py
index 1d336cf..c6e3d56 100644
--- a/youtube_dl/extractor/onionstudios.py
+++ b/youtube_dl/extractor/onionstudios.py
@@ -13,11 +13,11 @@ from ..utils import (
class OnionStudiosIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+ _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
_TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
- 'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
+ 'md5': '719d1f8c32094b8c33902c17bcae5e34',
'info_dict': {
'id': '2937',
'ext': 'mp4',
@@ -29,12 +29,15 @@ class OnionStudiosIE(InfoExtractor):
}, {
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
'only_matching': True,
+ }, {
+ 'url': 'http://www.onionstudios.com/video/6139.json',
+ 'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
+ r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
if mobj:
return mobj.group('url')
diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py
index a4a5d39..ee04936 100644
--- a/youtube_dl/extractor/pornflip.py
+++ b/youtube_dl/extractor/pornflip.py
@@ -14,7 +14,7 @@ from ..utils import (
class PornFlipIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
+ _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})'
_TESTS = [{
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
'md5': '98c46639849145ae1fd77af532a9278c',
@@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor):
}, {
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
'only_matching': True,
+ }, {
+ 'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py
index 01c85ee..f36bc64 100644
--- a/youtube_dl/extractor/reddit.py
+++ b/youtube_dl/extractor/reddit.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -35,6 +37,8 @@ class RedditIE(InfoExtractor):
'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
return {
'id': video_id,
'title': video_id,
@@ -43,7 +47,7 @@ class RedditIE(InfoExtractor):
class RedditRIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
'info_dict': {
@@ -81,10 +85,13 @@ class RedditRIE(InfoExtractor):
}]
def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ url, video_id = mobj.group('url', 'id')
+
video_id = self._match_id(url)
data = self._download_json(
- url + '.json', video_id)[0]['data']['children'][0]['data']
+ url + '/.json', video_id)[0]['data']['children'][0]['data']
video_url = data['url']
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index 3e22998..bba25a2 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -12,10 +12,10 @@ class RtlNlIE(InfoExtractor):
IE_NAME = 'rtl.nl'
IE_DESC = 'rtl.nl and rtlxl.nl'
_VALID_URL = r'''(?x)
- https?://(?:www\.)?
+ https?://(?:(?:www|static)\.)?
(?:
rtlxl\.nl/[^\#]*\#!/[^/]+/|
- rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
+ rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)
)
(?P<id>[0-9a-f-]+)'''
@@ -73,6 +73,9 @@ class RtlNlIE(InfoExtractor):
}, {
'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
'only_matching': True,
+ }, {
+ 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index 746677a..d9edf9d 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -10,6 +10,7 @@ from ..compat import (
compat_struct_unpack,
)
from ..utils import (
+ determine_ext,
ExtractorError,
float_or_none,
remove_end,
@@ -85,6 +86,18 @@ class RTVEALaCartaIE(InfoExtractor):
},
'skip': 'The f4m manifest can\'t be used yet',
}, {
+ 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
+ 'md5': 'e55e162379ad587e9640eda4f7353c0f',
+ 'info_dict': {
+ 'id': '4236788',
+ 'ext': 'mp4',
+ 'title': 'Servir y proteger - Capítulo 104 ',
+ 'duration': 3222.0,
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ }, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True,
}, {
@@ -107,24 +120,41 @@ class RTVEALaCartaIE(InfoExtractor):
video_id)['page']['items'][0]
if info['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True)
+ title = info['title']
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png_request = sanitized_Request(png_url)
png_request.add_header('Referer', url)
png = self._download_webpage(png_request, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
- if not video_url.endswith('.f4m'):
+ ext = determine_ext(video_url)
+
+ formats = []
+ if not video_url.endswith('.f4m') and ext != 'm3u8':
if '?' not in video_url:
video_url = video_url.replace('resources/', 'auth/resources/')
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ })
+ self._sort_formats(formats)
+
subtitles = None
if info.get('sbtFile') is not None:
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
return {
'id': video_id,
- 'title': info['title'],
- 'url': video_url,
+ 'title': title,
+ 'formats': formats,
'thumbnail': info.get('image'),
'page_url': url,
'subtitles': subtitles,
diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py
index 2b830cf..3c8053a 100644
--- a/youtube_dl/extractor/ruhd.py
+++ b/youtube_dl/extractor/ruhd.py
@@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor):
video_url = self._html_search_regex(
r'<param name="src" value="([^"]+)"', webpage, 'video url')
title = self._html_search_regex(
- r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>',
+ r'<title>([^<]+)&nbsp;&nbsp; RUHD\.ru - Видео Высокого качества №1 в России!</title>',
webpage, 'title')
description = self._html_search_regex(
r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py
index 597d6f5..b446a02 100644
--- a/youtube_dl/extractor/scrippsnetworks.py
+++ b/youtube_dl/extractor/scrippsnetworks.py
@@ -1,60 +1,190 @@
# coding: utf-8
from __future__ import unicode_literals
-from .adobepass import AdobePassIE
+import datetime
+import json
+import hashlib
+import hmac
+import re
+
+from .common import InfoExtractor
+from .anvato import AnvatoIE
from ..utils import (
- int_or_none,
smuggle_url,
- update_url_query,
+ urlencode_postdata,
+ xpath_text,
)
-class ScrippsNetworksWatchIE(AdobePassIE):
+class ScrippsNetworksWatchIE(InfoExtractor):
IE_NAME = 'scrippsnetworks:watch'
- _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
- _TEST = {
- 'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
+ _VALID_URL = r'''(?x)
+ https?://
+ watch\.
+ (?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
+ (?:
+ player\.[A-Z0-9]+\.html\#|
+ show/(?:[^/]+/){2}|
+ player/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
'md5': '26545fd676d939954c6808274bdb905a',
'info_dict': {
- 'id': '0256538',
+ 'id': '4173834',
'ext': 'mp4',
- 'title': 'Seeking a Wow House',
- 'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
- 'uploader': 'SCNI',
- 'upload_date': '20170207',
- 'timestamp': 1486450493,
+ 'title': 'Best Ever Treehouses',
+ 'description': "We're searching for the most over the top treehouses.",
+ 'uploader': 'ANV',
+ 'upload_date': '20170922',
+ 'timestamp': 1506056400,
+ },
+ 'params': {
+ 'skip_download': True,
},
- 'skip': 'requires TV provider authentication',
+ 'add_ie': [AnvatoIE.ie_key()],
+ }, {
+ 'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
+ 'only_matching': True,
+ }]
+
+ _SNI_TABLE = {
+ 'hgtv': 'hgtv',
+ 'diynetwork': 'diy',
+ 'foodnetwork': 'food',
+ 'cookingchanneltv': 'cook',
+ 'travelchannel': 'trav',
+ 'geniuskitchen': 'genius',
}
+ _SNI_HOST = 'web.api.video.snidigital.com'
+
+ _AWS_REGION = 'us-east-1'
+ _AWS_IDENTITY_ID_JSON = json.dumps({
+ 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
+ })
+ _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
+ _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
+ _AWS_SERVICE = 'execute-api'
+ _AWS_REQUEST = 'aws4_request'
+ _AWS_SIGNED_HEADERS = ';'.join([
+ 'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
+ _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
+%(uri)s
+
+host:%(host)s
+x-amz-date:%(date)s
+x-amz-security-token:%(token)s
+x-api-key:%(key)s
+
+%(signed_headers)s
+%(payload_hash)s'''
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- channel = self._parse_json(self._search_regex(
- r'"channels"\s*:\s*(\[.+\])',
- webpage, 'channels'), video_id)[0]
- video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
- title = video_data['title']
- release_url = video_data['releaseUrl']
- if video_data.get('restricted'):
- requestor_id = self._search_regex(
- r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
- resource = self._get_mvpd_resource(
- requestor_id, title, video_id,
- video_data.get('ratings', [{}])[0].get('rating'))
- auth = self._extract_mvpd_auth(
- url, video_id, requestor_id, resource)
- release_url = update_url_query(release_url, {'auth': auth})
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'title': title,
- 'url': smuggle_url(release_url, {'force_smil_url': True}),
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('thumbnailUrl'),
- 'series': video_data.get('showTitle'),
- 'season_number': int_or_none(video_data.get('season')),
- 'episode_number': int_or_none(video_data.get('episodeNumber')),
- 'ie_key': 'ThePlatform',
+ mobj = re.match(self._VALID_URL, url)
+ site_id, video_id = mobj.group('site', 'id')
+
+ def aws_hash(s):
+ return hashlib.sha256(s.encode('utf-8')).hexdigest()
+
+ token = self._download_json(
+ 'https://cognito-identity.us-east-1.amazonaws.com/', video_id,
+ data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
+ headers={
+ 'Accept': '*/*',
+ 'Content-Type': 'application/x-amz-json-1.1',
+ 'Referer': url,
+ 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON),
+ 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ })['Token']
+
+ sts = self._download_xml(
+ 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
+ 'Action': 'AssumeRoleWithWebIdentity',
+ 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
+ 'RoleSessionName': 'web-identity',
+ 'Version': '2011-06-15',
+ 'WebIdentityToken': token,
+ }), headers={
+ 'Referer': url,
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+ })
+
+ def get(key):
+ return xpath_text(
+ sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
+ fatal=True)
+
+ access_key_id = get('AccessKeyId')
+ secret_access_key = get('SecretAccessKey')
+ session_token = get('SessionToken')
+
+ # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+ uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
+ datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+ date = datetime_now[:8]
+ canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
+ 'uri': uri,
+ 'host': self._SNI_HOST,
+ 'date': datetime_now,
+ 'token': session_token,
+ 'key': self._AWS_API_KEY,
+ 'signed_headers': self._AWS_SIGNED_HEADERS,
+ 'payload_hash': aws_hash(''),
}
+
+ # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+ credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
+ string_to_sign = '\n'.join([
+ 'AWS4-HMAC-SHA256', datetime_now, credential_string,
+ aws_hash(canonical_string)])
+
+ # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+ def aws_hmac(key, msg):
+ return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
+
+ def aws_hmac_digest(key, msg):
+ return aws_hmac(key, msg).digest()
+
+ def aws_hmac_hexdigest(key, msg):
+ return aws_hmac(key, msg).hexdigest()
+
+ k_secret = 'AWS4' + secret_access_key
+ k_date = aws_hmac_digest(k_secret.encode('utf-8'), date)
+ k_region = aws_hmac_digest(k_date, self._AWS_REGION)
+ k_service = aws_hmac_digest(k_region, self._AWS_SERVICE)
+ k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST)
+
+ signature = aws_hmac_hexdigest(k_signing, string_to_sign)
+
+ auth_header = ', '.join([
+ 'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
+ [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
+ 'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
+ 'Signature=%s' % signature,
+ ])
+
+ mcp_id = self._download_json(
+ 'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
+ 'Accept': '*/*',
+ 'Referer': url,
+ 'Authorization': auth_header,
+ 'X-Amz-Date': datetime_now,
+ 'X-Amz-Security-Token': session_token,
+ 'X-Api-Key': self._AWS_API_KEY,
+ })['results'][0]['mcpId']
+
+ return self.url_result(
+ smuggle_url(
+ 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+ {'geo_countries': ['US']}),
+ AnvatoIE.ie_key(), video_id=mcp_id)
diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py
index 62d41e8..374f7fa 100644
--- a/youtube_dl/extractor/shahid.py
+++ b/youtube_dl/extractor/shahid.py
@@ -18,46 +18,32 @@ from ..utils import (
class ShahidIE(InfoExtractor):
_NETRC_MACHINE = 'shahid'
- _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
+ _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
+ 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
'info_dict': {
- 'id': '90574',
+ 'id': '275286',
'ext': 'mp4',
- 'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
- 'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
- 'duration': 2972,
- 'timestamp': 1422057420,
- 'upload_date': '20150123',
+ 'title': 'مجلس الشباب الموسم 1 كليب 1',
+ 'timestamp': 1506988800,
+ 'upload_date': '20171003',
},
'params': {
# m3u8 download
'skip_download': True,
}
}, {
- 'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
+ 'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746',
'only_matching': True
}, {
# shahid plus subscriber only
- 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
+ 'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
'only_matching': True
}]
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- return
-
+ def _api2_request(self, *args, **kwargs):
try:
- user_data = self._download_json(
- 'https://shahid.mbc.net/wd/service/users/login',
- None, 'Logging in', data=json.dumps({
- 'email': email,
- 'password': password,
- 'basic': 'false',
- }).encode('utf-8'), headers={
- 'Content-Type': 'application/json; charset=UTF-8',
- })['user']
+ return self._download_json(*args, **kwargs)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
fail_data = self._parse_json(
@@ -69,6 +55,21 @@ class ShahidIE(InfoExtractor):
raise ExtractorError(faults_message, expected=True)
raise
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ user_data = self._api2_request(
+ 'https://shahid.mbc.net/wd/service/users/login',
+ None, 'Logging in', data=json.dumps({
+ 'email': email,
+ 'password': password,
+ 'basic': 'false',
+ }).encode('utf-8'), headers={
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['user']
+
self._download_webpage(
'https://shahid.mbc.net/populateContext',
None, 'Populate Context', data=urlencode_postdata({
@@ -93,15 +94,17 @@ class ShahidIE(InfoExtractor):
def _real_extract(self, url):
page_type, video_id = re.match(self._VALID_URL, url).groups()
+ if page_type == 'clip':
+ page_type = 'episode'
- player = self._get_api_data(self._download_json(
- 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
- video_id, 'Downloading player JSON'))
+ playout = self._api2_request(
+ 'https://api2.shahid.net/proxy/v2/playout/url/' + video_id,
+ video_id, 'Downloading player JSON')['playout']
- if player.get('drm'):
+ if playout.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
- formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
+ formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
self._sort_formats(formats)
video = self._get_api_data(self._download_json(
diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py
new file mode 100644
index 0000000..1045760
--- /dev/null
+++ b/youtube_dl/extractor/slideslive.py
@@ -0,0 +1,34 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SlidesLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
+ 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
+ 'info_dict': {
+ 'id': 'LMtgR8ba0b0',
+ 'ext': 'mp4',
+ 'title': '38902413: external video',
+ 'description': '3890241320170925-9-1yd6ech.mp4',
+ 'uploader': 'SlidesLive Administrator',
+ 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
+ 'upload_date': '20170925',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ url, video_id, headers={'Accept': 'application/json'})
+ service_name = video_data['video_service_name']
+ if service_name == 'YOUTUBE':
+ yt_video_id = video_data['video_service_id']
+ return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
+ else:
+ raise ExtractorError(
+ 'Unsupported service name: {0}'.format(service_name), expected=True)
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
index c59896a..a7b1b3b 100644
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@@ -44,6 +44,7 @@ class SpikeIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
_CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
+ _GEO_COUNTRIES = ['US']
def _extract_mgid(self, webpage):
mgid = super(SpikeIE, self)._extract_mgid(webpage)
diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py
index cce65fb..ae3dd13 100644
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
r'(?s)<description>([^<]+)</description>',
coursepage, 'description', fatal=False)
- links = orderedSet(re.findall(r'<a href="(VideoPage.php\?[^"]+)">', coursepage))
+ links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
info['entries'] = [self.url_result(
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
) for l in links]
@@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
rootpage = self._download_webpage(rootURL, info['id'],
errnote='Unable to download course info page')
- links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage))
+ links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
info['entries'] = [self.url_result(
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
) for l in links]
diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py
index 1a831ef..e5ac586 100644
--- a/youtube_dl/extractor/steam.py
+++ b/youtube_dl/extractor/steam.py
@@ -4,8 +4,10 @@ import re
from .common import InfoExtractor
from ..utils import (
+ extract_attributes,
ExtractorError,
- unescapeHTML,
+ get_element_by_class,
+ js_to_json,
)
@@ -25,35 +27,39 @@ class SteamIE(InfoExtractor):
'url': 'http://store.steampowered.com/video/105600/',
'playlist': [
{
- 'md5': 'f870007cee7065d7c76b88f0a45ecc07',
+ 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
'info_dict': {
- 'id': '81300',
- 'ext': 'flv',
- 'title': 'Terraria 1.1 Trailer',
+ 'id': '2040428',
+ 'ext': 'mp4',
+ 'title': 'Terraria 1.3 Trailer',
'playlist_index': 1,
}
},
{
- 'md5': '61aaf31a5c5c3041afb58fb83cbb5751',
+ 'md5': '911672b20064ca3263fa89650ba5a7aa',
'info_dict': {
- 'id': '80859',
- 'ext': 'flv',
- 'title': 'Terraria Trailer',
+ 'id': '2029566',
+ 'ext': 'mp4',
+ 'title': 'Terraria 1.2 Trailer',
'playlist_index': 2,
}
}
],
+ 'info_dict': {
+ 'id': '105600',
+ 'title': 'Terraria',
+ },
'params': {
'playlistend': 2,
}
}, {
'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
'info_dict': {
- 'id': 'WB5DvDOOvAY',
+ 'id': 'X8kpJBlzD2E',
'ext': 'mp4',
- 'upload_date': '20140329',
- 'title': 'FRONTIERS - Final Greenlight Trailer',
- 'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
+ 'upload_date': '20140617',
+ 'title': 'FRONTIERS - Trapping',
+ 'description': 'md5:bf6f7f773def614054089e5769c12a6e',
'uploader': 'AAD Productions',
'uploader_id': 'AtomicAgeDogGames',
}
@@ -76,48 +82,65 @@ class SteamIE(InfoExtractor):
self.report_age_confirmation()
webpage = self._download_webpage(videourl, playlist_id)
+ flash_vars = self._parse_json(self._search_regex(
+ r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage,
+ 'flash vars'), playlist_id, js_to_json)
+
+ playlist_title = None
+ entries = []
if fileID:
- playlist_title = self._html_search_regex(
- r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
- mweb = re.finditer(r'''(?x)
- 'movie_(?P<videoID>[0-9]+)':\s*\{\s*
- YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
- ''', webpage)
- videos = [{
- '_type': 'url',
- 'url': vid.group('youtube_id'),
- 'ie_key': 'Youtube',
- } for vid in mweb]
+ playlist_title = get_element_by_class('workshopItemTitle', webpage)
+ for movie in flash_vars.values():
+ if not movie:
+ continue
+ youtube_id = movie.get('YOUTUBE_VIDEO_ID')
+ if not youtube_id:
+ continue
+ entries.append({
+ '_type': 'url',
+ 'url': youtube_id,
+ 'ie_key': 'Youtube',
+ })
else:
- playlist_title = self._html_search_regex(
- r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
-
- mweb = re.finditer(r'''(?x)
- 'movie_(?P<videoID>[0-9]+)':\s*\{\s*
- FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
- (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
- ''', webpage)
- titles = re.finditer(
- r'<span class="title">(?P<videoName>.+?)</span>', webpage)
- thumbs = re.finditer(
- r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
- videos = []
-
- for vid, vtitle, thumb in zip(mweb, titles, thumbs):
- video_id = vid.group('videoID')
- title = vtitle.group('videoName')
- video_url = vid.group('videoURL')
- video_thumb = thumb.group('thumbnail')
- if not video_url:
- raise ExtractorError('Cannot find video url for %s' % video_id)
- videos.append({
+ playlist_title = get_element_by_class('apphub_AppName', webpage)
+ for movie_id, movie in flash_vars.items():
+ if not movie:
+ continue
+ video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False)
+ title = movie.get('MOVIE_NAME')
+ if not title or not video_id:
+ continue
+ entry = {
'id': video_id,
- 'url': video_url,
- 'ext': 'flv',
- 'title': unescapeHTML(title),
- 'thumbnail': video_thumb
- })
- if not videos:
+ 'title': title.replace('+', ' '),
+ }
+ formats = []
+ flv_url = movie.get('FILENAME')
+ if flv_url:
+ formats.append({
+ 'format_id': 'flv',
+ 'url': flv_url,
+ })
+ highlight_element = self._search_regex(
+ r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id,
+ webpage, 'highlight element', fatal=False)
+ if highlight_element:
+ highlight_attribs = extract_attributes(highlight_element)
+ if highlight_attribs:
+ entry['thumbnail'] = highlight_attribs.get('data-poster')
+ for quality in ('', '-hd'):
+ for ext in ('webm', 'mp4'):
+ video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality))
+ if video_url:
+ formats.append({
+ 'format_id': ext + quality,
+ 'url': video_url,
+ })
+ if not formats:
+ continue
+ entry['formats'] = formats
+ entries.append(entry)
+ if not entries:
raise ExtractorError('Could not find any videos')
- return self.playlist_result(videos, playlist_id, playlist_title)
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index de236bb..b1a985f 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
def hex_to_bytes(hex):
return binascii.a2b_hex(hex.encode('ascii'))
- relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
+ relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py
index 33683b1..dc3dd03 100644
--- a/youtube_dl/extractor/thisav.py
+++ b/youtube_dl/extractor/thisav.py
@@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor):
info_dict = self._extract_jwplayer_data(
webpage, video_id, require_title=False)
uploader = self._html_search_regex(
- r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
+ r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
webpage, 'uploader name', fatal=False)
uploader_id = self._html_search_regex(
- r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
+ r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
webpage, 'uploader id', fatal=False)
info_dict.update({
diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py
index c44018a..36f6c16 100644
--- a/youtube_dl/extractor/tubitv.py
+++ b/youtube_dl/extractor/tubitv.py
@@ -13,11 +13,11 @@ from ..utils import (
class TubiTvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
_LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv'
_GEO_COUNTRIES = ['US']
- _TEST = {
+ _TESTS = [{
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'md5': '43ac06be9326f41912dc64ccf7a80320',
'info_dict': {
@@ -27,7 +27,13 @@ class TubiTvIE(InfoExtractor):
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
},
- }
+ }, {
+ 'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tubitv.com/movies/383676/tracker',
+ 'only_matching': True,
+ }]
def _login(self):
(username, password) = self._get_login_info()
diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py
index 3ced098..b57abea 100644
--- a/youtube_dl/extractor/tva.py
+++ b/youtube_dl/extractor/tva.py
@@ -3,52 +3,50 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- int_or_none,
- parse_iso8601,
+ float_or_none,
smuggle_url,
)
class TVAIE(InfoExtractor):
- _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P<id>\d+)'
+ _VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
_TEST = {
- 'url': 'http://videos.tva.ca/episode/85538',
+ 'url': 'https://videos.tva.ca/details/_5596811470001',
'info_dict': {
- 'id': '85538',
+ 'id': '5596811470001',
'ext': 'mp4',
- 'title': 'Épisode du 25 janvier 2017',
- 'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98',
- 'upload_date': '20170126',
- 'timestamp': 1485442329,
+ 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
+ 'uploader_id': '5481942443001',
+ 'upload_date': '20171003',
+ 'timestamp': 1507064617,
},
'params': {
# m3u8 download
'skip_download': True,
}
}
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
- "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id,
- video_id, query={
- '$expand': 'Metadata,CustomId',
- '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName',
- '$format': 'json',
+ 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
+ 'Accept': 'application/json',
})
- metadata = video_data.get('Metadata', {})
+
+ def get_attribute(key):
+ for attribute in video_data.get('attributes', []):
+ if attribute.get('key') == key:
+ return attribute.get('value')
+ return None
return {
'_type': 'url_transparent',
'id': video_id,
- 'title': video_data['Title'],
- 'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}),
- 'description': video_data.get('LongDescription') or video_data.get('ShortDescription'),
- 'series': video_data.get('ShowName'),
- 'episode': metadata.get('EpisodeTitle'),
- 'episode_number': int_or_none(metadata.get('EpisodeNumber')),
- 'categories': video_data.get('Categories'),
- 'average_rating': video_data.get('AverageUserRating'),
- 'timestamp': parse_iso8601(video_data.get('CreatedDate')),
- 'ie_key': 'Ooyala',
+ 'title': get_attribute('title'),
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
+ 'description': get_attribute('description'),
+ 'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
+ 'duration': float_or_none(get_attribute('video-duration'), 1000),
+ 'ie_key': 'BrightcoveNew',
}
diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py
index 12ed603..6590e1f 100644
--- a/youtube_dl/extractor/tvn24.py
+++ b/youtube_dl/extractor/tvn24.py
@@ -9,7 +9,7 @@ from ..utils import (
class TVN24IE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+ _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
'md5': 'fbdec753d7bc29d96036808275f2130c',
@@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor):
'ext': 'mp4',
'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
- 'thumbnail': 're:http://.*[.]jpeg',
+ 'thumbnail': 're:https?://.*[.]jpeg',
}
}, {
'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
@@ -29,6 +29,9 @@ class TVN24IE(InfoExtractor):
}, {
'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index c5b3288..3954f0b 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -15,16 +15,16 @@ from ..utils import (
class TVPIE(InfoExtractor):
IE_NAME = 'tvp'
IE_DESC = 'Telewizja Polska'
- _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
+ _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
+ 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
'info_dict': {
'id': '194536',
'ext': 'mp4',
'title': 'Czas honoru, I seria – odc. 13',
- 'description': 'md5:76649d2014f65c99477be17f23a4dead',
+ 'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
@@ -37,12 +37,13 @@ class TVPIE(InfoExtractor):
},
}, {
# page id is not the same as video id(#7799)
- 'url': 'http://vod.tvp.pl/22704887/08122015-1500',
- 'md5': 'cf6a4705dfd1489aef8deb168d6ba742',
+ 'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930',
+ 'md5': '84cd3c8aec4840046e5ab712416b73d0',
'info_dict': {
- 'id': '22680786',
+ 'id': '33908820',
'ext': 'mp4',
- 'title': 'Wiadomości, 08.12.2015, 15:00',
+ 'title': 'Wiadomości, 28.09.2017, 19:30',
+ 'description': 'Wydanie główne codziennego serwisu informacyjnego.'
},
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 0df3ad7..1b0b963 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE):
webpage = self._download_webpage(url, video_id)
iframe_url = self._html_search_regex(
- r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
+ r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
webpage, 'video iframe', default=None)
if iframe_url:
return self.url_result(iframe_url)
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index daf45d0..2c8e5c7 100644
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dl/extractor/udn.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
@@ -29,6 +28,7 @@ class UDNEmbedIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
}, {
'url': 'https://video.udn.com/embed/news/300040',
'only_matching': True,
@@ -43,10 +43,21 @@ class UDNEmbedIE(InfoExtractor):
page = self._download_webpage(url, video_id)
- options = json.loads(js_to_json(self._html_search_regex(
- r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))
-
- video_urls = options['video']
+ options_str = self._html_search_regex(
+ r'var\s+options\s*=\s*([^;]+);', page, 'options')
+ trans_options_str = js_to_json(options_str)
+ options = self._parse_json(trans_options_str, 'options', fatal=False) or {}
+ if options:
+ video_urls = options['video']
+ title = options['title']
+ poster = options.get('poster')
+ else:
+ video_urls = self._parse_json(self._html_search_regex(
+ r'"video"\s*:\s*({.+?})\s*,', trans_options_str, 'video urls'), 'video urls')
+ title = self._html_search_regex(
+ r"title\s*:\s*'(.+?)'\s*,", options_str, 'title')
+ poster = self._html_search_regex(
+ r"poster\s*:\s*'(.+?)'\s*,", options_str, 'poster', default=None)
if video_urls.get('youtube'):
return self.url_result(video_urls.get('youtube'), 'Youtube')
@@ -68,7 +79,7 @@ class UDNEmbedIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id='hds'))
else:
- mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url)
+ mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+)\.mp4', video_url)
a_format = {
'url': video_url,
# video_type may be 'mp4', which confuses YoutubeDL
@@ -83,14 +94,9 @@ class UDNEmbedIE(InfoExtractor):
self._sort_formats(formats)
- thumbnails = [{
- 'url': img_url,
- 'id': img_type,
- } for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]
-
return {
'id': video_id,
'formats': formats,
- 'title': options['title'],
- 'thumbnails': thumbnails,
+ 'title': title,
+ 'thumbnail': poster,
}
diff --git a/youtube_dl/extractor/vh1.py b/youtube_dl/extractor/vh1.py
index 570fa45..dff94a2 100644
--- a/youtube_dl/extractor/vh1.py
+++ b/youtube_dl/extractor/vh1.py
@@ -1,131 +1,41 @@
+# coding: utf-8
from __future__ import unicode_literals
-from .mtv import MTVIE
+from .mtv import MTVServicesInfoExtractor
-import re
-from ..utils import fix_xml_ampersands
-
-class VH1IE(MTVIE):
+class VH1IE(MTVServicesInfoExtractor):
IE_NAME = 'vh1.com'
- _FEED_URL = 'http://www.vh1.com/player/embed/AS3/fullepisode/rss/'
+ _FEED_URL = 'http://www.vh1.com/feeds/mrss/'
_TESTS = [{
- 'url': 'http://www.vh1.com/video/metal-evolution/full-episodes/progressive-metal/1678612/playlist.jhtml',
- 'playlist': [
- {
- 'md5': '7827a7505f59633983165bbd2c119b52',
- 'info_dict': {
- 'id': '731565',
- 'ext': 'mp4',
- 'title': 'Metal Evolution: Ep. 11 Act 1',
- 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 12 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
- }
- },
- {
- 'md5': '34fb4b7321c546b54deda2102a61821f',
- 'info_dict': {
- 'id': '731567',
- 'ext': 'mp4',
- 'title': 'Metal Evolution: Ep. 11 Act 2',
- 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
- }
- },
- {
- 'md5': '813f38dba4c1b8647196135ebbf7e048',
- 'info_dict': {
- 'id': '731568',
- 'ext': 'mp4',
- 'title': 'Metal Evolution: Ep. 11 Act 3',
- 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
- }
- },
- {
- 'md5': '51adb72439dfaed11c799115d76e497f',
- 'info_dict': {
- 'id': '731569',
- 'ext': 'mp4',
- 'title': 'Metal Evolution: Ep. 11 Act 4',
- 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
- }
- },
- {
- 'md5': '93d554aaf79320703b73a95288c76a6e',
- 'info_dict': {
- 'id': '731570',
- 'ext': 'mp4',
- 'title': 'Metal Evolution: Ep. 11 Act 5',
- 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
- }
- }
- ],
- 'skip': 'Blocked outside the US',
- }, {
- # Clip
- 'url': 'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118',
- 'md5': '7d67cf6d9cdc6b4f3d3ac97a55403844',
+ 'url': 'http://www.vh1.com/episodes/0umwpq/hip-hop-squares-kent-jones-vs-nick-young-season-1-ep-120',
'info_dict': {
- 'id': '706675',
- 'ext': 'mp4',
- 'title': 'Metal Evolution: Episode 1 Pre-Metal Show Clip',
- 'description': 'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.'
+ 'title': 'Kent Jones vs. Nick Young',
+ 'description': 'Come to Play. Stay to Party. With Mike Epps, TIP, O’Shea Jackson Jr., T-Pain, Tisha Campbell-Martin and more.',
},
- 'skip': 'Blocked outside the US',
+ 'playlist_mincount': 4,
}, {
- # Short link
- 'url': 'http://www.vh1.com/video/play.jhtml?id=1678353',
- 'md5': '853192b87ad978732b67dd8e549b266a',
+ # Clip
+ 'url': 'http://www.vh1.com/video-clips/t74mif/scared-famous-scared-famous-extended-preview',
'info_dict': {
- 'id': '730355',
+ 'id': '0a50c2d2-a86b-4141-9565-911c7e2d0b92',
'ext': 'mp4',
- 'title': 'Metal Evolution: Episode 11 Progressive Metal Sneak',
- 'description': 'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.'
+ 'title': 'Scared Famous|October 9, 2017|1|NO-EPISODE#|Scared Famous + Extended Preview',
+ 'description': 'md5:eff5551a274c473a29463de40f7b09da',
+ 'upload_date': '20171009',
+ 'timestamp': 1507574700,
},
- 'skip': 'Blocked outside the US',
- }, {
- 'url': 'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml',
- 'md5': 'b1bcb5b4380c9d7f544065589432dee7',
- 'info_dict': {
- 'id': '900535',
- 'ext': 'mp4',
- 'title': 'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"',
- 'description': 'The Heist'
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
},
- 'skip': 'Blocked outside the US',
}]
- _VALID_URL = r'''(?x)
- https?://www\.vh1\.com/video/
- (?:
- .+?/full-episodes/.+?/(?P<playlist_id>[^/]+)/playlist\.jhtml
- |
- (?:
- play.jhtml\?id=|
- misc/.+?/.+?\.jhtml\#id=
- )
- (?P<video_id>[0-9]+)$
- |
- [^/]+/(?P<music_id>[0-9]+)/[^/]+?
- )
- '''
+ _VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- if mobj.group('music_id'):
- id_field = 'vid'
- video_id = mobj.group('music_id')
- else:
- video_id = mobj.group('playlist_id') or mobj.group('video_id')
- id_field = 'id'
- doc_url = '%s?%s=%s' % (self._FEED_URL, id_field, video_id)
-
- idoc = self._download_xml(
- doc_url, video_id,
- 'Downloading info', transform_source=fix_xml_ampersands)
-
- entries = []
- for item in idoc.findall('.//item'):
- info = self._get_video_info(item)
- if info:
- entries.append(info)
-
- return self.playlist_result(entries, playlist_id=video_id)
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ mgid = self._extract_triforce_mgid(webpage)
+ videos_info = self._get_videos_info(mgid)
+ return videos_info
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index b8b8bf9..bcc2869 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor):
class ViceArticleIE(InfoExtractor):
IE_NAME = 'vice:article'
- _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
+ _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'
_TESTS = [{
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py
index 5de8273..cf690d7 100644
--- a/youtube_dl/extractor/videopremium.py
+++ b/youtube_dl/extractor/videopremium.py
@@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor):
webpage_url = 'http://videopremium.tv/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
- if re.match(r'^<html><head><script[^>]*>window.location\s*=', webpage):
+ if re.match(r'^<html><head><script[^>]*>window\.location\s*=', webpage):
# Download again, we need a cookie
webpage = self._download_webpage(
webpage_url, video_id,
diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py
index f8e3314..c7a0a88 100644
--- a/youtube_dl/extractor/voxmedia.py
+++ b/youtube_dl/extractor/voxmedia.py
@@ -2,11 +2,44 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from .once import OnceIE
from ..compat import compat_urllib_parse_unquote
+from ..utils import ExtractorError
+
+
+class VoxMediaVolumeIE(OnceIE):
+ _VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P<id>[0-9a-f]{9})'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_data = self._parse_json(self._search_regex(
+ r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id)
+ for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
+ provider_video_id = video_data.get('%s_id' % provider_video_type)
+ if not provider_video_id:
+ continue
+ info = {
+ 'id': video_id,
+ 'title': video_data.get('title_short'),
+ 'description': video_data.get('description_long') or video_data.get('description_short'),
+ 'thumbnail': video_data.get('brightcove_thumbnail')
+ }
+ if provider_video_type == 'brightcove':
+ info['formats'] = self._extract_once_formats(provider_video_id)
+ self._sort_formats(info['formats'])
+ else:
+ info.update({
+ '_type': 'url_transparent',
+ 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
+ 'ie_key': provider_video_type.capitalize(),
+ })
+ return info
+ raise ExtractorError('Unable to find provider video id')
class VoxMediaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
_TESTS = [{
'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
'info_dict': {
@@ -31,6 +64,7 @@ class VoxMediaIE(InfoExtractor):
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
},
'add_ie': ['Ooyala'],
+ 'skip': 'Video Not Found',
}, {
# volume embed
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
@@ -84,6 +118,17 @@ class VoxMediaIE(InfoExtractor):
'description': 'md5:e02d56b026d51aa32c010676765a690d',
},
}],
+ }, {
+ # volume embed, Brightcove Once
+ 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
+ 'md5': '01571a896281f77dc06e084138987ea2',
+ 'info_dict': {
+ 'id': '1231c973d',
+ 'ext': 'mp4',
+ 'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
+ 'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
+ },
+ 'add_ie': ['VoxMediaVolume'],
}]
def _real_extract(self, url):
@@ -91,9 +136,14 @@ class VoxMediaIE(InfoExtractor):
webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id))
def create_entry(provider_video_id, provider_video_type, title=None, description=None):
+ video_url = {
+ 'youtube': '%s',
+ 'ooyala': 'ooyala:%s',
+ 'volume': 'http://volume.vox-cdn.com/embed/%s',
+ }[provider_video_type] % provider_video_id
return {
'_type': 'url_transparent',
- 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
+ 'url': video_url,
'title': title or self._og_search_title(webpage),
'description': description or self._og_search_description(webpage),
}
@@ -124,17 +174,7 @@ class VoxMediaIE(InfoExtractor):
volume_uuid = self._search_regex(
r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
if volume_uuid:
- volume_webpage = self._download_webpage(
- 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
- video_data = self._parse_json(self._search_regex(
- r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
- for provider_video_type in ('ooyala', 'youtube'):
- provider_video_id = video_data.get('%s_id' % provider_video_type)
- if provider_video_id:
- description = video_data.get('description_long') or video_data.get('description_short')
- entries.append(create_entry(
- provider_video_id, provider_video_type, video_data.get('title_short'), description))
- break
+ entries.append(create_entry(volume_uuid, 'volume'))
if len(entries) == 1:
return entries[0]
diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py
index d44ec85..656a4b9 100644
--- a/youtube_dl/extractor/vvvvid.py
+++ b/youtube_dl/extractor/vvvvid.py
@@ -133,7 +133,7 @@ class VVVVIDIE(InfoExtractor):
'season_id': season_id,
'season_number': video_data.get('season_number'),
'episode_id': str_or_none(video_data.get('id')),
- 'epidode_number': int_or_none(video_data.get('number')),
+ 'episode_number': int_or_none(video_data.get('number')),
'episode_title': video_data['title'],
'view_count': int_or_none(video_data.get('views')),
'like_count': int_or_none(video_data.get('video_likes')),
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 8bb7362..621de1e 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor):
# for wdrmaus, in a tag with the class "videoButton" (previously a link
# to the page in a multiline "videoLink"-tag)
json_metadata = self._html_search_regex(
- r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
- webpage, 'media link', default=None, flags=re.MULTILINE)
+ r'''(?sx)class=
+ (?:
+ (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
+ (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
+ )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+ ''',
+ webpage, 'media link', default=None, group='data')
if not json_metadata:
return
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index c42b59e..be3624e 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -221,7 +221,7 @@ class XHamsterEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
- r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
+ r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
webpage, 'xhamster url', default=None)
if not video_url:
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index bea9b87..c6c0b32 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -18,7 +18,7 @@ class XTubeIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
xtube:|
- https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-)
+ https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-)
)
(?P<id>[^/?&#]+)
'''
@@ -64,6 +64,9 @@ class XTubeIE(InfoExtractor):
}, {
'url': 'xtube:kVTUy_G222_',
'only_matching': True,
+ }, {
+ 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index eca6030..085c8d4 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -14,8 +14,16 @@ from ..utils import (
class XVideosIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'
- _TEST = {
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?xvideos\.com/video|
+ flashservice\.xvideos\.com/embedframe/|
+ static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
+ )
+ (?P<id>[0-9]+)
+ '''
+ _TESTS = [{
'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
'md5': '14cea69fcb84db54293b1e971466c2e1',
'info_dict': {
@@ -25,21 +33,33 @@ class XVideosIE(InfoExtractor):
'duration': 108,
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'https://flashservice.xvideos.com/embedframe/4588838',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+
+ webpage = self._download_webpage(
+ 'http://www.xvideos.com/video%s/' % video_id, video_id)
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
if mobj:
raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
- video_title = self._html_search_regex(
- r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
- video_thumbnail = self._search_regex(
+ title = self._html_search_regex(
+ (r'<title>(?P<title>.+?)\s+-\s+XVID',
+ r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', default=None,
+ group='title') or self._og_search_title(webpage)
+
+ thumbnail = self._search_regex(
r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
- video_duration = int_or_none(self._og_search_property(
+ duration = int_or_none(self._og_search_property(
'duration', webpage, default=None)) or parse_duration(
self._search_regex(
r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
@@ -74,8 +94,8 @@ class XVideosIE(InfoExtractor):
return {
'id': video_id,
'formats': formats,
- 'title': video_title,
- 'duration': video_duration,
- 'thumbnail': video_thumbnail,
+ 'title': title,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
'age_limit': 18,
}
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 38f82bf..552013a 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -12,11 +12,13 @@ from ..compat import (
)
from ..utils import (
clean_html,
- unescapeHTML,
+ determine_ext,
ExtractorError,
+ extract_attributes,
int_or_none,
mimetype2ext,
- determine_ext,
+ smuggle_url,
+ unescapeHTML,
)
from .brightcove import (
@@ -28,7 +30,7 @@ from .nbc import NBCSportsVPlayerIE
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
- _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?)'
+ _VALID_URL = r'(?P<host>https?://(?:(?P<country>[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P<display_id>.+)?-)?(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?'
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -50,6 +52,7 @@ class YahooIE(InfoExtractor):
'description': 'md5:66b627ab0a282b26352136ca96ce73c1',
'duration': 151,
},
+ 'skip': 'HTTP Error 404',
},
{
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
@@ -142,7 +145,7 @@ class YahooIE(InfoExtractor):
'skip': 'Domain name in.lifestyle.yahoo.com gone',
}, {
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
- 'md5': '2a9752f74cb898af5d1083ea9f661b58',
+ 'md5': '989396ae73d20c6f057746fb226aa215',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
@@ -227,13 +230,33 @@ class YahooIE(InfoExtractor):
'skip_download': True,
},
},
+ {
+ # custom brightcove
+ 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/',
+ 'info_dict': {
+ 'id': '5575377707001',
+ 'ext': 'mp4',
+ 'title': "Clown entertainers say 'It' is hurting their business",
+ 'description': 'Stephen King s horror film has much to answer for. Jelby and Mr Loopy the Clowns join us.',
+ 'timestamp': 1505341164,
+ 'upload_date': '20170913',
+ 'uploader_id': '2376984109001',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # custom brightcove, geo-restricted to Australia, bypassable
+ 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id') or self._match_id(url)
page_id = mobj.group('id')
- url = mobj.group('url')
+ display_id = mobj.group('display_id') or page_id
host = mobj.group('host')
webpage, urlh = self._download_webpage_handle(url, display_id)
if 'err=404' in urlh.geturl():
@@ -257,10 +280,31 @@ class YahooIE(InfoExtractor):
if bc_url:
return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
+ def brightcove_url_result(bc_url):
+ return self.url_result(
+ smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}),
+ BrightcoveNewIE.ie_key())
+
# Look for Brightcove New Studio embeds
bc_url = BrightcoveNewIE._extract_url(self, webpage)
if bc_url:
- return self.url_result(bc_url, BrightcoveNewIE.ie_key())
+ return brightcove_url_result(bc_url)
+
+ brightcove_iframe = self._search_regex(
+ r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage,
+ 'brightcove iframe', default=None)
+ if brightcove_iframe:
+ attr = extract_attributes(brightcove_iframe)
+ src = attr.get('src')
+ if src:
+ parsed_src = compat_urlparse.urlparse(src)
+ qs = compat_urlparse.parse_qs(parsed_src.query)
+ account_id = qs.get('accountId', ['2376984109001'])[0]
+ brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0]
+ if account_id and brightcove_id:
+ return brightcove_url_result(
+ 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+ % (account_id, brightcove_id))
# Query result is often embedded in webpage as JSON. Sometimes explicit requests
# to video API results in a failure with geo restriction reason therefore using
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ad2e933..4e8db24 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -332,6 +332,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
(?:www\.)?deturl\.com/www\.youtube\.com/|
(?:www\.)?pwnyoutube\.com/|
+ (?:www\.)?hooktube\.com/|
(?:www\.)?yourepeat\.com/|
tube\.majestyc\.net/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
@@ -1629,7 +1630,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
class="[^"]*"[^>]*>
[^<]+\.{3}\s*
</a>
- ''', r'\1', video_description)
+ ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@@ -1682,7 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_uploader_id = None
video_uploader_url = None
mobj = re.search(
- r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
+ r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
video_webpage)
if mobj is not None:
video_uploader_id = mobj.group('uploader_id')
@@ -2039,39 +2040,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
-class YoutubeSharedVideoIE(InfoExtractor):
- _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'
- IE_NAME = 'youtube:shared'
-
- _TEST = {
- 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
- 'info_dict': {
- 'id': 'uPDB5I9wfp8',
- 'ext': 'webm',
- 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
- 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
- 'upload_date': '20160219',
- 'uploader': 'Pocoyo - Português (BR)',
- 'uploader_id': 'PocoyoBrazil',
- },
- 'add_ie': ['Youtube'],
- 'params': {
- # There are already too many Youtube downloads
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- real_video_id = self._html_search_meta(
- 'videoId', webpage, 'YouTube video id', fatal=True)
-
- return self.url_result(real_video_id, YoutubeIE.ie_key())
-
-
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com playlists'
_VALID_URL = r"""(?x)(?:
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index f71d413..3ea1afc 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -44,7 +44,7 @@ ACODECS = {
'aac': 'aac',
'flac': 'flac',
'm4a': 'aac',
- 'opus': 'opus',
+ 'opus': 'libopus',
'vorbis': 'libvorbis',
'wav': None,
}
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 92b22e6..59fb334 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1933,7 +1933,7 @@ class PagedList(object):
class OnDemandPagedList(PagedList):
- def __init__(self, pagefunc, pagesize, use_cache=False):
+ def __init__(self, pagefunc, pagesize, use_cache=True):
self._pagefunc = pagefunc
self._pagesize = pagesize
self._use_cache = use_cache
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 0e2e4dc..d01ba30 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2017.09.24'
+__version__ = '2017.10.15.1'