aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWolfgang Wiedmeyer <wolfgit@wiedmeyer.de>2016-12-03 21:10:24 +0100
committerWolfgang Wiedmeyer <wolfgit@wiedmeyer.de>2016-12-03 21:10:24 +0100
commit0b9ee148f5234d4e9fef6110e512dd7e95a5641b (patch)
treec2a48510636f461b9f74cee7612f72336d0ea19d
parentb624dcf2b031a55102993dbff90c1c1d0c80fae2 (diff)
parent83442966194640d9bc00e7f3086aa5e8b25c4ae3 (diff)
downloadyoutube-dl-0b9ee148f5234d4e9fef6110e512dd7e95a5641b.zip
youtube-dl-0b9ee148f5234d4e9fef6110e512dd7e95a5641b.tar.gz
youtube-dl-0b9ee148f5234d4e9fef6110e512dd7e95a5641b.tar.bz2
Merge branch 'upstream'
-rw-r--r--.github/ISSUE_TEMPLATE.md6
-rw-r--r--.gitignore4
-rw-r--r--AUTHORS2
-rw-r--r--CONTRIBUTING.md4
-rw-r--r--ChangeLog167
-rw-r--r--Makefile2
-rw-r--r--README.md8
-rwxr-xr-xdevscripts/bash-completion.py1
-rw-r--r--devscripts/create-github-release.py17
-rwxr-xr-xdevscripts/fish-completion.py1
-rw-r--r--devscripts/generate_aes_testdata.py1
-rwxr-xr-xdevscripts/gh-pages/update-sites.py1
-rwxr-xr-xdevscripts/make_contributing.py1
-rw-r--r--devscripts/make_lazy_extractors.py1
-rw-r--r--devscripts/make_supportedsites.py1
-rw-r--r--devscripts/prepare_manpage.py1
-rwxr-xr-xdevscripts/release.sh2
-rwxr-xr-xdevscripts/zsh-completion.py1
-rw-r--r--docs/supportedsites.md21
-rw-r--r--test/test_InfoExtractor.py1
-rw-r--r--test/test_YoutubeDL.py6
-rw-r--r--test/test_aes.py1
-rw-r--r--test/test_download.py2
-rw-r--r--test/test_execution.py1
-rw-r--r--test/test_http.py1
-rw-r--r--test/test_iqiyi_sdk_interpreter.py1
-rw-r--r--test/test_jsinterp.py8
-rw-r--r--test/test_utils.py9
-rw-r--r--test/test_verbose_output.py1
-rw-r--r--test/test_write_annotations.py2
-rw-r--r--test/test_youtube_lists.py1
-rw-r--r--test/test_youtube_signature.py1
-rwxr-xr-xyoutube_dl/YoutubeDL.py2
-rw-r--r--youtube_dl/__init__.py4
-rw-r--r--youtube_dl/aes.py2
-rw-r--r--youtube_dl/compat.py2
-rw-r--r--youtube_dl/downloader/__init__.py2
-rw-r--r--youtube_dl/downloader/external.py1
-rw-r--r--youtube_dl/downloader/f4m.py8
-rw-r--r--youtube_dl/downloader/fragment.py5
-rw-r--r--youtube_dl/downloader/hls.py8
-rw-r--r--youtube_dl/downloader/ism.py271
-rw-r--r--youtube_dl/extractor/adultswim.py23
-rw-r--r--youtube_dl/extractor/aenetworks.py12
-rw-r--r--youtube_dl/extractor/afreecatv.py26
-rw-r--r--youtube_dl/extractor/amcnetworks.py5
-rw-r--r--youtube_dl/extractor/anvato.py28
-rw-r--r--youtube_dl/extractor/ard.py12
-rw-r--r--youtube_dl/extractor/audioboom.py9
-rw-r--r--youtube_dl/extractor/azubu.py11
-rw-r--r--youtube_dl/extractor/bandcamp.py86
-rw-r--r--youtube_dl/extractor/beeg.py14
-rw-r--r--youtube_dl/extractor/cbslocal.py41
-rwxr-xr-xyoutube_dl/extractor/cda.py65
-rw-r--r--youtube_dl/extractor/comedycentral.py36
-rw-r--r--youtube_dl/extractor/common.py115
-rw-r--r--youtube_dl/extractor/crunchyroll.py2
-rw-r--r--youtube_dl/extractor/dotsub.py46
-rw-r--r--youtube_dl/extractor/drtuber.py22
-rw-r--r--youtube_dl/extractor/eagleplatform.py53
-rw-r--r--youtube_dl/extractor/espn.py126
-rw-r--r--youtube_dl/extractor/extractors.py34
-rw-r--r--youtube_dl/extractor/facebook.py60
-rw-r--r--youtube_dl/extractor/fox9.py43
-rw-r--r--youtube_dl/extractor/franceculture.py4
-rw-r--r--youtube_dl/extractor/funnyordie.py49
-rw-r--r--youtube_dl/extractor/generic.py87
-rw-r--r--youtube_dl/extractor/googlesearch.py18
-rw-r--r--youtube_dl/extractor/hellporno.py12
-rw-r--r--youtube_dl/extractor/hornbunny.py33
-rw-r--r--youtube_dl/extractor/imgur.py5
-rw-r--r--youtube_dl/extractor/jamendo.py107
-rw-r--r--youtube_dl/extractor/litv.py39
-rw-r--r--youtube_dl/extractor/liveleak.py19
-rw-r--r--youtube_dl/extractor/microsoftvirtualacademy.py7
-rw-r--r--youtube_dl/extractor/mitele.py144
-rw-r--r--youtube_dl/extractor/movieclips.py2
-rw-r--r--youtube_dl/extractor/msn.py5
-rw-r--r--youtube_dl/extractor/mtv.py5
-rw-r--r--youtube_dl/extractor/nick.py40
-rw-r--r--youtube_dl/extractor/nobelprize.py62
-rw-r--r--youtube_dl/extractor/normalboots.py12
-rw-r--r--youtube_dl/extractor/nrk.py44
-rw-r--r--youtube_dl/extractor/onet.py4
-rw-r--r--youtube_dl/extractor/ooyala.py7
-rw-r--r--youtube_dl/extractor/openload.py65
-rw-r--r--youtube_dl/extractor/pandatv.py91
-rw-r--r--youtube_dl/extractor/plays.py34
-rw-r--r--youtube_dl/extractor/pluralsight.py10
-rw-r--r--youtube_dl/extractor/pornhub.py13
-rw-r--r--youtube_dl/extractor/puls4.py8
-rw-r--r--youtube_dl/extractor/radiocanada.py9
-rw-r--r--youtube_dl/extractor/redtube.py20
-rw-r--r--youtube_dl/extractor/rentv.py76
-rw-r--r--youtube_dl/extractor/ruutu.py6
-rw-r--r--youtube_dl/extractor/screenwavemedia.py146
-rw-r--r--youtube_dl/extractor/shahid.py74
-rw-r--r--youtube_dl/extractor/shared.py100
-rw-r--r--youtube_dl/extractor/soundcloud.py2
-rw-r--r--youtube_dl/extractor/spike.py20
-rw-r--r--youtube_dl/extractor/teamfourstar.py48
-rw-r--r--youtube_dl/extractor/thisoldhouse.py7
-rw-r--r--youtube_dl/extractor/tmz.py16
-rw-r--r--youtube_dl/extractor/toutv.py11
-rw-r--r--youtube_dl/extractor/tubitv.py39
-rw-r--r--youtube_dl/extractor/tvanouvelles.py65
-rw-r--r--youtube_dl/extractor/tvp.py6
-rw-r--r--youtube_dl/extractor/twitch.py3
-rw-r--r--youtube_dl/extractor/twitter.py5
-rw-r--r--youtube_dl/extractor/vessel.py10
-rw-r--r--youtube_dl/extractor/vevo.py35
-rw-r--r--youtube_dl/extractor/vice.py125
-rw-r--r--youtube_dl/extractor/viceland.py82
-rw-r--r--youtube_dl/extractor/viki.py31
-rw-r--r--youtube_dl/extractor/vimeo.py23
-rw-r--r--youtube_dl/extractor/vk.py15
-rw-r--r--youtube_dl/extractor/vlive.py19
-rw-r--r--youtube_dl/extractor/vodlocker.py3
-rw-r--r--youtube_dl/extractor/vzaar.py55
-rw-r--r--youtube_dl/extractor/webcaster.py102
-rw-r--r--youtube_dl/extractor/yahoo.py29
-rw-r--r--youtube_dl/extractor/youtube.py7
-rw-r--r--youtube_dl/jsinterp.py4
-rw-r--r--youtube_dl/socks.py3
-rw-r--r--youtube_dl/swfinterp.py3
-rw-r--r--youtube_dl/utils.py20
-rw-r--r--youtube_dl/version.py2
127 files changed, 2796 insertions, 802 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 0a051ad..36559dd 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.21.1**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.01**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.10.21.1
+[debug] youtube-dl version 2016.12.01
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.gitignore b/.gitignore
index bbb63bd..82c4a03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,10 @@ updates_key.pem
*.m4v
*.mp3
*.3gp
+*.wav
+*.ape
+*.mkv
+*.swf
*.part
*.swp
test/testdata
diff --git a/AUTHORS b/AUTHORS
index 2d0b0c6..4a6f7e1 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -188,3 +188,5 @@ Xie Yanbo
Philip Xu
John Hawkinson
Rich Leeper
+Zhong Jianxin
+Thor77
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 29f52cb..495955b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -92,7 +92,7 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site
-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
@@ -245,7 +245,7 @@ Say `meta` from the previous example has a `title` and you are about to extract
title = meta['title']
```
-If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
+If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
diff --git a/ChangeLog b/ChangeLog
index 4987fb7..bf5f269 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,170 @@
+version <unreleased>
+
+Extractors
++ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
+
+version 2016.12.01
+
+Extractors
+* [soundcloud] Update client id (#11327)
+* [ruutu] Detect DRM protected videos
++ [liveleak] Add support for youtube embeds (#10688)
+* [spike] Fix full episodes support (#11312)
+* [comedycentral] Fix full episodes support
+* [normalboots] Rewrite in terms of JWPlatform (#11184)
+* [teamfourstar] Rewrite in terms of JWPlatform (#11184)
+- [screenwavemedia] Remove extractor (#11184)
+
+
+version 2016.11.27
+
+Extractors
++ [webcaster] Add support for webcaster.pro
++ [azubu] Add support for azubu.uol.com.br (#11305)
+* [viki] Prefer hls formats
+* [viki] Fix rtmp formats extraction (#11255)
+* [puls4] Relax URL regular expression (#11267)
+* [vevo] Improve artist extraction (#10911)
+* [mitele] Relax URL regular expression and extract more metadata (#11244)
++ [cbslocal] Recognize New York site (#11285)
++ [youtube:playlist] Pass disable_polymer in URL query (#11193)
+
+
+version 2016.11.22
+
+Extractors
+* [hellporno] Fix video extension extraction (#11247)
++ [hellporno] Add support for hellporno.net (#11247)
++ [amcnetworks] Recognize more BBC America URLs (#11263)
+* [funnyordie] Improve extraction (#11208)
+* [extractor/generic] Improve limelight embeds support
+- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028)
+* [bandcamp] Fix free downloads extraction and extract all formats (#11067)
+* [twitter:card] Relax URL regular expression (#11225)
++ [tvanouvelles] Add support for tvanouvelles.ca (#10616)
+
+
+version 2016.11.18
+
+Extractors
+* [youtube:live] Relax URL regular expression (#11164)
+* [openload] Fix extraction (#10408, #11122)
+* [vlive] Prefer locale over language for subtitles id (#11203)
+
+
+version 2016.11.14.1
+
+Core
++ [downoader/fragment,f4m,hls] Respect HTTP headers from info dict
+* [extractor/common] Fix media templates with Bandwidth substitution pattern in
+ MPD manifests (#11175)
+* [extractor/common] Improve thumbnail extraction from JSON-LD
+
+Extractors
++ [nrk] Workaround geo restriction
++ [nrk] Improve error detection and messages
++ [afreecatv] Add support for vod.afreecatv.com (#11174)
+* [cda] Fix and improve extraction (#10929, #10936)
+* [plays] Fix extraction (#11165)
+* [eagleplatform] Fix extraction (#11160)
++ [audioboom] Recognize /posts/ URLs (#11149)
+
+
+version 2016.11.08.1
+
+Extractors
+* [espn:article] Fix support for espn.com articles
+* [franceculture] Fix extraction (#11140)
+
+
+version 2016.11.08
+
+Extractors
+* [tmz:article] Fix extraction (#11052)
+* [espn] Fix extraction (#11041)
+* [mitele] Fix extraction after website redesign (#10824)
+- [ard] Remove age restriction check (#11129)
+* [generic] Improve support for pornhub.com embeds (#11100)
++ [generic] Add support for redtube.com embeds (#11099)
++ [generic] Add support for drtuber.com embeds (#11098)
++ [redtube] Add support for embed URLs
++ [drtuber] Add support for embed URLs
++ [yahoo] Improve content id extraction (#11088)
+* [toutv] Relax URL regular expression (#11121)
+
+
+version 2016.11.04
+
+Core
+* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
+ manifests (#11113)
+* [downloader/ism] Fix AVC Decoder Configuration Record
+
+Extractors
++ [fox9] Add support for fox9.com (#11110)
++ [anvato] Extract more metadata and improve formats extraction
+* [vodlocker] Improve removed videos detection (#11106)
++ [vzaar] Add support for vzaar.com (#11093)
++ [vice] Add support for uplynk preplay videos (#11101)
+* [tubitv] Fix extraction (#11061)
++ [shahid] Add support for authentication (#11091)
++ [radiocanada] Add subtitles support (#11096)
++ [generic] Add support for ISM manifests
+
+
+version 2016.11.02
+
+Core
++ Add basic support for Smooth Streaming protocol (#8118, #10969)
+* Improve MPD manifest base URL extraction (#10909, #11079)
+* Fix --match-filter for int-like strings (#11082)
+
+Extractors
++ [mva] Add support for ISM formats
++ [msn] Add support for ISM formats
++ [onet] Add support for ISM formats
++ [tvp] Add support for ISM formats
++ [nicknight] Add support for nicknight sites (#10769)
+
+
+version 2016.10.30
+
+Extractors
+* [facebook] Improve 1080P video detection (#11073)
+* [imgur] Recognize /r/ URLs (#11071)
+* [beeg] Fix extraction (#11069)
+* [openload] Fix extraction (#10408)
+* [gvsearch] Modernize and fix search request (#11051)
+* [adultswim] Fix extraction (#10979)
++ [nobelprize] Add support for nobelprize.org (#9999)
+* [hornbunny] Fix extraction (#10981)
+* [tvp] Improve video id extraction (#10585)
+
+
+version 2016.10.26
+
+Extractors
++ [rentv] Add support for ren.tv (#10620)
++ [ard] Detect unavailable videos (#11018)
+* [vk] Fix extraction (#11022)
+
+
+version 2016.10.25
+
+Core
+* Running youtube-dl in the background is fixed (#10996, #10706, #955)
+
+Extractors
++ [jamendo] Add support for jamendo.com (#10132, #10736)
++ [pandatv] Add support for panda.tv (#10736)
++ [dotsub] Support Vimeo embed (#10964)
+* [litv] Fix extraction
++ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994)
+* [vivo] Fix extraction (#11003)
++ [twitch:stream] Add support for rebroadcasts (#10995)
+* [pluralsight] Fix subtitles conversion (#10990)
+
+
version 2016.10.21.1
Extractors
diff --git a/Makefile b/Makefile
index 8d66e48..9d1ddc9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
- rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+ rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete
diff --git a/README.md b/README.md
index 0fbf325..8409322 100644
--- a/README.md
+++ b/README.md
@@ -664,7 +664,7 @@ $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
# Download best format available but not better that 480p
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
-# Download best video only format but no bigger that 50 MB
+# Download best video only format but no bigger than 50 MB
$ youtube-dl -f 'best[filesize<50M]'
# Download best format available via direct link over HTTP/HTTPS protocol
@@ -758,7 +758,7 @@ Once the video is fully downloaded, use any video player, such as [mpv](https://
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
-It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl.
+It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
@@ -930,7 +930,7 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site
-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
@@ -1083,7 +1083,7 @@ Say `meta` from the previous example has a `title` and you are about to extract
title = meta['title']
```
-If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
+If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index ce68f26..3d13913 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -25,5 +25,6 @@ def build_completion(opt_parser):
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
f.write(filled_template)
+
parser = youtube_dl.parseOpts()[0]
build_completion(parser)
diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py
index 3b8021e..30716ad 100644
--- a/devscripts/create-github-release.py
+++ b/devscripts/create-github-release.py
@@ -2,11 +2,13 @@
from __future__ import unicode_literals
import base64
+import io
import json
import mimetypes
import netrc
import optparse
import os
+import re
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -90,16 +92,23 @@ class GitHubReleaser(object):
def main():
- parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
+ parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH')
options, args = parser.parse_args()
- if len(args) != 2:
+ if len(args) != 3:
parser.error('Expected a version and a build directory')
- version, build_path = args
+ changelog_file, version, build_path = args
+
+ with io.open(changelog_file, encoding='utf-8') as inf:
+ changelog = inf.read()
+
+ mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
+ body = mobj.group(1) if mobj else ''
releaser = GitHubReleaser()
- new_release = releaser.create_release(version, name='youtube-dl %s' % version)
+ new_release = releaser.create_release(
+ version, name='youtube-dl %s' % version, body=body)
release_id = new_release['id']
for asset in os.listdir(build_path):
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index 41629d8..51d19dd 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -44,5 +44,6 @@ def build_completion(opt_parser):
with open(FISH_COMPLETION_FILE, 'w') as f:
f.write(filled_template)
+
parser = youtube_dl.parseOpts()[0]
build_completion(parser)
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
index 2e389fc..e3df42c 100644
--- a/devscripts/generate_aes_testdata.py
+++ b/devscripts/generate_aes_testdata.py
@@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv):
out, _ = prog.communicate(secret_msg)
return out
+
iv = key = [0x20, 0x15] + 14 * [0]
r = openssl_encode('aes-128-cbc', key, iv)
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
index 503c137..531c93c 100755
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -32,5 +32,6 @@ def main():
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
sitesf.write(template)
+
if __name__ == '__main__':
main()
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index 5e454a4..226d1a5 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -28,5 +28,6 @@ def main():
with io.open(outfile, 'w', encoding='utf-8') as outf:
outf.write(out)
+
if __name__ == '__main__':
main()
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 9a79c2b..19114d3 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -59,6 +59,7 @@ def build_lazy_ie(ie, name):
s += make_valid_template.format(valid_url=ie._make_valid_url())
return s
+
# find the correct sorting and add the required base classes so that sublcasses
# can be correctly created
classes = _ALL_CLASSES[:-1]
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 8cb4a46..764795b 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -41,5 +41,6 @@ def main():
with io.open(outfile, 'w', encoding='utf-8') as outf:
outf.write(out)
+
if __name__ == '__main__':
main()
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index ce54873..f9fe63f 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -74,5 +74,6 @@ def filter_options(readme):
return ret
+
if __name__ == '__main__':
main()
diff --git a/devscripts/release.sh b/devscripts/release.sh
index 1af61aa..4db5def 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -110,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
ROOT=$(pwd)
-python devscripts/create-github-release.py $version "$ROOT/build/$version"
+python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index 04728e8..60aaf76 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -44,5 +44,6 @@ def build_completion(opt_parser):
with open(ZSH_COMPLETION_FILE, "w") as f:
f.write(template)
+
parser = youtube_dl.parseOpts()[0]
build_completion(parser)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 0a51888..edb76d9 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -158,6 +158,7 @@
- **CollegeRama**
- **ComCarCoff**
- **ComedyCentral**
+ - **ComedyCentralFullEpisodes**
- **ComedyCentralShortname**
- **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
@@ -225,6 +226,7 @@
- **EroProfile**
- **Escapist**
- **ESPN**
+ - **ESPNArticle**
- **EsriVideo**
- **Europa**
- **EveryonesMixtape**
@@ -247,6 +249,7 @@
- **FootyRoom**
- **Formula1**
- **FOX**
+ - **FOX9**
- **Foxgay**
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
@@ -332,6 +335,8 @@
- **ivideon**: Ivideon TV
- **Iwara**
- **Izlesene**
+ - **Jamendo**
+ - **JamendoAlbum**
- **JeuxVideo**
- **Jove**
- **jpopsuki.tv**
@@ -481,11 +486,13 @@
- **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com**
- **nick.de**
+ - **nicknight**
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **Nintendo**
- **njoy**: N-JOY
- **njoy:embed**
+ - **NobelPrize**
- **Noco**
- **Normalboots**
- **NosVideo**
@@ -527,6 +534,7 @@
- **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
+ - **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV
- **parliamentlive.tv**: UK parliament videos
- **Patreon**
@@ -586,6 +594,8 @@
- **RDS**: RDS.ca
- **RedTube**
- **RegioTV**
+ - **RENTV**
+ - **RENTVArticle**
- **Restudy**
- **Reuters**
- **ReverbNation**
@@ -634,14 +644,13 @@
- **Screencast**
- **ScreencastOMatic**
- **ScreenJunkies**
- - **ScreenwaveMedia**
- **Seeker**
- **SenateISVP**
- **SendtoNews**
- **ServingSys**
- **Sexu**
- **Shahid**
- - **Shared**: shared.sx and vivo.sx
+ - **Shared**: shared.sx
- **ShareSix**
- **Sina**
- **SixPlay**
@@ -706,7 +715,7 @@
- **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel**
- **Teamcoco**
- - **TeamFour**
+ - **TeamFourStar**
- **TechTalks**
- **techtv.mit.edu**
- **ted**
@@ -762,6 +771,8 @@
- **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se
+ - **TVANouvelles**
+ - **TVANouvellesArticle**
- **TVC**
- **TVCArticle**
- **tvigle**: Интернет-телевидение Tvigle.ru
@@ -848,6 +859,7 @@
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
+ - **Vivo**: vivo.sx
- **vk**: VK
- **vk:uservideos**: VK - User's Videos
- **vk:wallpost**
@@ -862,6 +874,7 @@
- **vube**: Vube.com
- **VuClip**
- **VyboryMos**
+ - **Vzaar**
- **Walla**
- **washingtonpost**
- **washingtonpost:article**
@@ -869,6 +882,8 @@
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**
+ - **Webcaster**
+ - **WebcasterFeed**
- **WebOfStories**
- **WebOfStoriesPlaylist**
- **WeiqiTV**: WQTV
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index a98305c..437c727 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 0dfe25c..8bf00be 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -605,6 +605,7 @@ class TestYoutubeDL(unittest.TestCase):
'extractor': 'TEST',
'duration': 30,
'filesize': 10 * 1024,
+ 'playlist_id': '42',
}
second = {
'id': '2',
@@ -614,6 +615,7 @@ class TestYoutubeDL(unittest.TestCase):
'duration': 10,
'description': 'foo',
'filesize': 5 * 1024,
+ 'playlist_id': '43',
}
videos = [first, second]
@@ -650,6 +652,10 @@ class TestYoutubeDL(unittest.TestCase):
res = get_videos(f)
self.assertEqual(res, ['1'])
+ f = match_filter_func('playlist_id = 42')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
def test_playlist_items_selection(self):
entries = [{
'id': compat_str(i),
diff --git a/test/test_aes.py b/test/test_aes.py
index 315a3f5..54078a6 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -51,5 +51,6 @@ class TestAES(unittest.TestCase):
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_download.py b/test/test_download.py
index a3f1c06..4639529 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -60,6 +60,7 @@ def _file_md5(fn):
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
+
defs = gettestcases()
@@ -217,6 +218,7 @@ def generator(test_case):
return test_template
+
# And add them to TestDownload
for n, test_case in enumerate(defs):
test_method = generator(test_case)
diff --git a/test/test_execution.py b/test/test_execution.py
index 620db08..11661bb 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -39,5 +39,6 @@ class TestExecution(unittest.TestCase):
_, stderr = p.communicate()
self.assertFalse(stderr)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_http.py b/test/test_http.py
index bb0a098..7a7a351 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -169,5 +169,6 @@ class TestProxy(unittest.TestCase):
# b'xn--fiq228c' is '中文'.encode('idna')
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py
index 9d95cb6..789059d 100644
--- a/test/test_iqiyi_sdk_interpreter.py
+++ b/test/test_iqiyi_sdk_interpreter.py
@@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase):
ie._login()
self.assertTrue('unable to log in:' in logger.messages[0])
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 63c350b..c24b8ca 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -104,6 +104,14 @@ class TestJSInterpreter(unittest.TestCase):
}''')
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
+ def test_call(self):
+ jsi = JSInterpreter('''
+ function x() { return 2; }
+ function y(a) { return x() + a; }
+ function z() { return y(3); }
+ ''')
+ self.assertEqual(jsi.call_function('z'), 5)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index b1b2eff..2e3cd01 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -69,6 +69,7 @@ from youtube_dl.utils import (
uppercase_escape,
lowercase_escape,
url_basename,
+ base_url,
urlencode_postdata,
urshift,
update_url_query,
@@ -437,6 +438,13 @@ class TestUtil(unittest.TestCase):
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
'trailer.mp4')
+ def test_base_url(self):
+ self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
+ self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
+ self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
+
def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None)
@@ -1067,5 +1075,6 @@ The first line
self.assertEqual(get_element_by_class('foo', html), 'nice')
self.assertEqual(get_element_by_class('no-such-class', html), None)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py
index 96a66f7..c1465fe 100644
--- a/test/test_verbose_output.py
+++ b/test/test_verbose_output.py
@@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase):
self.assertTrue(b'-p' in serr)
self.assertTrue(b'secret' not in serr)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py
index 8de08f2..41abdfe 100644
--- a/test/test_write_annotations.py
+++ b/test/test_write_annotations.py
@@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL):
super(YoutubeDL, self).__init__(*args, **kwargs)
self.to_stderr = self.to_screen
+
params = get_params({
'writeannotations': True,
'skip_download': True,
@@ -74,5 +75,6 @@ class TestAnnotations(unittest.TestCase):
def tearDown(self):
try_rm(ANNOTATIONS_FILE)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index af1c454..7a33dbf 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase):
for entry in result['entries']:
self.assertTrue(entry.get('title'))
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 0608644..f0c370e 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig):
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
setattr(TestSignature, test_func.__name__, test_func)
+
for test_spec in _TESTS:
make_tfunc(*test_spec)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 99825e3..53f20ac 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1658,7 +1658,7 @@ class YoutubeDL(object):
video_ext, audio_ext = audio.get('ext'), video.get('ext')
if video_ext and audio_ext:
COMPATIBLE_EXTS = (
- ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
+ ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
('webm')
)
for exts in COMPATIBLE_EXTS:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 6433935..6850d95 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -95,8 +95,7 @@ def _real_main(argv=None):
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
except IOError:
sys.exit('ERROR: batch file could not be read')
- all_urls = batch_urls + args
- all_urls = [url.strip() for url in all_urls]
+ all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already stripped in read_batch_urls
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
@@ -450,4 +449,5 @@ def main(argv=None):
except KeyboardInterrupt:
sys.exit('\nERROR: Interrupted by user')
+
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
index a01c367..b8ff454 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
return plaintext
+
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
@@ -328,4 +329,5 @@ def inc(data):
break
return data
+
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index b8aaf5a..83ee7e2 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
+
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
@@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161():
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
+
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
compat_get_terminal_size = shutil.get_terminal_size
else:
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 817591d..16952e3 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -7,6 +7,7 @@ from .http import HttpFD
from .rtmp import RtmpFD
from .dash import DashSegmentsFD
from .rtsp import RtspFD
+from .ism import IsmFD
from .external import (
get_external_downloader,
FFmpegFD,
@@ -24,6 +25,7 @@ PROTOCOL_MAP = {
'rtsp': RtspFD,
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
+ 'ism': IsmFD,
}
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 0aeae3b..5d3e5d8 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -293,6 +293,7 @@ class FFmpegFD(ExternalFD):
class AVconvFD(FFmpegFD):
pass
+
_BY_NAME = dict(
(klass.get_basename(), klass)
for name, klass in globals().items()
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 80c21d4..688e086 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -314,7 +314,8 @@ class F4mFD(FragmentFD):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
- urlh = self.ydl.urlopen(man_url)
+
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
@@ -387,7 +388,10 @@ class F4mFD(FragmentFD):
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
try:
- success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
+ success = ctx['dl'].download(frag_filename, {
+ 'url': url_parsed.geturl(),
+ 'http_headers': info_dict.get('http_headers'),
+ })
if not success:
return False
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 84aacf7..60df627 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -9,6 +9,7 @@ from ..utils import (
error_to_compat_str,
encodeFilename,
sanitize_open,
+ sanitized_Request,
)
@@ -37,6 +38,10 @@ class FragmentFD(FileDownloader):
def report_skip_fragment(self, fragment_name):
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
+ def _prepare_url(self, info_dict, url):
+ headers = info_dict.get('http_headers')
+ return sanitized_Request(url, None, headers) if headers else url
+
def _prepare_and_start_frag_download(self, ctx):
self._prepare_frag_download(ctx)
self._start_frag_download(ctx)
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 541b92e..7373ec0 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -59,7 +59,8 @@ class HlsFD(FragmentFD):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
- manifest = self.ydl.urlopen(man_url).read()
+
+ manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
s = manifest.decode('utf-8', 'ignore')
@@ -112,7 +113,10 @@ class HlsFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
- success = ctx['dl'].download(frag_filename, {'url': frag_url})
+ success = ctx['dl'].download(frag_filename, {
+ 'url': frag_url,
+ 'http_headers': info_dict.get('http_headers'),
+ })
if not success:
return False
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
new file mode 100644
index 0000000..93cac5e
--- /dev/null
+++ b/youtube_dl/downloader/ism.py
@@ -0,0 +1,271 @@
+from __future__ import unicode_literals
+
+import os
+import time
+import struct
+import binascii
+import io
+
+from .fragment import FragmentFD
+from ..compat import compat_urllib_error
+from ..utils import (
+ sanitize_open,
+ encodeFilename,
+)
+
+
+u8 = struct.Struct(b'>B')
+u88 = struct.Struct(b'>Bx')
+u16 = struct.Struct(b'>H')
+u1616 = struct.Struct(b'>Hxx')
+u32 = struct.Struct(b'>I')
+u64 = struct.Struct(b'>Q')
+
+s88 = struct.Struct(b'>bx')
+s16 = struct.Struct(b'>h')
+s1616 = struct.Struct(b'>hxx')
+s32 = struct.Struct(b'>i')
+
+unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
+
+TRACK_ENABLED = 0x1
+TRACK_IN_MOVIE = 0x2
+TRACK_IN_PREVIEW = 0x4
+
+SELF_CONTAINED = 0x1
+
+
+def box(box_type, payload):
+ return u32.pack(8 + len(payload)) + box_type + payload
+
+
+def full_box(box_type, version, flags, payload):
+ return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
+
+
+def write_piff_header(stream, params):
+ track_id = params['track_id']
+ fourcc = params['fourcc']
+ duration = params['duration']
+ timescale = params.get('timescale', 10000000)
+ language = params.get('language', 'und')
+ height = params.get('height', 0)
+ width = params.get('width', 0)
+ is_audio = width == 0 and height == 0
+ creation_time = modification_time = int(time.time())
+
+ ftyp_payload = b'isml' # major brand
+ ftyp_payload += u32.pack(1) # minor version
+ ftyp_payload += b'piff' + b'iso2' # compatible brands
+ stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
+
+ mvhd_payload = u64.pack(creation_time)
+ mvhd_payload += u64.pack(modification_time)
+ mvhd_payload += u32.pack(timescale)
+ mvhd_payload += u64.pack(duration)
+ mvhd_payload += s1616.pack(1) # rate
+ mvhd_payload += s88.pack(1) # volume
+ mvhd_payload += u16.pack(0) # reserved
+ mvhd_payload += u32.pack(0) * 2 # reserved
+ mvhd_payload += unity_matrix
+ mvhd_payload += u32.pack(0) * 6 # pre defined
+ mvhd_payload += u32.pack(0xffffffff) # next track id
+ moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
+
+ tkhd_payload = u64.pack(creation_time)
+ tkhd_payload += u64.pack(modification_time)
+ tkhd_payload += u32.pack(track_id) # track id
+ tkhd_payload += u32.pack(0) # reserved
+ tkhd_payload += u64.pack(duration)
+ tkhd_payload += u32.pack(0) * 2 # reserved
+ tkhd_payload += s16.pack(0) # layer
+ tkhd_payload += s16.pack(0) # alternate group
+ tkhd_payload += s88.pack(1 if is_audio else 0) # volume
+ tkhd_payload += u16.pack(0) # reserved
+ tkhd_payload += unity_matrix
+ tkhd_payload += u1616.pack(width)
+ tkhd_payload += u1616.pack(height)
+ trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
+
+ mdhd_payload = u64.pack(creation_time)
+ mdhd_payload += u64.pack(modification_time)
+ mdhd_payload += u32.pack(timescale)
+ mdhd_payload += u64.pack(duration)
+ mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
+ mdhd_payload += u16.pack(0) # pre defined
+ mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
+
+ hdlr_payload = u32.pack(0) # pre defined
+ hdlr_payload += b'soun' if is_audio else b'vide' # handler type
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
+ mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
+
+ if is_audio:
+ smhd_payload = s88.pack(0) # balance
+ smhd_payload += u16.pack(0) # reserved
+ media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
+ else:
+ vmhd_payload = u16.pack(0) # graphics mode
+ vmhd_payload += u16.pack(0) * 3 # opcolor
+ media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
+ minf_payload = media_header_box
+
+ dref_payload = u32.pack(1) # entry count
+ dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
+ dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
+ minf_payload += box(b'dinf', dinf_payload) # Data Information Box
+
+ stsd_payload = u32.pack(1) # entry count
+
+ sample_entry_payload = u8.pack(0) * 6 # reserved
+ sample_entry_payload += u16.pack(1) # data reference index
+ if is_audio:
+ sample_entry_payload += u32.pack(0) * 2 # reserved
+ sample_entry_payload += u16.pack(params.get('channels', 2))
+ sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
+ sample_entry_payload += u16.pack(0) # pre defined
+ sample_entry_payload += u16.pack(0) # reserved
+ sample_entry_payload += u1616.pack(params['sampling_rate'])
+
+ if fourcc == 'AACL':
+ sample_entry_box = box(b'mp4a', sample_entry_payload)
+ else:
+ sample_entry_payload = sample_entry_payload
+ sample_entry_payload += u16.pack(0) # pre defined
+ sample_entry_payload += u16.pack(0) # reserved
+ sample_entry_payload += u32.pack(0) * 3 # pre defined
+ sample_entry_payload += u16.pack(width)
+ sample_entry_payload += u16.pack(height)
+ sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
+ sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
+ sample_entry_payload += u32.pack(0) # reserved
+ sample_entry_payload += u16.pack(1) # frame count
+ sample_entry_payload += u8.pack(0) * 32 # compressor name
+ sample_entry_payload += u16.pack(0x18) # depth
+ sample_entry_payload += s16.pack(-1) # pre defined
+
+ codec_private_data = binascii.unhexlify(params['codec_private_data'])
+ if fourcc in ('H264', 'AVC1'):
+ sps, pps = codec_private_data.split(u32.pack(1))[1:]
+ avcc_payload = u8.pack(1) # configuration version
+ avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
+ avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
+ avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
+ avcc_payload += u16.pack(len(sps))
+ avcc_payload += sps
+ avcc_payload += u8.pack(1) # number of pps
+ avcc_payload += u16.pack(len(pps))
+ avcc_payload += pps
+ sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
+ sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
+ stsd_payload += sample_entry_box
+
+ stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
+
+ stts_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
+
+ stsc_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
+
+ stco_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
+
+ minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
+
+ mdia_payload += box(b'minf', minf_payload) # Media Information Box
+
+ trak_payload += box(b'mdia', mdia_payload) # Media Box
+
+ moov_payload += box(b'trak', trak_payload) # Track Box
+
+ mehd_payload = u64.pack(duration)
+ mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
+
+ trex_payload = u32.pack(track_id) # track id
+ trex_payload += u32.pack(1) # default sample description index
+ trex_payload += u32.pack(0) # default sample duration
+ trex_payload += u32.pack(0) # default sample size
+ trex_payload += u32.pack(0) # default sample flags
+ mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
+
+ moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
+ stream.write(box(b'moov', moov_payload)) # Movie Box
+
+
+def extract_box_data(data, box_sequence):
+ data_reader = io.BytesIO(data)
+ while True:
+ box_size = u32.unpack(data_reader.read(4))[0]
+ box_type = data_reader.read(4)
+ if box_type == box_sequence[0]:
+ box_data = data_reader.read(box_size - 8)
+ if len(box_sequence) == 1:
+ return box_data
+ return extract_box_data(box_data, box_sequence[1:])
+ data_reader.seek(box_size - 8, 1)
+
+
+class IsmFD(FragmentFD):
+ """
+ Download segments in an ISM manifest
+ """
+
+ FD_NAME = 'ism'
+
+ def real_download(self, filename, info_dict):
+ segments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(segments),
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ segments_filenames = []
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ track_written = False
+ for i, segment in enumerate(segments):
+ segment_url = segment['url']
+ segment_name = 'Frag%d' % i
+ target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
+ count = 0
+ while count <= fragment_retries:
+ try:
+ success = ctx['dl'].download(target_filename, {'url': segment_url})
+ if not success:
+ return False
+ down, target_sanitized = sanitize_open(target_filename, 'rb')
+ down_data = down.read()
+ if not track_written:
+ tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
+ info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
+ write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
+ track_written = True
+ ctx['dest_stream'].write(down_data)
+ down.close()
+ segments_filenames.append(target_sanitized)
+ break
+ except compat_urllib_error.HTTPError as err:
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, segment_name, count, fragment_retries)
+ if count > fragment_retries:
+ if skip_unavailable_fragments:
+ self.report_skip_fragment(segment_name)
+ continue
+ self.report_error('giving up after %s fragment retries' % fragment_retries)
+ return False
+
+ self._finish_frag_download(ctx)
+
+ for segment_file in segments_filenames:
+ os.remove(encodeFilename(segment_file))
+
+ return True
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index 5d0bf5a..989505c 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -96,6 +96,27 @@ class AdultSwimIE(TurnerBaseIE):
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
+ 'info_dict': {
+ 'id': 'eYiLsKVgQ6qTC6agD67Sig',
+ 'title': 'Toonami - Friday, October 14th, 2016',
+ 'description': 'md5:99892c96ffc85e159a428de85c30acde',
+ },
+ 'playlist': [{
+ 'md5': '',
+ 'info_dict': {
+ 'id': 'eYiLsKVgQ6qTC6agD67Sig',
+ 'ext': 'mp4',
+ 'title': 'Toonami - Friday, October 14th, 2016',
+ 'description': 'md5:99892c96ffc85e159a428de85c30acde',
+ },
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
}]
@staticmethod
@@ -163,6 +184,8 @@ class AdultSwimIE(TurnerBaseIE):
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
elif video_info.get('videoPlaybackID'):
segment_ids = [video_info['videoPlaybackID']]
+ elif video_info.get('id'):
+ segment_ids = [video_info['id']]
else:
if video_info.get('auth') is True:
raise ExtractorError(
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index 6adb6d8..c5e079a 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -26,7 +26,7 @@ class AENetworksIE(AENetworksBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
- 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
+ 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
'info_dict': {
'id': '22253814',
'ext': 'mp4',
@@ -99,7 +99,7 @@ class AENetworksIE(AENetworksBaseIE):
query = {
'mbr': 'true',
- 'assetTypes': 'medium_video_s3'
+ 'assetTypes': 'high_video_s3'
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
@@ -155,7 +155,7 @@ class HistoryTopicIE(AENetworksBaseIE):
'id': 'world-war-i-history',
'title': 'World War I History',
},
- 'playlist_mincount': 24,
+ 'playlist_mincount': 23,
}, {
'url': 'http://www.history.com/topics/world-war-i-history/videos',
'only_matching': True,
@@ -193,7 +193,8 @@ class HistoryTopicIE(AENetworksBaseIE):
return self.theplatform_url_result(
release_url, video_id, {
'mbr': 'true',
- 'switch': 'hls'
+ 'switch': 'hls',
+ 'assetTypes': 'high_video_ak',
})
else:
webpage = self._download_webpage(url, topic_id)
@@ -203,6 +204,7 @@ class HistoryTopicIE(AENetworksBaseIE):
entries.append(self.theplatform_url_result(
video_attributes['data-release-url'], video_attributes['data-id'], {
'mbr': 'true',
- 'switch': 'hls'
+ 'switch': 'hls',
+ 'assetTypes': 'high_video_ak',
}))
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py
index 518c61f..75b3669 100644
--- a/youtube_dl/extractor/afreecatv.py
+++ b/youtube_dl/extractor/afreecatv.py
@@ -11,6 +11,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
int_or_none,
+ update_url_query,
xpath_element,
xpath_text,
)
@@ -18,12 +19,18 @@ from ..utils import (
class AfreecaTVIE(InfoExtractor):
IE_DESC = 'afreecatv.com'
- _VALID_URL = r'''(?x)^
- https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
- (?:
- /app/(?:index|read_ucc_bbs)\.cgi|
- /player/[Pp]layer\.(?:swf|html))
- \?.*?\bnTitleNo=(?P<id>\d+)'''
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
+ (?:
+ /app/(?:index|read_ucc_bbs)\.cgi|
+ /player/[Pp]layer\.(?:swf|html)
+ )\?.*?\bnTitleNo=|
+ vod\.afreecatv\.com/PLAYER/STATION/
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor):
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
+ }, {
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
+ 'only_matching': True,
}]
@staticmethod
@@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor):
info_url = compat_urlparse.urlunparse(parsed_url._replace(
netloc='afbbs.afreecatv.com:8080',
path='/api/video/get_video_info.php'))
- video_xml = self._download_xml(info_url, video_id)
+
+ video_xml = self._download_xml(
+ update_url_query(info_url, {'nTitleNo': video_id}), video_id)
if xpath_element(video_xml, './track/video/file') is None:
raise ExtractorError('Specified AfreecaTV video does not exist',
diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py
index d2b03b1..87c803e 100644
--- a/youtube_dl/extractor/amcnetworks.py
+++ b/youtube_dl/extractor/amcnetworks.py
@@ -10,7 +10,7 @@ from ..utils import (
class AMCNetworksIE(ThePlatformIE):
- _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
'md5': '',
@@ -41,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE):
}, {
'url': 'http://www.ifc.com/movies/chaos',
'only_matching': True,
+ }, {
+ 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index cb29cf1..623f44d 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -157,22 +157,16 @@ class AnvatoIE(InfoExtractor):
video_data_url, video_id, transform_source=strip_jsonp,
data=json.dumps(payload).encode('utf-8'))
- def _extract_anvato_videos(self, webpage, video_id):
- anvplayer_data = self._parse_json(self._html_search_regex(
- r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
- 'Anvato player data'), video_id)
-
- video_id = anvplayer_data['video']
- access_key = anvplayer_data['accessKey']
-
+ def _get_anvato_videos(self, access_key, video_id):
video_data = self._get_video_json(access_key, video_id)
formats = []
for published_url in video_data['published_urls']:
video_url = published_url['embed_url']
+ media_format = published_url.get('format')
ext = determine_ext(video_url)
- if ext == 'smil':
+ if ext == 'smil' or media_format == 'smil':
formats.extend(self._extract_smil_formats(video_url, video_id))
continue
@@ -183,7 +177,7 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr if tbr != 0 else None,
}
- if ext == 'm3u8':
+ if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
# Not using _extract_m3u8_formats here as individual media
# playlists are also included in published_urls.
if tbr is None:
@@ -194,7 +188,7 @@ class AnvatoIE(InfoExtractor):
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
- elif ext == 'mp3':
+ elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
else:
a_format.update({
@@ -218,7 +212,19 @@ class AnvatoIE(InfoExtractor):
'formats': formats,
'title': video_data.get('def_title'),
'description': video_data.get('def_description'),
+ 'tags': video_data.get('def_tags', '').split(','),
'categories': video_data.get('categories'),
'thumbnail': video_data.get('thumbnail'),
+ 'timestamp': int_or_none(video_data.get(
+ 'ts_published') or video_data.get('ts_added')),
+ 'uploader': video_data.get('mcp_id'),
+ 'duration': int_or_none(video_data.get('duration')),
'subtitles': subtitles,
}
+
+ def _extract_anvato_videos(self, webpage, video_id):
+ anvplayer_data = self._parse_json(self._html_search_regex(
+ r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
+ 'Anvato player data'), video_id)
+ return self._get_anvato_videos(
+ anvplayer_data['accessKey'], anvplayer_data['video'])
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 3a806a6..35f3656 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -174,11 +174,15 @@ class ARDMediathekIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
- raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+ ERRORS = (
+ ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
+ ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
+ 'Video %s is no longer available'),
+ )
- if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
- raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
+ for pattern, message in ERRORS:
+ if pattern in webpage:
+ raise ExtractorError(message % video_id, expected=True)
if re.search(r'[\?&]rss($|[=&])', url):
doc = compat_etree_fromstring(webpage.encode('utf-8'))
diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py
index 2ec2d70..d7d1c63 100644
--- a/youtube_dl/extractor/audioboom.py
+++ b/youtube_dl/extractor/audioboom.py
@@ -6,8 +6,8 @@ from ..utils import float_or_none
class AudioBoomIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
'info_dict': {
@@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor):
'uploader': 'Steve Czaban',
'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
}
- }
+ }, {
+ 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py
index 72e1bd5..1eebf5d 100644
--- a/youtube_dl/extractor/azubu.py
+++ b/youtube_dl/extractor/azubu.py
@@ -11,7 +11,7 @@ from ..utils import (
class AzubuIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
@@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor):
class AzubuLiveIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$'
+ _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.azubu.tv/MarsTVMDLen',
'only_matching': True,
- }
+ }, {
+ 'url': 'http://azubu.uol.com.br/adolfz',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
user = self._match_id(url)
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 249c3d9..88c590e 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -1,7 +1,9 @@
from __future__ import unicode_literals
import json
+import random
import re
+import time
from .common import InfoExtractor
from ..compat import (
@@ -12,6 +14,9 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ parse_filesize,
+ unescapeHTML,
+ update_url_query,
)
@@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor):
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
webpage, 'video id')
- download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
- # We get the dictionary of the track from some javascript code
- all_info = self._parse_json(self._search_regex(
- r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
- info = all_info[0]
- # We pick mp3-320 for now, until format selection can be easily implemented.
- mp3_info = info['downloads']['mp3-320']
- # If we try to use this url it says the link has expired
- initial_url = mp3_info['url']
- m_url = re.match(
- r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
- initial_url)
- # We build the url we will use to get the final track url
- # This url is build in Bandcamp in the script download_bunde_*.js
- request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
- final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
- # If we could correctly generate the .rand field the url would be
- # in the "download_url" key
- final_url = self._proto_relative_url(self._search_regex(
- r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:')
+ download_webpage = self._download_webpage(
+ download_link, video_id, 'Downloading free downloads page')
+
+ blob = self._parse_json(
+ self._search_regex(
+ r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
+ 'blob', group='blob'),
+ video_id, transform_source=unescapeHTML)
+
+ info = blob['digital_items'][0]
+
+ downloads = info['downloads']
+ track = info['title']
+
+ artist = info.get('artist')
+ title = '%s - %s' % (artist, track) if artist else track
+
+ download_formats = {}
+ for f in blob['download_formats']:
+ name, ext = f.get('name'), f.get('file_extension')
+ if all(isinstance(x, compat_str) for x in (name, ext)):
+ download_formats[name] = ext.strip('.')
+
+ formats = []
+ for format_id, f in downloads.items():
+ format_url = f.get('url')
+ if not format_url:
+ continue
+ # Stat URL generation algorithm is reverse engineered from
+ # download_*_bundle_*.js
+ stat_url = update_url_query(
+ format_url.replace('/download/', '/statdownload/'), {
+ '.rand': int(time.time() * 1000 * random.random()),
+ })
+ format_id = f.get('encoding_name') or format_id
+ stat = self._download_json(
+ stat_url, video_id, 'Downloading %s JSON' % format_id,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
+ fatal=False)
+ if not stat:
+ continue
+ retry_url = stat.get('retry_url')
+ if not isinstance(retry_url, compat_str):
+ continue
+ formats.append({
+ 'url': self._proto_relative_url(retry_url, 'http:'),
+ 'ext': download_formats.get(format_id),
+ 'format_id': format_id,
+ 'format_note': f.get('description'),
+ 'filesize': parse_filesize(f.get('size_mb')),
+ 'vcodec': 'none',
+ })
+ self._sort_formats(formats)
return {
'id': video_id,
- 'title': info['title'],
- 'ext': 'mp3',
- 'vcodec': 'none',
- 'url': final_url,
+ 'title': title,
'thumbnail': info.get('thumb_url'),
'uploader': info.get('artist'),
+ 'artist': artist,
+ 'track': track,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index 956c768..b0b7914 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -46,19 +46,19 @@ class BeegIE(InfoExtractor):
self._proto_relative_url(cpl_url), video_id,
'Downloading cpl JS', fatal=False)
if cpl:
- beeg_version = self._search_regex(
- r'beeg_version\s*=\s*(\d+)', cpl,
- 'beeg version', default=None) or self._search_regex(
+ beeg_version = int_or_none(self._search_regex(
+ r'beeg_version\s*=\s*([^\b]+)', cpl,
+ 'beeg version', default=None)) or self._search_regex(
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
beeg_salt = self._search_regex(
- r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg beeg_salt',
+ r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
default=None, group='beeg_salt')
- beeg_version = beeg_version or '1750'
- beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr'
+ beeg_version = beeg_version or '2000'
+ beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
video = self._download_json(
- 'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
+ 'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
video_id)
def split(o, e):
diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py
index 4bcd104..8d5f11d 100644
--- a/youtube_dl/extractor/cbslocal.py
+++ b/youtube_dl/extractor/cbslocal.py
@@ -4,11 +4,14 @@ from __future__ import unicode_literals
from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse
-from ..utils import unified_timestamp
+from ..utils import (
+ parse_iso8601,
+ unified_timestamp,
+)
class CBSLocalIE(AnvatoIE):
- _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+ _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
_TESTS = [{
# Anvato backend
@@ -22,6 +25,7 @@ class CBSLocalIE(AnvatoIE):
'thumbnail': 're:^https?://.*',
'timestamp': 1463440500,
'upload_date': '20160516',
+ 'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
@@ -35,6 +39,7 @@ class CBSLocalIE(AnvatoIE):
'Syndication\\Curb.tv',
'Content\\News'
],
+ 'tags': ['CBS 2 News Evening'],
},
}, {
# SendtoNews embed
@@ -47,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
# m3u8 download
'skip_download': True,
},
+ }, {
+ 'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+ 'info_dict': {
+ 'id': '3580809',
+ 'ext': 'mp4',
+ 'title': 'A Very Blue Anniversary',
+ 'description': 'CBS2’s Cindy Hsu has more.',
+ 'thumbnail': 're:^https?://.*',
+ 'timestamp': 1479962220,
+ 'upload_date': '20161124',
+ 'uploader': 'CBS',
+ 'subtitles': {
+ 'en': 'mincount:5',
+ },
+ 'categories': [
+ 'Stations\\Spoken Word\\WCBSTV',
+ 'Syndication\\AOL',
+ 'Syndication\\MSN',
+ 'Syndication\\NDN',
+ 'Syndication\\Yahoo',
+ 'Content\\News',
+ 'Content\\News\\Local News',
+ ],
+ 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+ },
}]
def _real_extract(self, url):
@@ -62,8 +92,11 @@ class CBSLocalIE(AnvatoIE):
info_dict = self._extract_anvato_videos(webpage, display_id)
time_str = self._html_search_regex(
- r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
- timestamp = unified_timestamp(time_str)
+ r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
+ if time_str:
+ timestamp = unified_timestamp(time_str)
+ else:
+ timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
info_dict.update({
'display_id': display_id,
diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py
index 8af3187..e00bdaf 100755
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dl/extractor/cda.py
@@ -5,14 +5,16 @@ import re
from .common import InfoExtractor
from ..utils import (
- decode_packed_codes,
ExtractorError,
- parse_duration
+ float_or_none,
+ int_or_none,
+ parse_duration,
)
class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
+ _BASE_URL = 'http://www.cda.pl/'
_TESTS = [{
'url': 'http://www.cda.pl/video/5749950c',
'md5': '6f844bf51b15f31fae165365707ae970',
@@ -21,6 +23,9 @@ class CDAIE(InfoExtractor):
'ext': 'mp4',
'height': 720,
'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
+ 'description': 'md5:269ccd135d550da90d1662651fcb9772',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'average_rating': float,
'duration': 39
}
}, {
@@ -30,6 +35,11 @@ class CDAIE(InfoExtractor):
'id': '57413289',
'ext': 'mp4',
'title': 'Lądowanie na lotnisku na Maderze',
+ 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'crash404',
+ 'view_count': int,
+ 'average_rating': float,
'duration': 137
}
}, {
@@ -39,31 +49,55 @@ class CDAIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
+ self._set_cookie('cda.pl', 'cda.player', 'html5')
+ webpage = self._download_webpage(
+ self._BASE_URL + '/video/' + video_id, video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True)
- title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
-
formats = []
+ uploader = self._search_regex(r'''(?x)
+ <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
+ (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
+ <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
+ ''', webpage, 'uploader', default=None, group='uploader')
+ view_count = self._search_regex(
+ r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
+ 'view_count', default=None)
+ average_rating = self._search_regex(
+ r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
+ webpage, 'rating', fatal=False, group='rating_value')
+
info_dict = {
'id': video_id,
- 'title': title,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'uploader': uploader,
+ 'view_count': int_or_none(view_count),
+ 'average_rating': float_or_none(average_rating),
+ 'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'duration': None,
}
def extract_format(page, version):
- unpacked = decode_packed_codes(page)
- format_url = self._search_regex(
- r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
- '%s url' % version, fatal=False, group='url')
- if not format_url:
+ json_str = self._search_regex(
+ r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
+ '%s player_json' % version, fatal=False, group='player_data')
+ if not json_str:
+ return
+ player_data = self._parse_json(
+ json_str, '%s player_data' % version, fatal=False)
+ if not player_data:
+ return
+ video = player_data.get('video')
+ if not video or 'file' not in video:
+ self.report_warning('Unable to extract %s version information' % version)
return
f = {
- 'url': format_url,
+ 'url': video['file'],
}
m = re.search(
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
@@ -75,9 +109,7 @@ class CDAIE(InfoExtractor):
})
info_dict['formats'].append(f)
if not info_dict['duration']:
- info_dict['duration'] = parse_duration(self._search_regex(
- r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
- unpacked, 'duration', fatal=False, group='duration'))
+ info_dict['duration'] = parse_duration(video.get('duration'))
extract_format(webpage, 'default')
@@ -85,7 +117,8 @@ class CDAIE(InfoExtractor):
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
webpage):
webpage = self._download_webpage(
- href, video_id, 'Downloading %s version information' % resolution, fatal=False)
+ self._BASE_URL + href, video_id,
+ 'Downloading %s version information' % resolution, fatal=False)
if not webpage:
# Manually report warning because empty page is returned when
# invalid version is requested.
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 88346dd..0239dfd 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
- (video-clips|episodes|cc-studios|video-collections|full-episodes|shows)
+ (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@@ -27,6 +27,40 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
}]
+class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
+ (?:full-episodes|shows(?=/[^/]+/full-episodes))
+ /(?P<id>[^?]+)'''
+ _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
+ 'info_dict': {
+ 'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
+ 'title': 'November 28, 2016 - Ryan Speedo Green',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed')
+ feed = self._parse_json(feed_json, playlist_id)
+ zones = feed['manifest']['zones']
+
+ video_zone = zones['t2_lc_promo1']
+ feed = self._download_json(video_zone['feed'], playlist_id)
+ mgid = feed['result']['data']['id']
+
+ videos_info = self._get_videos_info(mgid)
+ return videos_info
+
+
class ToshIE(MTVServicesInfoExtractor):
IE_DESC = 'Tosh.0'
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 415dc84..05c51fa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -30,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media
from ..utils import (
NO_DEFAULT,
age_restricted,
+ base_url,
bug_reports_message,
clean_html,
compiled_regex_type,
@@ -885,7 +886,7 @@ class InfoExtractor(object):
'url': e.get('contentUrl'),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
- 'thumbnail': e.get('thumbnailUrl'),
+ 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')),
@@ -1279,9 +1280,10 @@ class InfoExtractor(object):
}
resolution = last_info.get('RESOLUTION')
if resolution:
- width_str, height_str = resolution.split('x')
- f['width'] = int(width_str)
- f['height'] = int(height_str)
+ mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
+ if mobj:
+ f['width'] = int(mobj.group('width'))
+ f['height'] = int(mobj.group('height'))
# Unified Streaming Platform
mobj = re.search(
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
@@ -1539,7 +1541,7 @@ class InfoExtractor(object):
if res is False:
return []
mpd, urlh = res
- mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
+ mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats(
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
@@ -1701,7 +1703,7 @@ class InfoExtractor(object):
representation_ms_info['fragments'] = [{
'url': media_template % {
'Number': segment_number,
- 'Bandwidth': representation_attrib.get('bandwidth'),
+ 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
},
'duration': segment_duration,
} for segment_number in range(
@@ -1719,7 +1721,7 @@ class InfoExtractor(object):
def add_segment_url():
segment_url = media_template % {
'Time': segment_time,
- 'Bandwidth': representation_attrib.get('bandwidth'),
+ 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
'Number': segment_number,
}
representation_ms_info['fragments'].append({
@@ -1780,6 +1782,105 @@ class InfoExtractor(object):
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
+ def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
+ res = self._download_webpage_handle(
+ ism_url, video_id,
+ note=note or 'Downloading ISM manifest',
+ errnote=errnote or 'Failed to download ISM manifest',
+ fatal=fatal)
+ if res is False:
+ return []
+ ism, urlh = res
+
+ return self._parse_ism_formats(
+ compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
+
+ def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
+ return []
+
+ duration = int(ism_doc.attrib['Duration'])
+ timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
+
+ formats = []
+ for stream in ism_doc.findall('StreamIndex'):
+ stream_type = stream.get('Type')
+ if stream_type not in ('video', 'audio'):
+ continue
+ url_pattern = stream.attrib['Url']
+ stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
+ stream_name = stream.get('Name')
+ for track in stream.findall('QualityLevel'):
+ fourcc = track.get('FourCC')
+ # TODO: add support for WVC1 and WMAP
+ if fourcc not in ('H264', 'AVC1', 'AACL'):
+ self.report_warning('%s is not a supported codec' % fourcc)
+ continue
+ tbr = int(track.attrib['Bitrate']) // 1000
+ width = int_or_none(track.get('MaxWidth'))
+ height = int_or_none(track.get('MaxHeight'))
+ sampling_rate = int_or_none(track.get('SamplingRate'))
+
+ track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
+ track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+
+ fragments = []
+ fragment_ctx = {
+ 'time': 0,
+ }
+ stream_fragments = stream.findall('c')
+ for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
+ fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
+ fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
+ fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
+ if not fragment_ctx['duration']:
+ try:
+ next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
+ except IndexError:
+ next_fragment_time = duration
+ fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
+ for _ in range(fragment_repeat):
+ fragments.append({
+ 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+ 'duration': fragment_ctx['duration'] / stream_timescale,
+ })
+ fragment_ctx['time'] += fragment_ctx['duration']
+
+ format_id = []
+ if ism_id:
+ format_id.append(ism_id)
+ if stream_name:
+ format_id.append(stream_name)
+ format_id.append(compat_str(tbr))
+
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'ext': 'ismv' if stream_type == 'video' else 'isma',
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'asr': sampling_rate,
+ 'vcodec': 'none' if stream_type == 'audio' else fourcc,
+ 'acodec': 'none' if stream_type == 'video' else fourcc,
+ 'protocol': 'ism',
+ 'fragments': fragments,
+ '_download_params': {
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'width': width or 0,
+ 'height': height or 0,
+ 'fourcc': fourcc,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ 'sampling_rate': sampling_rate,
+ 'channels': int_or_none(track.get('Channels', 2)),
+ 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+ 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+ },
+ })
+ return formats
+
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index cc141f6..8d5b69f 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -236,7 +236,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
- output += """ScaledBorderAndShadow: yes
+ output += """ScaledBorderAndShadow: no
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py
index fd64d1a..1f75352 100644
--- a/youtube_dl/extractor/dotsub.py
+++ b/youtube_dl/extractor/dotsub.py
@@ -9,7 +9,7 @@ from ..utils import (
class DotsubIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
- _TEST = {
+ _TESTS = [{
'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
'md5': '21c7ff600f545358134fea762a6d42b6',
'info_dict': {
@@ -24,7 +24,24 @@ class DotsubIE(InfoExtractor):
'upload_date': '20131130',
'view_count': int,
}
- }
+ }, {
+ 'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6',
+ 'md5': '2bb4a83896434d5c26be868c609429a3',
+ 'info_dict': {
+ 'id': '168006778',
+ 'ext': 'mp4',
+ 'title': 'Apartments and flats in Raipur the white symphony',
+ 'description': 'md5:784d0639e6b7d1bc29530878508e38fe',
+ 'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
+ 'duration': 290,
+ 'timestamp': 1476767794.2809999,
+ 'upload_date': '20160525',
+ 'uploader': 'parthivi001',
+ 'uploader_id': 'user52596202',
+ 'view_count': int,
+ },
+ 'add_ie': ['Vimeo'],
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -37,12 +54,23 @@ class DotsubIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
[r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
- webpage, 'video url')
+ webpage, 'video url', default=None)
+ info_dict = {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ }
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'flv',
+ if not video_url:
+ setup_data = self._parse_json(self._html_search_regex(
+ r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1',
+ webpage, 'setup data', group='content'), video_id)
+ info_dict = {
+ '_type': 'url_transparent',
+ 'url': setup_data['src'],
+ }
+
+ info_dict.update({
'title': info['title'],
'description': info.get('description'),
'thumbnail': info.get('screenshotURI'),
@@ -50,4 +78,6 @@ class DotsubIE(InfoExtractor):
'uploader': info.get('user'),
'timestamp': float_or_none(info.get('dateCreated'), 1000),
'view_count': int_or_none(info.get('numberOfViews')),
- }
+ })
+
+ return info_dict
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
index e8870c4..22da8e4 100644
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@@ -10,8 +10,8 @@ from ..utils import (
class DrTuberIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+ _TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
'info_dict': {
@@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://www.drtuber.com/embed/489939',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
+ webpage)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- display_id = mobj.group('display_id')
+ display_id = mobj.group('display_id') or video_id
- webpage = self._download_webpage(url, display_id)
+ webpage = self._download_webpage(
+ 'http://www.drtuber.com/video/%s' % video_id, display_id)
video_url = self._html_search_regex(
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
- (r'class="title_watch"[^>]*><p>([^<]+)<',
+ (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
r'<p[^>]+class="title_substrate">([^<]+)</p>',
r'<title>([^<]+) - \d+'),
webpage, 'title')
diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
index d4dfda8..c2f593e 100644
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
ExtractorError,
int_or_none,
- url_basename,
)
@@ -77,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
if status != 200:
raise ExtractorError(' '.join(response['errors']), expected=True)
- def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+ def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
try:
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
except ExtractorError as ee:
@@ -116,29 +118,38 @@ class EaglePlatformIE(InfoExtractor):
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
m3u8_formats = self._extract_m3u8_formats(
- m3u8_url, video_id,
- 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
- mp4_url = self._get_video_url(
+ m3u8_formats_dict = {}
+ for f in m3u8_formats:
+ if f.get('height') is not None:
+ m3u8_formats_dict[f['height']] = f
+
+ mp4_data = self._download_json(
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
# http://lentaru.media.eagleplatform.com/player/player.js
- re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
- video_id, 'Downloading mp4 JSON')
- mp4_url_basename = url_basename(mp4_url)
- for m3u8_format in m3u8_formats:
- mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
- if mobj:
- http_format = m3u8_format.copy()
- video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
- if not self._is_valid_url(video_url, video_id):
+ re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
+ video_id, 'Downloading mp4 JSON', fatal=False)
+ if mp4_data:
+ for format_id, format_url in mp4_data.get('data', {}).items():
+ if not isinstance(format_url, compat_str):
continue
- http_format.update({
- 'url': video_url,
- 'format_id': m3u8_format['format_id'].replace('hls', 'http'),
- 'protocol': 'http',
- })
- formats.append(http_format)
+ height = int_or_none(format_id)
+ if height is not None and m3u8_formats_dict.get(height):
+ f = m3u8_formats_dict[height].copy()
+ f.update({
+ 'format_id': f['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ else:
+ f = {
+ 'format_id': 'http-%s' % format_id,
+ 'height': int_or_none(format_id),
+ }
+ f['url'] = format_url
+ formats.append(f)
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py
index 6d10f8e..8795e0d 100644
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@@ -1,38 +1,117 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import remove_end
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ unified_timestamp,
+)
class ESPNIE(InfoExtractor):
- _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
- 'md5': '60e5d097a523e767d06479335d1bdc58',
'info_dict': {
- 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
+ 'id': '10365079',
'ext': 'mp4',
'title': '30 for 30 Shorts: Judging Jewell',
- 'description': None,
+ 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
+ 'timestamp': 1390936111,
+ 'upload_date': '20140128',
},
'params': {
'skip_download': True,
},
- 'add_ie': ['OoyalaExternal'],
}, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663',
- 'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
'info_dict': {
- 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
+ 'id': '2743663',
'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season',
+ 'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
+ 'timestamp': 1449446454,
+ 'upload_date': '20151207',
},
'params': {
'skip_download': True,
},
- 'add_ie': ['OoyalaExternal'],
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
+ 'url': 'http://www.espn.com/video/clip?id=10365079',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/video/clip/_/id/17989860',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ clip = self._download_json(
+ 'http://api-app.espn.com/v1/video/clips/%s' % video_id,
+ video_id)['videos'][0]
+
+ title = clip['headline']
+
+ format_urls = set()
+ formats = []
+
+ def traverse_source(source, base_source_id=None):
+ for source_id, source in source.items():
+ if isinstance(source, compat_str):
+ extract_source(source, base_source_id)
+ elif isinstance(source, dict):
+ traverse_source(
+ source,
+ '%s-%s' % (base_source_id, source_id)
+ if base_source_id else source_id)
+
+ def extract_source(source_url, source_id=None):
+ if source_url in format_urls:
+ return
+ format_urls.add(source_url)
+ ext = determine_ext(source_url)
+ if ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ source_url, video_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, video_id, f4m_id=source_id, fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=source_id, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': source_id,
+ })
+
+ traverse_source(clip['links']['source'])
+ self._sort_formats(formats)
+
+ description = clip.get('caption') or clip.get('description')
+ thumbnail = clip.get('thumbnail')
+ duration = int_or_none(clip.get('duration'))
+ timestamp = unified_timestamp(clip.get('originalPublishDate'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class ESPNArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+ _TESTS = [{
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
'only_matching': True,
}, {
@@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
}, {
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
'only_matching': True,
- }, {
- 'url': 'http://www.espn.com/video/clip?id=10365079',
- 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
+
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
webpage, 'video id', group='id')
- cms = 'espn'
- if 'data-source="intl"' in webpage:
- cms = 'intl'
- player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
- player = self._download_webpage(
- player_url, video_id)
-
- pcode = self._search_regex(
- r'["\']pcode=([^"\']+)["\']', player, 'pcode')
-
- title = remove_end(
- self._og_search_title(webpage),
- '- ESPN Video').strip()
-
- return {
- '_type': 'url_transparent',
- 'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
- 'ie_key': 'OoyalaExternal',
- 'title': title,
- }
+ return self.url_result(
+ 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index a693f8c..46d007b 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -180,6 +180,7 @@ from .cnn import (
from .coub import CoubIE
from .collegerama import CollegeRamaIE
from .comedycentral import (
+ ComedyCentralFullEpisodesIE,
ComedyCentralIE,
ComedyCentralShortnameIE,
ComedyCentralTVIE,
@@ -267,7 +268,10 @@ from .engadget import EngadgetIE
from .eporner import EpornerIE
from .eroprofile import EroProfileIE
from .escapist import EscapistIE
-from .espn import ESPNIE
+from .espn import (
+ ESPNIE,
+ ESPNArticleIE,
+)
from .esri import EsriVideoIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
@@ -296,6 +300,7 @@ from .footyroom import FootyRoomIE
from .formula1 import Formula1IE
from .fourtube import FourTubeIE
from .fox import FOXIE
+from .fox9 import FOX9IE
from .foxgay import FoxgayIE
from .foxnews import (
FoxNewsIE,
@@ -408,6 +413,10 @@ from .ivi import (
from .ivideon import IvideonIE
from .iwara import IwaraIE
from .izlesene import IzleseneIE
+from .jamendo import (
+ JamendoIE,
+ JamendoAlbumIE,
+)
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jwplatform import JWPlatformIE
@@ -592,6 +601,7 @@ from .nhl import (
from .nick import (
NickIE,
NickDeIE,
+ NickNightIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import (
@@ -601,6 +611,7 @@ from .ninecninemedia import (
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
+from .nobelprize import NobelPrizeIE
from .noco import NocoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
@@ -667,6 +678,7 @@ from .orf import (
ORFFM4IE,
ORFIPTVIE,
)
+from .pandatv import PandaTVIE
from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
@@ -740,6 +752,10 @@ from .rbmaradio import RBMARadioIE
from .rds import RDSIE
from .redtube import RedTubeIE
from .regiotv import RegioTVIE
+from .rentv import (
+ RENTVIE,
+ RENTVArticleIE,
+)
from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
@@ -789,14 +805,16 @@ from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
from .screenjunkies import ScreenJunkiesIE
-from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
from .seeker import SeekerIE
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE
from .sexu import SexuIE
from .shahid import ShahidIE
-from .shared import SharedIE
+from .shared import (
+ SharedIE,
+ VivoIE,
+)
from .sharesix import ShareSixIE
from .sina import SinaIE
from .sixplay import SixPlayIE
@@ -879,6 +897,7 @@ from .teachertube import (
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
+from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele13 import Tele13IE
@@ -947,6 +966,10 @@ from .tv2 import (
)
from .tv3 import TV3IE
from .tv4 import TV4IE
+from .tvanouvelles import (
+ TVANouvellesIE,
+ TVANouvellesArticleIE,
+)
from .tvc import (
TVCIE,
TVCArticleIE,
@@ -1087,6 +1110,7 @@ from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,
@@ -1098,6 +1122,10 @@ from .wdr import (
WDRIE,
WDRMobileIE,
)
+from .webcaster import (
+ WebcasterIE,
+ WebcasterFeedIE,
+)
from .webofstories import (
WebOfStoriesIE,
WebOfStoriesPlaylistIE,
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 8015734..b4d38e5 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import json
import re
import socket
@@ -100,7 +99,8 @@ class FacebookIE(InfoExtractor):
'ext': 'mp4',
'title': '"What are you doing running in the snow?"',
'uploader': 'FailArmy',
- }
+ },
+ 'skip': 'Video gone',
}, {
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
@@ -110,6 +110,7 @@ class FacebookIE(InfoExtractor):
'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
'uploader': 'S. Saint',
},
+ 'skip': 'Video gone',
}, {
'note': 'swf params escaped',
'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
@@ -120,6 +121,18 @@ class FacebookIE(InfoExtractor):
'title': 'Facebook video #10153664894881749',
},
}, {
+ # have 1080P, but only up to 720p in swf params
+ 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
+ 'md5': '0d9813160b146b3bc8744e006027fcc6',
+ 'info_dict': {
+ 'id': '10155529876156509',
+ 'ext': 'mp4',
+ 'title': 'Holocaust survivor becomes US citizen',
+ 'timestamp': 1477818095,
+ 'upload_date': '20161030',
+ 'uploader': 'CNN',
+ },
+ }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
}, {
@@ -227,43 +240,13 @@ class FacebookIE(InfoExtractor):
video_data = None
- BEFORE = '{swf.addParam(param[0], param[1]);});'
- AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
- PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
-
- for m in re.findall(PATTERN, webpage):
- swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
- data = dict(json.loads(swf_params))
- params_raw = compat_urllib_parse_unquote(data['params'])
- video_data_candidate = json.loads(params_raw)['video_data']
- for _, f in video_data_candidate.items():
- if not f:
- continue
- if isinstance(f, dict):
- f = [f]
- if not isinstance(f, list):
- continue
- if f[0].get('video_id') == video_id:
- video_data = video_data_candidate
- break
- if video_data:
+ server_js_data = self._parse_json(self._search_regex(
+ r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
+ for item in server_js_data.get('instances', []):
+ if item[1][0] == 'VideoConfig':
+ video_data = item[2][0]['videoData']
break
- def video_data_list2dict(video_data):
- ret = {}
- for item in video_data:
- format_id = item['stream_type']
- ret.setdefault(format_id, []).append(item)
- return ret
-
- if not video_data:
- server_js_data = self._parse_json(self._search_regex(
- r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
- for item in server_js_data.get('instances', []):
- if item[1][0] == 'VideoConfig':
- video_data = video_data_list2dict(item[2][0]['videoData'])
- break
-
if not video_data:
if not fatal_if_no_video:
return webpage, False
@@ -276,7 +259,8 @@ class FacebookIE(InfoExtractor):
raise ExtractorError('Cannot parse data')
formats = []
- for format_id, f in video_data.items():
+ for f in video_data:
+ format_id = f['stream_type']
if f and isinstance(f, dict):
f = [f]
if not f or not isinstance(f, list):
diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py
new file mode 100644
index 0000000..56d9975
--- /dev/null
+++ b/youtube_dl/extractor/fox9.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .anvato import AnvatoIE
+from ..utils import js_to_json
+
+
+class FOX9IE(AnvatoIE):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
+ _TESTS = [{
+ 'url': 'http://www.fox9.com/news/215123287-story',
+ 'md5': 'd6e1b2572c3bab8a849c9103615dd243',
+ 'info_dict': {
+ 'id': '314473',
+ 'ext': 'mp4',
+ 'title': 'Bear climbs tree in downtown Duluth',
+ 'description': 'md5:6a36bfb5073a411758a752455408ac90',
+ 'duration': 51,
+ 'timestamp': 1478123580,
+ 'upload_date': '20161102',
+ 'uploader': 'EPFOX',
+ 'categories': ['News', 'Sports'],
+ 'tags': ['news', 'video'],
+ },
+ }, {
+ 'url': 'http://www.fox9.com/news/investigators/214070684-story',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_id = self._parse_json(
+ self._search_regex(
+ r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
+ webpage, 'anvato playlist'),
+ video_id, transform_source=js_to_json)[0]['video']
+
+ return self._get_anvato_videos(
+ 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
+ video_id)
diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py
index 186da0d..56048ff 100644
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@@ -29,7 +29,7 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_url = self._search_regex(
- r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"',
+ r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
webpage, 'video path')
title = self._og_search_title(webpage)
@@ -38,7 +38,7 @@ class FranceCultureIE(InfoExtractor):
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
webpage, 'upload date', fatal=False))
thumbnail = self._search_regex(
- r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"',
+ r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 8c5ffc9..f2928b5 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -28,6 +28,9 @@ class FunnyOrDieIE(InfoExtractor):
'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': 're:^http:.*\.jpg$',
},
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man',
'only_matching': True,
@@ -51,19 +54,45 @@ class FunnyOrDieIE(InfoExtractor):
formats = []
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ source_formats = list(filter(
+ lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+ m3u8_formats))
- bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
+ bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
bitrates.sort()
- for bitrate in bitrates:
- for link in links:
- formats.append({
- 'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])),
- 'format_id': '%s-%d' % (link[1], bitrate),
- 'vbr': bitrate,
- })
+ if source_formats:
+ self._sort_formats(source_formats)
+
+ for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)):
+ for path, ext in links:
+ ff = f.copy()
+ if ff:
+ if ext != 'mp4':
+ ff = dict(
+ [(k, v) for k, v in ff.items()
+ if k in ('height', 'width', 'format_id')])
+ ff.update({
+ 'format_id': ff['format_id'].replace('hls', ext),
+ 'ext': ext,
+ 'protocol': 'http',
+ })
+ else:
+ ff.update({
+ 'format_id': '%s-%d' % (ext, bitrate),
+ 'vbr': bitrate,
+ })
+ ff['url'] = self._proto_relative_url(
+ '%s%d.%s' % (path, bitrate, ext))
+ formats.append(ff)
+ self._check_formats(formats, video_id)
+
+ formats.extend(m3u8_formats)
+ self._sort_formats(
+ formats, field_preference=('height', 'width', 'tbr', 'format_id'))
subtitles = {}
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7b8a9cf..3949c8b 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -47,6 +47,8 @@ from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
+from .drtuber import DrTuberIE
+from .redtube import RedTubeIE
from .vimeo import VimeoIE
from .dailymotion import (
DailymotionIE,
@@ -54,10 +56,10 @@ from .dailymotion import (
)
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
-from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
from .videomore import VideomoreIE
+from .webcaster import WebcasterFeedIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
@@ -1187,16 +1189,6 @@ class GenericIE(InfoExtractor):
'duration': 248.667,
},
},
- # ScreenwaveMedia embed
- {
- 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
- 'md5': '24ace5baba0d35d55c6810b51f34e9e0',
- 'info_dict': {
- 'id': 'cinemasnob-55d26273809dd',
- 'ext': 'mp4',
- 'title': 'cinemasnob',
- },
- },
# BrightcoveInPageEmbed embed
{
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
@@ -1208,20 +1200,6 @@ class GenericIE(InfoExtractor):
'duration': 51690,
},
},
- # JWPlayer with M3U8
- {
- 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
- 'info_dict': {
- 'id': 'playlist',
- 'ext': 'mp4',
- 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
- 'uploader': 'ren.tv',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- }
- },
# Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
# This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
{
@@ -1648,6 +1626,10 @@ class GenericIE(InfoExtractor):
doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
+ elif doc.tag == 'SmoothStreamingMedia':
+ info_dict['formats'] = self._parse_ism_formats(doc, url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats'])
@@ -1991,11 +1973,6 @@ class GenericIE(InfoExtractor):
if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
- # Look for embedded PornHub player
- pornhub_url = PornHubIE._extract_url(webpage)
- if pornhub_url:
- return self.url_result(pornhub_url, 'PornHub')
-
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls:
@@ -2006,6 +1983,21 @@ class GenericIE(InfoExtractor):
if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+ # Look for embedded PornHub player
+ pornhub_urls = PornHubIE._extract_urls(webpage)
+ if pornhub_urls:
+ return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+
+ # Look for embedded DrTuber player
+ drtuber_urls = DrTuberIE._extract_urls(webpage)
+ if drtuber_urls:
+ return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
+
+ # Look for embedded RedTube player
+ redtube_urls = RedTubeIE._extract_urls(webpage)
+ if redtube_urls:
+ return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
+
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@ -2138,6 +2130,11 @@ class GenericIE(InfoExtractor):
if videomore_url:
return self.url_result(videomore_url)
+ # Look for Webcaster embeds
+ webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
+ if webcaster_url:
+ return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
+
# Look for Playwire embeds
mobj = re.search(
r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
@@ -2204,11 +2201,6 @@ class GenericIE(InfoExtractor):
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
- # Look for ScreenwaveMedia embeds
- mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
- if mobj is not None:
- return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
-
# Look for Digiteka embeds
digiteka_url = DigitekaIE._extract_url(webpage)
if digiteka_url:
@@ -2230,6 +2222,16 @@ class GenericIE(InfoExtractor):
return self.url_result('limelight:%s:%s' % (
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
+ mobj = re.search(
+ r'''(?sx)
+ <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
+ <param[^>]+
+ name=(["\'])flashVars\2[^>]+
+ value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
+ ''', webpage)
+ if mobj:
+ return self.url_result('limelight:media:%s' % mobj.group('id'))
+
# Look for AdobeTVVideo embeds
mobj = re.search(
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
@@ -2463,6 +2465,21 @@ class GenericIE(InfoExtractor):
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+ elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+ # Just matching .ism/manifest is not enough to be reliably sure
+ # whether it's actually an ISM manifest or some other streaming
+ # manifest since there are various streaming URL formats
+ # possible (see [1]) as well as some other shenanigans like
+ # .smil/manifest URLs that actually serve an ISM (see [2]) and
+ # so on.
+ # Thus the most reasonable way to solve this is to delegate
+ # to generic extractor in order to look into the contents of
+ # the manifest itself.
+ # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+ # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+ entry_info_dict = self.url_result(
+ smuggle_url(video_url, {'to_generic': True}),
+ GenericIE.ie_key())
else:
entry_info_dict['url'] = video_url
diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py
index 498304c..5279fa8 100644
--- a/youtube_dl/extractor/googlesearch.py
+++ b/youtube_dl/extractor/googlesearch.py
@@ -4,9 +4,6 @@ import itertools
import re
from .common import SearchInfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
class GoogleSearchIE(SearchInfoExtractor):
@@ -34,13 +31,16 @@ class GoogleSearchIE(SearchInfoExtractor):
}
for pagenum in itertools.count():
- result_url = (
- 'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
- % (compat_urllib_parse.quote_plus(query), pagenum * 10))
-
webpage = self._download_webpage(
- result_url, 'gvsearch:' + query,
- note='Downloading result page ' + str(pagenum + 1))
+ 'http://www.google.com/search',
+ 'gvsearch:' + query,
+ note='Downloading result page %s' % (pagenum + 1),
+ query={
+ 'tbm': 'vid',
+ 'q': query,
+ 'start': pagenum * 10,
+ 'hl': 'en',
+ })
for hit_idx, mobj in enumerate(re.finditer(
r'<h3 class="r"><a href="([^"]+)"', webpage)):
diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py
index 7a1c75b..10da140 100644
--- a/youtube_dl/extractor/hellporno.py
+++ b/youtube_dl/extractor/hellporno.py
@@ -6,12 +6,13 @@ from .common import InfoExtractor
from ..utils import (
js_to_json,
remove_end,
+ determine_ext,
)
class HellPornoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
+ _TESTS = [{
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
'md5': '1fee339c610d2049699ef2aa699439f1',
'info_dict': {
@@ -22,7 +23,10 @@ class HellPornoIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://hellporno.net/v/186271/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -38,7 +42,7 @@ class HellPornoIE(InfoExtractor):
video_id = flashvars.get('video_id')
thumbnail = flashvars.get('preview_url')
- ext = flashvars.get('postfix', '.mp4')[1:]
+ ext = determine_ext(flashvars.get('postfix'), 'mp4')
formats = []
for video_url_key in ['video_url', 'video_alt_url']:
diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dl/extractor/hornbunny.py
index 5b6efb2..0615f06 100644
--- a/youtube_dl/extractor/hornbunny.py
+++ b/youtube_dl/extractor/hornbunny.py
@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -14,29 +12,24 @@ class HornBunnyIE(InfoExtractor):
_VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
_TEST = {
'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
- 'md5': '95e40865aedd08eff60272b704852ad7',
+ 'md5': 'e20fd862d1894b67564c96f180f43924',
'info_dict': {
'id': '5227',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'panty slut jerk off instruction',
'duration': 550,
'age_limit': 18,
+ 'view_count': int,
+ 'thumbnail': 're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(
- url, video_id, note='Downloading initial webpage')
- title = self._html_search_regex(
- r'class="title">(.*?)</h2>', webpage, 'title')
- redirect_url = self._html_search_regex(
- r'pg&settings=(.*?)\|0"\);', webpage, 'title')
- webpage2 = self._download_webpage(redirect_url, video_id)
- video_url = self._html_search_regex(
- r'flvMask:(.*?);', webpage2, 'video_url')
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
duration = parse_duration(self._search_regex(
r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
@@ -45,12 +38,12 @@ class HornBunnyIE(InfoExtractor):
r'<strong>Views:</strong>\s*(\d+)</div>',
webpage, 'view count', fatal=False))
- return {
+ info_dict.update({
'id': video_id,
- 'url': video_url,
'title': title,
- 'ext': 'flv',
'duration': duration,
'view_count': view_count,
'age_limit': 18,
- }
+ })
+
+ return info_dict
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index d23489d..67c24a5 100644
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -13,7 +13,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
}, {
'url': 'http://imgur.com/topic/Funny/N8rOudd',
'only_matching': True,
+ }, {
+ 'url': 'http://imgur.com/r/aww/VQcQPhM',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py
new file mode 100644
index 0000000..ee9acac
--- /dev/null
+++ b/youtube_dl/extractor/jamendo.py
@@ -0,0 +1,107 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from ..compat import compat_urlparse
+from .common import InfoExtractor
+
+
+class JamendoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
+ 'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
+ 'info_dict': {
+ 'id': '196219',
+ 'display_id': 'stories-from-emona-i',
+ 'ext': 'flac',
+ 'title': 'Stories from Emona I',
+ 'thumbnail': 're:^https?://.*\.jpg'
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._VALID_URL_RE.match(url)
+ track_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_meta('name', webpage, 'title')
+
+ formats = [{
+ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
+ % (sub_domain, track_id, format_id),
+ 'format_id': format_id,
+ 'ext': ext,
+ 'quality': quality,
+ } for quality, (format_id, sub_domain, ext) in enumerate((
+ ('mp31', 'mp3l', 'mp3'),
+ ('mp32', 'mp3d', 'mp3'),
+ ('ogg1', 'ogg', 'ogg'),
+ ('flac', 'flac', 'flac'),
+ ))]
+ self._sort_formats(formats)
+
+ thumbnail = self._html_search_meta(
+ 'image', webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': track_id,
+ 'display_id': display_id,
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'formats': formats
+ }
+
+
+class JamendoAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
+ _TEST = {
+ 'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
+ 'info_dict': {
+ 'id': '121486',
+ 'title': 'Duck On Cover'
+ },
+ 'playlist': [{
+ 'md5': 'e1a2fcb42bda30dfac990212924149a8',
+ 'info_dict': {
+ 'id': '1032333',
+ 'ext': 'flac',
+ 'title': 'Warmachine'
+ }
+ }, {
+ 'md5': '1f358d7b2f98edfe90fd55dac0799d50',
+ 'info_dict': {
+ 'id': '1032330',
+ 'ext': 'flac',
+ 'title': 'Without Your Ghost'
+ }
+ }],
+ 'params': {
+ 'playlistend': 2
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = self._VALID_URL_RE.match(url)
+ album_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, mobj.group('display_id'))
+
+ title = self._html_search_meta('name', webpage, 'title')
+
+ entries = [
+ self.url_result(
+ compat_urlparse.urljoin(url, m.group('path')),
+ ie=JamendoIE.ie_key(),
+ video_id=self._search_regex(
+ r'/track/(\d+)', m.group('path'),
+ 'track id', default=None))
+ for m in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
+ webpage)
+ ]
+
+ return self.playlist_result(entries, album_id, title)
diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py
index a3784e6..ded717c 100644
--- a/youtube_dl/extractor/litv.py
+++ b/youtube_dl/extractor/litv.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
@@ -52,8 +51,8 @@ class LiTVIE(InfoExtractor):
'skip': 'Georestricted to Taiwan',
}]
- def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
- episode_title = view_data['title']
+ def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
+ episode_title = program_info['title']
content_id = season_list['contentId']
if prompt:
@@ -61,7 +60,7 @@ class LiTVIE(InfoExtractor):
all_episodes = [
self.url_result(smuggle_url(
- self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
+ self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
{'force_noplaylist': True})) # To prevent infinite recursion
for episode in season_list['episode']]
@@ -80,19 +79,15 @@ class LiTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- view_data = dict(map(lambda t: (t[0], t[2]), re.findall(
- r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
- webpage)))
-
- vod_data = self._parse_json(self._search_regex(
- 'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+ program_info = self._parse_json(self._search_regex(
+ 'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
video_id)
- season_list = list(vod_data.get('seasonList', {}).values())
+ season_list = list(program_info.get('seasonList', {}).values())
if season_list:
if not noplaylist:
return self._extract_playlist(
- season_list[0], video_id, vod_data, view_data,
+ season_list[0], video_id, program_info,
prompt=noplaylist_prompt)
if noplaylist_prompt:
@@ -102,8 +97,8 @@ class LiTVIE(InfoExtractor):
# endpoint gives the same result as the data embedded in the webpage.
# If georestricted, there are no embedded data, so an extra request is
# necessary to get the error code
- if 'assetId' not in view_data:
- view_data = self._download_json(
+ if 'assetId' not in program_info:
+ program_info = self._download_json(
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
query={'contentId': video_id},
headers={'Accept': 'application/json'})
@@ -112,9 +107,9 @@ class LiTVIE(InfoExtractor):
webpage, 'video data', default='{}'), video_id)
if not video_data:
payload = {
- 'assetId': view_data['assetId'],
- 'watchDevices': view_data['watchDevices'],
- 'contentType': view_data['contentType'],
+ 'assetId': program_info['assetId'],
+ 'watchDevices': program_info['watchDevices'],
+ 'contentType': program_info['contentType'],
}
video_data = self._download_json(
'https://www.litv.tv/vod/getMainUrl', video_id,
@@ -136,11 +131,11 @@ class LiTVIE(InfoExtractor):
# LiTV HLS segments doesn't like compressions
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
- title = view_data['title'] + view_data.get('secondaryMark', '')
- description = view_data.get('description')
- thumbnail = view_data.get('imageFile')
- categories = [item['name'] for item in vod_data.get('category', [])]
- episode = int_or_none(view_data.get('episode'))
+ title = program_info['title'] + program_info.get('secondaryMark', '')
+ description = program_info.get('description')
+ thumbnail = program_info.get('imageFile')
+ categories = [item['name'] for item in program_info.get('category', [])]
+ episode = int_or_none(program_info.get('episode'))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py
index ea0565a..b84e4dd 100644
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@@ -54,6 +54,22 @@ class LiveLeakIE(InfoExtractor):
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
'thumbnail': 're:^https?://.*\.jpg$'
}
+ }, {
+ # Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521
+ 'url': 'http://m.liveleak.com/view?i=763_1473349649',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': '763_1473349649',
+ 'ext': 'mp4',
+ 'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
+ 'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
+ 'uploader': 'Ziz',
+ 'upload_date': '20160908',
+ 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
@staticmethod
@@ -87,7 +103,7 @@ class LiveLeakIE(InfoExtractor):
else:
# Maybe an embed?
embed_url = self._search_regex(
- r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
+ r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
webpage, 'embed URL')
return {
'_type': 'url_transparent',
@@ -107,6 +123,7 @@ class LiveLeakIE(InfoExtractor):
'format_note': s.get('label'),
'url': s['file'],
} for i, s in enumerate(sources)]
+
for i, s in enumerate(sources):
# Removing '.h264_*.mp4' gives the raw video, which is essentially
# the same video without the LiveLeak logo at the top (see
diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py
index afd3e98..8e0aee0 100644
--- a/youtube_dl/extractor/microsoftvirtualacademy.py
+++ b/youtube_dl/extractor/microsoftvirtualacademy.py
@@ -71,12 +71,15 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
formats = []
for sources in settings.findall(compat_xpath('.//MediaSources')):
- if sources.get('videoType') == 'smoothstreaming':
- continue
+ sources_type = sources.get('videoType')
for source in sources.findall(compat_xpath('./MediaSource')):
video_url = source.text
if not video_url or not video_url.startswith('http'):
continue
+ if sources_type == 'smoothstreaming':
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, 'mss', fatal=False))
+ continue
video_mode = source.get('videoMode')
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 2294745..f577836 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -1,19 +1,20 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
+import uuid
from .common import InfoExtractor
from ..compat import (
+ compat_str,
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
- get_element_by_attribute,
int_or_none,
- remove_start,
extract_attributes,
determine_ext,
+ smuggle_url,
+ parse_duration,
)
@@ -72,76 +73,133 @@ class MiTeleBaseIE(InfoExtractor):
}
-class MiTeleIE(MiTeleBaseIE):
+class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es'
- _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/'
+ _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
_TESTS = [{
- 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
- # MD5 is unstable
+ 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': {
- 'id': '0NF1jJnxS1Wu3pHrmvFyw2',
- 'display_id': 'programa-144',
+ 'id': '57b0dfb9c715da65618b4afa',
'ext': 'mp4',
'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
'series': 'Diario de',
'season': 'La redacción',
+ 'season_number': 14,
+ 'season_id': 'diario_de_t14_11981',
'episode': 'Programa 144',
+ 'episode_number': 3,
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913,
},
+ 'add_ie': ['Ooyala'],
}, {
# no explicit title
- 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
+ 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
'info_dict': {
- 'id': 'eLZSwoEd1S3pVyUm8lc6F',
- 'display_id': 'programa-226',
+ 'id': '57b0de3dc915da14058b4876',
'ext': 'mp4',
- 'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
- 'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
+ 'title': 'Cuarto Milenio Temporada 6 Programa 226',
+ 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
'series': 'Cuarto Milenio',
'season': 'Temporada 6',
+ 'season_number': 6,
+ 'season_id': 'cuarto_milenio_t06_12715',
'episode': 'Programa 226',
+ 'episode_number': 24,
'thumbnail': 're:(?i)^https?://.*\.jpg$',
- 'duration': 7312,
+ 'duration': 7313,
},
'params': {
'skip_download': True,
},
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- info = self._get_player_info(url, webpage)
-
- title = self._search_regex(
- r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
- webpage, 'title', default=None)
-
- mobj = re.search(r'''(?sx)
- class="Destacado-text"[^>]*>.*?<h1>\s*
- <span>(?P<series>[^<]+)</span>\s*
- <span>(?P<season>[^<]+)</span>\s*
- <span>(?P<episode>[^<]+)</span>''', webpage)
- series, season, episode = mobj.groups() if mobj else [None] * 3
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ gigya_url = self._search_regex(
+ r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
+ webpage, 'gigya', default=None)
+ gigya_sc = self._download_webpage(
+ compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
+ video_id, 'Downloading gigya script')
+
+        # Get an appKey/uuid for getting the session key
+ appKey_var = self._search_regex(
+ r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
+ gigya_sc, 'appKey variable')
+ appKey = self._search_regex(
+ r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
+
+ session_json = self._download_json(
+ 'https://appgrid-api.cloud.accedo.tv/session',
+ video_id, 'Downloading session keys', query={
+ 'appKey': appKey,
+ 'uuid': compat_str(uuid.uuid4()),
+ })
+
+ paths = self._download_json(
+ 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
+ video_id, 'Downloading paths JSON',
+ query={'sessionKey': compat_str(session_json['sessionKey'])})
+
+ ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
+ source = self._download_json(
+ 'http://%s%s%s/docs/%s' % (
+ ooyala_s['base_url'], ooyala_s['full_path'],
+ ooyala_s['provider_id'], video_id),
+ video_id, 'Downloading data JSON', query={
+ 'include_titles': 'Series,Season',
+ 'product_name': 'test',
+ 'format': 'full',
+ })['hits']['hits'][0]['_source']
+
+ embedCode = source['offers'][0]['embed_codes'][0]
+ titles = source['localizable_titles'][0]
+
+ title = titles.get('title_medium') or titles['title_long']
+
+ description = titles.get('summary_long') or titles.get('summary_medium')
+
+ def get(key1, key2):
+ value1 = source.get(key1)
+ if not value1 or not isinstance(value1, list):
+ return
+ if not isinstance(value1[0], dict):
+ return
+ return value1[0].get(key2)
+
+ series = get('localizable_titles_series', 'title_medium')
+
+ season = get('localizable_titles_season', 'title_medium')
+ season_number = int_or_none(source.get('season_number'))
+ season_id = source.get('season_id')
+
+ episode = titles.get('title_sort_name')
+ episode_number = int_or_none(source.get('episode_number'))
+
+ duration = parse_duration(get('videos', 'duration'))
- if not title:
- if mobj:
- title = '%s - %s - %s' % (series, season, episode)
- else:
- title = remove_start(self._search_regex(
- r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
-
- info.update({
- 'display_id': display_id,
+ return {
+ '_type': 'url_transparent',
+ # for some reason only HLS is supported
+ 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
+ 'id': video_id,
'title': title,
- 'description': get_element_by_attribute('class', 'text', webpage),
+ 'description': description,
'series': series,
'season': season,
+ 'season_number': season_number,
+ 'season_id': season_id,
'episode': episode,
- })
- return info
+ 'episode_number': episode_number,
+ 'duration': duration,
+ 'thumbnail': get('images', 'url'),
+ }
diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py
index d0cb827..30c206f 100644
--- a/youtube_dl/extractor/movieclips.py
+++ b/youtube_dl/extractor/movieclips.py
@@ -11,7 +11,7 @@ from ..utils import (
class MovieClipsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
+ _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
_TEST = {
'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
'md5': '42b5a0352d4933a7bd54f2104f481244',
diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py
index 1ec8e0f..d75ce8b 100644
--- a/youtube_dl/extractor/msn.py
+++ b/youtube_dl/extractor/msn.py
@@ -69,10 +69,9 @@ class MSNIE(InfoExtractor):
if not format_url:
continue
ext = determine_ext(format_url)
- # .ism is not yet supported (see
- # https://github.com/rg3/youtube-dl/issues/8118)
if ext == 'ism':
- continue
+ formats.extend(self._extract_ism_formats(
+ format_url + '/Manifest', display_id, 'mss', fatal=False))
if 'm3u8' in format_url:
# m3u8_native should not be used here until
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 74a3a03..0335191 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -13,6 +13,7 @@ from ..utils import (
fix_xml_ampersands,
float_or_none,
HEADRequest,
+ NO_DEFAULT,
RegexNotFoundError,
sanitized_Request,
strip_or_none,
@@ -201,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
[self._get_video_info(item) for item in idoc.findall('.//item')],
playlist_title=title, playlist_description=description)
- def _extract_mgid(self, webpage):
+ def _extract_mgid(self, webpage, default=NO_DEFAULT):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
@@ -221,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
sm4_embed = self._html_search_meta(
'sm4:video:embed', webpage, 'sm4 embed', default='')
mgid = self._search_regex(
- r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
+ r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default)
return mgid
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py
index 57cf1ce..7672845 100644
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .mtv import MTVServicesInfoExtractor
from ..utils import update_url_query
@@ -69,7 +71,7 @@ class NickIE(MTVServicesInfoExtractor):
class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de'
- _VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.nl)/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True,
@@ -79,15 +81,43 @@ class NickDeIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
+ 'only_matching': True,
}]
+ def _extract_mrss_url(self, webpage, host):
+ return update_url_query(self._search_regex(
+ r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
+ {'siteKey': host})
+
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
webpage = self._download_webpage(url, video_id)
- mrss_url = update_url_query(self._search_regex(
- r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
- {'siteKey': 'nick.de'})
+ mrss_url = self._extract_mrss_url(webpage, host)
return self._get_videos_info_from_url(mrss_url, video_id)
+
+
+class NickNightIE(NickDeIE):
+ IE_NAME = 'nicknight'
+ _VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nicknight.at/shows/977-awkward',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nicknight.at/shows/1900-faking-it',
+ 'only_matching': True,
+ }]
+
+ def _extract_mrss_url(self, webpage, *args):
+ return self._search_regex(
+ r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
+ 'mrss url', group='url')
diff --git a/youtube_dl/extractor/nobelprize.py b/youtube_dl/extractor/nobelprize.py
new file mode 100644
index 0000000..4dfdb09
--- /dev/null
+++ b/youtube_dl/extractor/nobelprize.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ mimetype2ext,
+ determine_ext,
+ update_url_query,
+ get_element_by_attribute,
+ int_or_none,
+)
+
+
+class NobelPrizeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
+ 'md5': '04c81e5714bb36cc4e2232fee1d8157f',
+ 'info_dict': {
+ 'id': '2636',
+ 'ext': 'mp4',
+ 'title': 'Announcement of the 2016 Nobel Prize in Physics',
+ 'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ media = self._parse_json(self._search_regex(
+ r'(?s)var\s*config\s*=\s*({.+?});', webpage,
+ 'config'), video_id, js_to_json)['media']
+ title = media['title']
+
+ formats = []
+ for source in media.get('source', []):
+ source_src = source.get('src')
+ if not source_src:
+ continue
+ ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_src, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(source_src, {'hdcore': '3.7.0'}),
+ video_id, f4m_id='hds', fatal=False))
+ else:
+ formats.append({
+ 'url': source_src,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': get_element_by_attribute('itemprop', 'description', webpage),
+ 'duration': int_or_none(media.get('duration')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py
index 6aa0895..61fe571 100644
--- a/youtube_dl/extractor/normalboots.py
+++ b/youtube_dl/extractor/normalboots.py
@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from .screenwavemedia import ScreenwaveMediaIE
+from .jwplatform import JWPlatformIE
from ..utils import (
unified_strdate,
@@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
- 'add_ie': ['ScreenwaveMedia'],
+ 'add_ie': ['JWPlatform'],
}
def _real_extract(self, url):
@@ -39,15 +39,13 @@ class NormalbootsIE(InfoExtractor):
r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date', fatal=False))
- screenwavemedia_url = self._html_search_regex(
- ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
- group='url')
+ jwplatform_url = JWPlatformIE._extract_url(webpage)
return {
'_type': 'url_transparent',
'id': video_id,
- 'url': screenwavemedia_url,
- 'ie_key': ScreenwaveMediaIE.ie_key(),
+ 'url': jwplatform_url,
+ 'ie_key': JWPlatformIE.ie_key(),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 3700b7a..c89aac6 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import random
import re
from .common import InfoExtractor
@@ -14,6 +15,25 @@ from ..utils import (
class NRKBaseIE(InfoExtractor):
+ _faked_ip = None
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ # NRK checks X-Forwarded-For HTTP header in order to figure out the
+ # origin of the client behind proxy. This allows to bypass geo
+ # restriction by faking this header's value to some Norway IP.
+ # We will do so once we encounter any geo restriction error.
+ if self._faked_ip:
+ # NB: str is intentional
+ kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
+ return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+ def _fake_ip(self):
+ # Use fake IP from 37.191.128.0/17 in order to workaround geo
+ # restriction
+ def octet(lb=0, ub=255):
+ return random.randint(lb, ub)
+ self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
+
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -24,6 +44,8 @@ class NRKBaseIE(InfoExtractor):
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
+ http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
+
entries = []
media_assets = data.get('mediaAssets')
@@ -54,6 +76,7 @@ class NRKBaseIE(InfoExtractor):
'duration': duration,
'subtitles': subtitles,
'formats': formats,
+ 'http_headers': http_headers,
})
if not entries:
@@ -70,10 +93,23 @@ class NRKBaseIE(InfoExtractor):
}]
if not entries:
- if data.get('usageRights', {}).get('isGeoBlocked'):
- raise ExtractorError(
- 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
- expected=True)
+ message_type = data.get('messageType', '')
+ # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+ if 'IsGeoBlocked' in message_type and not self._faked_ip:
+ self.report_warning(
+ 'Video is geo restricted, trying to fake IP')
+ self._fake_ip()
+ return self._real_extract(url)
+
+ MESSAGES = {
+ 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
+ 'ProgramRightsHasExpired': 'Programmet har gått ut',
+ 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
+ }
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, MESSAGES.get(
+ message_type, message_type)),
+ expected=True)
conviva = data.get('convivaStatistics') or {}
series = conviva.get('seriesName') or data.get('seriesTitle')
diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py
index 9cbc7c2..0a501b3 100644
--- a/youtube_dl/extractor/onet.py
+++ b/youtube_dl/extractor/onet.py
@@ -56,8 +56,8 @@ class OnetBaseIE(InfoExtractor):
continue
ext = determine_ext(video_url)
if format_id == 'ism':
- # TODO: Support Microsoft Smooth Streaming
- continue
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, 'mss', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index 72ec209..c2807d0 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
- def _extract(self, content_tree_url, video_id, domain='example.org'):
+ def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code']
@@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor):
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
compat_urllib_parse_urlencode({
'domain': domain,
- 'supportedFormats': 'mp4,rtmp,m3u8,hds',
+ 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
}), video_id)
cur_auth_data = auth_data['authorization_data'][embed_code]
@@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE):
url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url)
domain = smuggled_data.get('domain')
+ supportedformats = smuggled_data.get('supportedformats')
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
- return self._extract(content_tree_url, embed_code, domain)
+ return self._extract(content_tree_url, embed_code, domain, supportedformats)
class OoyalaExternalIE(OoyalaBaseIE):
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index 6cf7e4a..7f19b1b 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals, division
+import re
+
from .common import InfoExtractor
from ..compat import (
compat_chr,
@@ -10,6 +12,10 @@ from ..utils import (
determine_ext,
ExtractorError,
)
+from ..jsinterp import (
+ JSInterpreter,
+ _NAME_RE
+)
class OpenloadIE(InfoExtractor):
@@ -56,6 +62,44 @@ class OpenloadIE(InfoExtractor):
'only_matching': True,
}]
+ def openload_decode(self, txt):
+ symbol_dict = {
+ '(゚Д゚) [゚Θ゚]': '_',
+ '(゚Д゚) [゚ω゚ノ]': 'a',
+ '(゚Д゚) [゚Θ゚ノ]': 'b',
+ '(゚Д゚) [\'c\']': 'c',
+ '(゚Д゚) [゚ー゚ノ]': 'd',
+ '(゚Д゚) [゚Д゚ノ]': 'e',
+ '(゚Д゚) [1]': 'f',
+ '(゚Д゚) [\'o\']': 'o',
+ '(o゚ー゚o)': 'u',
+            # NOTE: duplicate "(゚Д゚) ['c']" mapping removed (same key already defined above)
+ '((゚ー゚) + (o^_^o))': '7',
+ '((o^_^o) +(o^_^o) +(c^_^o))': '6',
+ '((゚ー゚) + (゚Θ゚))': '5',
+ '(-~3)': '4',
+ '(-~-~1)': '3',
+ '(-~1)': '2',
+ '(-~0)': '1',
+ '((c^_^o)-(c^_^o))': '0',
+ }
+ delim = '(゚Д゚)[゚ε゚]+'
+ end_token = '(゚Д゚)[゚o゚]'
+ symbols = '|'.join(map(re.escape, symbol_dict.keys()))
+ txt = re.sub('(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt)
+ ret = ''
+ for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt):
+ for aachar in aacode.split(delim):
+ if aachar.isdigit():
+ ret += compat_chr(int(aachar, 8))
+ else:
+ m = re.match(r'^u([\da-f]{4})$', aachar)
+ if m:
+ ret += compat_chr(int(m.group(1), 16))
+ else:
+ self.report_warning("Cannot decode: %s" % aachar)
+ return ret
+
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
@@ -70,19 +114,26 @@ class OpenloadIE(InfoExtractor):
r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
webpage, 'encrypted data')
- magic = compat_ord(enc_data[-1])
+ enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>',
+ webpage, 'encrypted code')
+
+ js_code = self.openload_decode(enc_code)
+ jsi = JSInterpreter(js_code)
+
+ m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function')
+ m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function')
+
+ offset = jsi.call_function(m_offset_fun)
+ diff = jsi.call_function(m_diff_fun)
+
video_url_chars = []
for idx, c in enumerate(enc_data):
j = compat_ord(c)
- if j == magic:
- j -= 1
- elif j == magic - 1:
- j += 1
if j >= 33 and j <= 126:
j = ((j + 14) % 94) + 33
- if idx == len(enc_data) - 1:
- j += 2
+ if idx == len(enc_data) - offset:
+ j += diff
video_url_chars += compat_chr(j)
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py
new file mode 100644
index 0000000..133cc9b
--- /dev/null
+++ b/youtube_dl/extractor/pandatv.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ qualities,
+)
+
+
+class PandaTVIE(InfoExtractor):
+ IE_DESC = '熊猫TV'
+ _VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.panda.tv/10091',
+ 'info_dict': {
+ 'id': '10091',
+ 'title': 're:.+',
+ 'uploader': '囚徒',
+ 'ext': 'flv',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Live stream is offline',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ config = self._download_json(
+ 'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
+
+ error_code = config.get('errno', 0)
+        if error_code != 0:
+ raise ExtractorError(
+ '%s returned error %s: %s'
+ % (self.IE_NAME, error_code, config['errmsg']),
+ expected=True)
+
+ data = config['data']
+ video_info = data['videoinfo']
+
+ # 2 = live, 3 = offline
+ if video_info.get('status') != '2':
+ raise ExtractorError(
+ 'Live stream is offline', expected=True)
+
+ title = data['roominfo']['name']
+ uploader = data.get('hostinfo', {}).get('name')
+ room_key = video_info['room_key']
+ stream_addr = video_info.get(
+ 'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'})
+
+ # Reverse engineered from web player swf
+ # (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of
+ # writing).
+ plflag0, plflag1 = video_info['plflag'].split('_')
+ plflag0 = int(plflag0) - 1
+ if plflag1 == '21':
+ plflag0 = 10
+ plflag1 = '4'
+ live_panda = 'live_panda' if plflag0 < 1 else ''
+
+ quality_key = qualities(['OD', 'HD', 'SD'])
+ suffix = ['_small', '_mid', '']
+ formats = []
+ for k, v in stream_addr.items():
+ if v != '1':
+ continue
+ quality = quality_key(k)
+ if quality <= 0:
+ continue
+ for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
+ formats.append({
+ 'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
+ % (pl, plflag1, room_key, live_panda, suffix[quality], ext),
+ 'format_id': '%s-%s' % (k, ext),
+ 'quality': quality,
+ 'source_preference': pref,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title),
+ 'uploader': uploader,
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py
index c3c38cf..ddfc6f1 100644
--- a/youtube_dl/extractor/plays.py
+++ b/youtube_dl/extractor/plays.py
@@ -8,30 +8,31 @@ from ..utils import int_or_none
class PlaysTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
- _TEST = {
- 'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
+ _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
+ _TESTS = [{
+ 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
'md5': 'dfeac1198506652b5257a62762cec7bc',
'info_dict': {
'id': '56af17f56c95335490',
'ext': 'mp4',
- 'title': 'When you outplay the Azir wall',
+ 'title': 'Bjergsen - When you outplay the Azir wall',
'description': 'Posted by Bjergsen',
}
- }
+ }, {
+ 'url': 'https://plays.tv/embeds/56af17f56c95335490',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ 'https://plays.tv/video/%s' % video_id, video_id)
+
+ info = self._search_json_ld(webpage, video_id,)
- title = self._og_search_title(webpage)
- content = self._parse_json(
- self._search_regex(
- r'R\.bindContent\(({.+?})\);', webpage,
- 'content'), video_id)['content']
mpd_url, sources = re.search(
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
- content).groups()
+ webpage).groups()
formats = self._extract_mpd_formats(
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
@@ -42,10 +43,11 @@ class PlaysTVIE(InfoExtractor):
})
self._sort_formats(formats)
- return {
+ info.update({
'id': video_id,
- 'title': title,
'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
'formats': formats,
- }
+ })
+
+ return info
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
index 2683c0a..0ffd41e 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -11,6 +11,7 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
+ dict_get,
ExtractorError,
float_or_none,
int_or_none,
@@ -119,14 +120,17 @@ class PluralsightIE(PluralsightBaseIE):
@staticmethod
def _convert_subtitles(duration, subs):
srt = ''
+ TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
+ TEXT_KEYS = ('text', 'Text')
for num, current in enumerate(subs):
current = subs[num]
- start, text = float_or_none(
- current.get('DisplayTimeOffset')), current.get('Text')
+ start, text = (
+ float_or_none(dict_get(current, TIME_OFFSET_KEYS)),
+ dict_get(current, TEXT_KEYS))
if start is None or text is None:
continue
end = duration if num == len(subs) - 1 else float_or_none(
- subs[num + 1].get('DisplayTimeOffset'))
+ dict_get(subs[num + 1], TIME_OFFSET_KEYS))
if end is None:
continue
srt += os.linesep.join(
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 0724efc..40dbe69 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
- (?P<id>[0-9a-z]+)
+ (?P<id>[\da-z]+)
'''
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
'only_matching': True,
}]
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
- if mobj:
- return mobj.group('url')
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
+ webpage)
def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex(
diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py
index 1c54af0..80091b8 100644
--- a/youtube_dl/extractor/puls4.py
+++ b/youtube_dl/extractor/puls4.py
@@ -10,7 +10,7 @@ from ..utils import (
class Puls4IE(ProSiebenSat1BaseIE):
- _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>(?:[^/]+/)*?videos/[^?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
_TESTS = [{
'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
@@ -22,6 +22,12 @@ class Puls4IE(ProSiebenSat1BaseIE):
'upload_date': '20160830',
'uploader': 'PULS_4',
},
+ }, {
+ 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
+ 'only_matching': True,
}]
_TOKEN = 'puls4'
_SALT = '01!kaNgaiNgah1Ie4AeSha'
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py
index 6751270..321917a 100644
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dl/extractor/radiocanada.py
@@ -125,6 +125,14 @@ class RadioCanadaIE(InfoExtractor):
f4m_id='hds', fatal=False))
self._sort_formats(formats)
+ subtitles = {}
+ closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
+ if closed_caption_url:
+ subtitles['fr'] = [{
+ 'url': closed_caption_url,
+ 'ext': determine_ext(closed_caption_url, 'vtt'),
+ }]
+
return {
'id': video_id,
'title': get_meta('Title'),
@@ -135,6 +143,7 @@ class RadioCanadaIE(InfoExtractor):
'season_number': int_or_none('SrcSaison'),
'episode_number': int_or_none('SrcEpisode'),
'upload_date': unified_strdate(get_meta('Date')),
+ 'subtitles': subtitles,
'formats': formats,
}
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index 721fc3a..c367a6a 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -10,8 +12,8 @@ from ..utils import (
class RedTubeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'http://www.redtube.com/66418',
'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': {
@@ -23,11 +25,21 @@ class RedTubeIE(InfoExtractor):
'view_count': int,
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
+ webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ 'http://www.redtube.com/%s' % video_id, video_id)
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py
new file mode 100644
index 0000000..422c02c
--- /dev/null
+++ b/youtube_dl/extractor/rentv.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformBaseIE
+from ..compat import compat_str
+
+
+class RENTVIE(JWPlatformBaseIE):
+ _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://ren.tv/video/epizod/118577',
+ 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+ 'info_dict': {
+ 'id': '118577',
+ 'ext': 'mp4',
+ 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"'
+ }
+ }, {
+ 'url': 'http://ren.tv/player/118577',
+ 'only_matching': True,
+ }, {
+ 'url': 'rentv:118577',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
+ jw_config = self._parse_json(self._search_regex(
+ r'config\s*=\s*({.+});', webpage, 'jw config'), video_id)
+ return self._parse_jwplayer_data(jw_config, video_id, m3u8_id='hls')
+
+
+class RENTVArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
+ 'md5': 'ebd63c4680b167693745ab91343df1d6',
+ 'info_dict': {
+ 'id': '136472',
+ 'ext': 'mp4',
+ 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла',
+ 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.',
+ }
+ }, {
+ # TODO: invalid m3u8
+ 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+ 'info_dict': {
+ 'id': 'playlist',
+ 'ext': 'mp4',
+ 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
+ 'uploader': 'ren.tv',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+
+ entries = []
+ for config_profile in drupal_settings.get('ren_jwplayer', {}).values():
+ media_id = config_profile.get('mediaid')
+ if not media_id:
+ continue
+ media_id = compat_str(media_id)
+ entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id))
+ return self.playlist_result(entries, display_id)
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py
index 2fce4e8..6db3e3e 100644
--- a/youtube_dl/extractor/ruutu.py
+++ b/youtube_dl/extractor/ruutu.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
determine_ext,
+ ExtractorError,
int_or_none,
xpath_attr,
xpath_text,
@@ -101,6 +102,11 @@ class RuutuIE(InfoExtractor):
})
extract_formats(video_xml.find('./Clip'))
+
+ drm = xpath_text(video_xml, './Clip/DRM', default=None)
+ if not formats and drm:
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py
deleted file mode 100644
index 7d77e88..0000000
--- a/youtube_dl/extractor/screenwavemedia.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_strdate,
- js_to_json,
-)
-
-
-class ScreenwaveMediaIE(InfoExtractor):
- _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
- EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
- _TESTS = [{
- 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- playerdata = self._download_webpage(
- 'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
- video_id, 'Downloading player webpage')
-
- vidtitle = self._search_regex(
- r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
-
- playerconfig = self._download_webpage(
- 'http://player.screenwavemedia.com/player.js',
- video_id, 'Downloading playerconfig webpage')
-
- videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')
-
- sources = self._parse_json(
- js_to_json(
- re.sub(
- r'(?s)/\*.*?\*/', '',
- self._search_regex(
- r'sources\s*:\s*(\[[^\]]+?\])', playerconfig,
- 'sources',
- ).replace(
- "' + thisObj.options.videoserver + '",
- videoserver
- ).replace(
- "' + playerVidId + '",
- video_id
- )
- )
- ),
- video_id, fatal=False
- )
-
- # Fallback to hardcoded sources if JS changes again
- if not sources:
- self.report_warning('Falling back to a hardcoded list of streams')
- sources = [{
- 'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
- 'type': 'mp4',
- 'label': format_label,
- } for format_id, format_label in (
- ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))]
- sources.append({
- 'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id),
- 'type': 'hls',
- })
-
- formats = []
- for source in sources:
- file_ = source.get('file')
- if not file_:
- continue
- if source.get('type') == 'hls':
- formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
- else:
- format_id = self._search_regex(
- r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
- if not self._is_valid_url(file_, video_id, format_id or 'video'):
- continue
- format_label = source.get('label')
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]', format_label, 'height', default=None))
- formats.append({
- 'url': file_,
- 'format_id': format_id,
- 'format': format_label,
- 'ext': source.get('type'),
- 'height': height,
- })
- self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
-
- return {
- 'id': video_id,
- 'title': vidtitle,
- 'formats': formats,
- }
-
-
-class TeamFourIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
- _TEST = {
- 'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
- 'info_dict': {
- 'id': 'TeamFourStar-5292a02f20bfa',
- 'ext': 'mp4',
- 'upload_date': '20130401',
- 'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
- 'title': 'A Moment With TFS Episode 4',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- playerdata_url = self._search_regex(
- r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
- webpage, 'player data URL')
-
- video_title = self._html_search_regex(
- r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
- webpage, 'title')
- video_date = unified_strdate(self._html_search_regex(
- r'<div class="heroheadingdate">(?P<date>.+?)</div>',
- webpage, 'date', fatal=False))
- video_description = self._html_search_regex(
- r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
- webpage, 'description', fatal=False)
- video_thumbnail = self._og_search_thumbnail(webpage)
-
- return {
- '_type': 'url_transparent',
- 'display_id': display_id,
- 'title': video_title,
- 'description': video_description,
- 'upload_date': video_date,
- 'thumbnail': video_thumbnail,
- 'url': playerdata_url,
- }
diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py
index ca286ab..62d41e8 100644
--- a/youtube_dl/extractor/shahid.py
+++ b/youtube_dl/extractor/shahid.py
@@ -1,17 +1,24 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+import json
+
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
str_or_none,
+ urlencode_postdata,
+ clean_html,
)
class ShahidIE(InfoExtractor):
- _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
+ _NETRC_MACHINE = 'shahid'
+ _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
'info_dict': {
@@ -28,17 +35,53 @@ class ShahidIE(InfoExtractor):
'skip_download': True,
}
}, {
+ 'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
+ 'only_matching': True
+ }, {
# shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
'only_matching': True
}]
- def _call_api(self, path, video_id, note):
- data = self._download_json(
- 'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
- 'apiKey': 'sh@hid0nlin3',
- 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
- }).get('data', {})
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ try:
+ user_data = self._download_json(
+ 'https://shahid.mbc.net/wd/service/users/login',
+ None, 'Logging in', data=json.dumps({
+ 'email': email,
+ 'password': password,
+ 'basic': 'false',
+ }).encode('utf-8'), headers={
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['user']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ fail_data = self._parse_json(
+ e.cause.read().decode('utf-8'), None, fatal=False)
+ if fail_data:
+ faults = fail_data.get('faults', [])
+ faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
+ if faults_message:
+ raise ExtractorError(faults_message, expected=True)
+ raise
+
+ self._download_webpage(
+ 'https://shahid.mbc.net/populateContext',
+ None, 'Populate Context', data=urlencode_postdata({
+ 'firstName': user_data['firstName'],
+ 'lastName': user_data['lastName'],
+ 'userName': user_data['email'],
+ 'csg_user_name': user_data['email'],
+ 'subscriberId': user_data['id'],
+ 'sessionId': user_data['sessionId'],
+ }))
+
+ def _get_api_data(self, response):
+ data = response.get('data', {})
error = data.get('error')
if error:
@@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor):
return data
def _real_extract(self, url):
- video_id = self._match_id(url)
+ page_type, video_id = re.match(self._VALID_URL, url).groups()
- player = self._call_api(
- 'Content/Episode/%s' % video_id,
- video_id, 'Downloading player JSON')
+ player = self._get_api_data(self._download_json(
+ 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
+ video_id, 'Downloading player JSON'))
if player.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
@@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor):
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats)
- video = self._call_api(
- 'episode/%s' % video_id, video_id,
- 'Downloading video JSON')['episode']
+ video = self._get_api_data(self._download_json(
+ 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
+ video_id, 'Downloading video JSON', query={
+ 'apiKey': 'sh@hid0nlin3',
+ 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
+ }))[page_type]
title = video['title']
categories = [
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
index d592dfe..89e19e9 100644
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -10,11 +10,38 @@ from ..utils import (
)
-class SharedIE(InfoExtractor):
- IE_DESC = 'shared.sx and vivo.sx'
- _VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
+class SharedBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ if self._FILE_NOT_FOUND in webpage:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ video_url = self._extract_video_url(webpage, video_id, url)
+
+ title = base64.b64decode(self._html_search_meta(
+ 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
+ filesize = int_or_none(self._html_search_meta(
+ 'full:size', webpage, 'file size', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'filesize': filesize,
+ 'title': title,
+ }
+
+
+class SharedIE(SharedBaseIE):
+ IE_DESC = 'shared.sx'
+ _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
+ _FILE_NOT_FOUND = '>File does not exist<'
- _TESTS = [{
+ _TEST = {
'url': 'http://shared.sx/0060718775',
'md5': '106fefed92a8a2adb8c98e6a0652f49b',
'info_dict': {
@@ -23,52 +50,47 @@ class SharedIE(InfoExtractor):
'title': 'Bmp4',
'filesize': 1720110,
},
- }, {
- 'url': 'http://vivo.sx/d7ddda0e78',
- 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
- 'info_dict': {
- 'id': 'd7ddda0e78',
- 'ext': 'mp4',
- 'title': 'Chicken',
- 'filesize': 528031,
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage, urlh = self._download_webpage_handle(url, video_id)
-
- if '>File does not exist<' in webpage:
- raise ExtractorError(
- 'Video %s does not exist' % video_id, expected=True)
+ }
+ def _extract_video_url(self, webpage, video_id, url):
download_form = self._hidden_inputs(webpage)
video_page = self._download_webpage(
- urlh.geturl(), video_id, 'Downloading video page',
+ url, video_id, 'Downloading video page',
data=urlencode_postdata(download_form),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
- 'Referer': urlh.geturl(),
+ 'Referer': url,
})
video_url = self._html_search_regex(
r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
video_page, 'video URL', group='url')
- title = base64.b64decode(self._html_search_meta(
- 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
- filesize = int_or_none(self._html_search_meta(
- 'full:size', webpage, 'file size', fatal=False))
- thumbnail = self._html_search_regex(
- r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
- video_page, 'thumbnail', default=None, group='url')
- return {
- 'id': video_id,
- 'url': video_url,
+ return video_url
+
+
+class VivoIE(SharedBaseIE):
+ IE_DESC = 'vivo.sx'
+ _VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
+ _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
+
+ _TEST = {
+ 'url': 'http://vivo.sx/d7ddda0e78',
+ 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
+ 'info_dict': {
+ 'id': 'd7ddda0e78',
'ext': 'mp4',
- 'filesize': filesize,
- 'title': title,
- 'thumbnail': thumbnail,
- }
+ 'title': 'Chicken',
+ 'filesize': 528031,
+ },
+ }
+
+ def _extract_video_url(self, webpage, video_id, *args):
+ return self._parse_json(
+ self._search_regex(
+ r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'stream', group='url'),
+ video_id,
+ transform_source=lambda x: base64.b64decode(
+ x.encode('ascii')).decode('utf-8'))[0]
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 3b7ecb3..5a201ea 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor):
},
]
- _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
+ _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
index 218785e..abfee3e 100644
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import re
+
from .mtv import MTVServicesInfoExtractor
@@ -17,6 +19,15 @@ class SpikeIE(MTVServicesInfoExtractor):
'upload_date': '20131227',
},
}, {
+ 'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209',
+ 'md5': 'b25c6f16418aefb9ad5a6cae2559321f',
+ 'info_dict': {
+ 'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
+ 'ext': 'mp4',
+ 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
+ 'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
+ },
+ }, {
'url': 'http://www.spike.com/video-clips/lhtu8m/',
'only_matching': True,
}, {
@@ -32,3 +43,12 @@ class SpikeIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
+ _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
+
+ def _extract_mgid(self, webpage):
+ mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None)
+ if mgid is None:
+ url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id')
+ video_type, episode_id = url_parts.split('/', 1)
+ mgid = 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id)
+ return mgid
diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py
new file mode 100644
index 0000000..a8c6ed7
--- /dev/null
+++ b/youtube_dl/extractor/teamfourstar.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from ..utils import unified_strdate
+
+
+class TeamFourStarIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)'
+ _TEST = {
+ 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
+ 'info_dict': {
+ 'id': '0WdZO31W',
+ 'title': 'TFS Abridged Parody Episode 1',
+ 'description': 'md5:d60bc389588ebab2ee7ad432bda953ae',
+ 'ext': 'mp4',
+ 'timestamp': 1394168400,
+ 'upload_date': '20080508',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ jwplatform_url = JWPlatformIE._extract_url(webpage)
+
+ video_title = self._html_search_regex(
+ r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>',
+ webpage, 'title')
+ video_date = unified_strdate(self._html_search_regex(
+ r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>',
+ webpage, 'date', fatal=False))
+ video_description = self._html_search_regex(
+ r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>',
+ webpage, 'description', fatal=False)
+ video_thumbnail = self._og_search_thumbnail(webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'display_id': display_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'upload_date': video_date,
+ 'thumbnail': video_thumbnail,
+ 'url': jwplatform_url,
+ }
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
index 7629f0d..197258d 100644
--- a/youtube_dl/extractor/thisoldhouse.py
+++ b/youtube_dl/extractor/thisoldhouse.py
@@ -5,10 +5,10 @@ from .common import InfoExtractor
class ThisOldHouseIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
- 'md5': '568acf9ca25a639f0c4ff905826b662f',
+ 'md5': '946f05bbaa12a33f9ae35580d2dfcfe3',
'info_dict': {
'id': '2REGtUDQ',
'ext': 'mp4',
@@ -20,6 +20,9 @@ class ThisOldHouseIE(InfoExtractor):
}, {
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py
index 979856e..419f9d9 100644
--- a/youtube_dl/extractor/tmz.py
+++ b/youtube_dl/extractor/tmz.py
@@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
_TEST = {
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
- 'md5': 'e482a414a38db73087450e3a6ce69d00',
+ 'md5': '3316ff838ae5bb7f642537825e1e90d2',
'info_dict': {
'id': '0_6snoelag',
- 'ext': 'mp4',
+ 'ext': 'mov',
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
+ 'timestamp': 1429467813,
+ 'upload_date': '20150419',
+ 'uploader_id': 'batchUser',
}
}
@@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- embedded_video_info_str = self._html_search_regex(
- r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')
-
- embedded_video_info = self._parse_json(
- embedded_video_info_str, video_id,
- transform_source=lambda s: s.replace('\\', ''))
+ embedded_video_info = self._parse_json(self._html_search_regex(
+ r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
+ video_id)
return self.url_result(
'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py
index d2d5c11..26d7709 100644
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@@ -15,11 +15,11 @@ from ..utils import (
class TouTvIE(InfoExtractor):
_NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv'
- _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)'
+ _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)'
_access_token = None
_claims = None
- _TEST = {
+ _TESTS = [{
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
'info_dict': {
'id': '122017',
@@ -33,7 +33,10 @@ class TouTvIE(InfoExtractor):
'skip_download': True,
},
'skip': '404 Not Found',
- }
+ }, {
+ 'url': 'http://ici.tou.tv/hackers',
+ 'only_matching': True,
+ }]
def _real_initialize(self):
email, password = self._get_login_info()
@@ -53,7 +56,7 @@ class TouTvIE(InfoExtractor):
'state': state,
})
login_form = self._search_regex(
- r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form')
+ r'(?s)(<form[^>]+(?:id|name)="Form-login".+?</form>)', login_webpage, 'login form')
form_data = self._hidden_inputs(login_form)
form_data.update({
'login-email': email,
diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py
index c6572de..3a37df2 100644
--- a/youtube_dl/extractor/tubitv.py
+++ b/youtube_dl/extractor/tubitv.py
@@ -9,7 +9,6 @@ from ..utils import (
int_or_none,
sanitized_Request,
urlencode_postdata,
- parse_iso8601,
)
@@ -19,17 +18,13 @@ class TubiTvIE(InfoExtractor):
_NETRC_MACHINE = 'tubitv'
_TEST = {
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
+ 'md5': '43ac06be9326f41912dc64ccf7a80320',
'info_dict': {
'id': '283829',
'ext': 'mp4',
'title': 'The Comedian at The Friday',
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
- 'uploader': 'Indie Rights Films',
- 'upload_date': '20160111',
- 'timestamp': 1452555979,
- },
- 'params': {
- 'skip_download': 'HLS download',
+ 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
},
}
@@ -58,19 +53,28 @@ class TubiTvIE(InfoExtractor):
video_id = self._match_id(url)
video_data = self._download_json(
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
- title = video_data['n']
+ title = video_data['title']
formats = self._extract_m3u8_formats(
- video_data['mh'], video_id, 'mp4', 'm3u8_native')
+ self._proto_relative_url(video_data['url']),
+ video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
+ thumbnails = []
+ for thumbnail_url in video_data.get('thumbnails', []):
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': self._proto_relative_url(thumbnail_url),
+ })
+
subtitles = {}
- for sub in video_data.get('sb', []):
- sub_url = sub.get('u')
+ for sub in video_data.get('subtitles', []):
+ sub_url = sub.get('url')
if not sub_url:
continue
- subtitles.setdefault(sub.get('l', 'en'), []).append({
- 'url': sub_url,
+ subtitles.setdefault(sub.get('lang', 'English'), []).append({
+ 'url': self._proto_relative_url(sub_url),
})
return {
@@ -78,9 +82,8 @@ class TubiTvIE(InfoExtractor):
'title': title,
'formats': formats,
'subtitles': subtitles,
- 'thumbnail': video_data.get('ph'),
- 'description': video_data.get('d'),
- 'duration': int_or_none(video_data.get('s')),
- 'timestamp': parse_iso8601(video_data.get('u')),
- 'uploader': video_data.get('on'),
+ 'thumbnails': thumbnails,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'uploader_id': video_data.get('publisher_id'),
}
diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py
new file mode 100644
index 0000000..1086176
--- /dev/null
+++ b/youtube_dl/extractor/tvanouvelles.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+
+
+class TVANouvellesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/videos/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tvanouvelles.ca/videos/5117035533001',
+ 'info_dict': {
+ 'id': '5117035533001',
+ 'ext': 'mp4',
+ 'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber: explications',
+ 'description': 'md5:479653b7c8cf115747bf5118066bd8b3',
+ 'uploader_id': '1741764581',
+ 'timestamp': 1473352030,
+ 'upload_date': '20160908',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ brightcove_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ BrightcoveNewIE.ie_key(), brightcove_id)
+
+
+class TVANouvellesArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.tvanouvelles.ca/2016/11/17/des-policiers-qui-ont-la-meche-un-peu-courte',
+ 'info_dict': {
+ 'id': 'des-policiers-qui-ont-la-meche-un-peu-courte',
+ 'title': 'Des policiers qui ont «la mèche un peu courte»?',
+ 'description': 'md5:92d363c8eb0f0f030de9a4a84a90a3a0',
+ },
+ 'playlist_mincount': 4,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TVANouvellesIE.suitable(url) else super(TVANouvellesArticleIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = [
+ self.url_result(
+ 'http://www.tvanouvelles.ca/videos/%s' % mobj.group('id'),
+ ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id'))
+ for mobj in re.finditer(
+ r'data-video-id=(["\'])?(?P<id>\d+)', webpage)]
+
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._og_search_description(webpage)
+
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 2abfb78..06ea2b4 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -69,7 +69,8 @@ class TVPIE(InfoExtractor):
webpage = self._download_webpage(url, page_id)
video_id = self._search_regex([
r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
- "object_id\s*:\s*'(\d+)'"], webpage, 'video id')
+ r"object_id\s*:\s*'(\d+)'",
+ r'data-video-id="(\d+)"'], webpage, 'video id', default=page_id)
return {
'_type': 'url_transparent',
'url': 'tvp:' + video_id,
@@ -138,6 +139,9 @@ class TVPEmbedIE(InfoExtractor):
# formats.extend(self._extract_mpd_formats(
# video_url_base + '.ism/video.mpd',
# video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ video_url_base + '.ism/Manifest',
+ video_id, 'mss', fatal=False))
formats.extend(self._extract_f4m_formats(
video_url_base + '.ism/video.f4m',
video_id, f4m_id='hds', fatal=False))
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 46c2cfe..77414a2 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -398,7 +398,7 @@ class TwitchStreamIE(TwitchBaseIE):
channel_id = self._match_id(url)
stream = self._call_api(
- 'kraken/streams/%s' % channel_id, channel_id,
+ 'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
'Downloading stream JSON').get('stream')
if not stream:
@@ -417,6 +417,7 @@ class TwitchStreamIE(TwitchBaseIE):
query = {
'allow_source': 'true',
'allow_audio_only': 'true',
+ 'allow_spectre': 'true',
'p': random.randint(1000000, 10000000),
'player': 'twitchweb',
'segment_preference': '4',
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 3411fcf..ac0b221 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -25,7 +25,7 @@ class TwitterBaseIE(InfoExtractor):
class TwitterCardIE(TwitterBaseIE):
IE_NAME = 'twitter:card'
- _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
_TESTS = [
{
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -84,6 +84,9 @@ class TwitterCardIE(TwitterBaseIE):
'title': 'Twitter web player',
'thumbnail': 're:^https?://.*\.jpg',
},
+ }, {
+ 'url': 'https://twitter.com/i/videos/752274308186120192',
+ 'only_matching': True,
},
]
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
index 2cd617b..6b9c227 100644
--- a/youtube_dl/extractor/vessel.py
+++ b/youtube_dl/extractor/vessel.py
@@ -13,7 +13,7 @@ from ..utils import (
class VesselIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
+ _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)'
_API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
_LOGIN_URL = 'https://www.vessel.com/api/account/login'
_NETRC_MACHINE = 'vessel'
@@ -32,12 +32,18 @@ class VesselIE(InfoExtractor):
}, {
'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
'only_matching': True,
+ }, {
+ 'url': 'https://www.vessel.com/videos/F01_dsLj1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vessel.com/videos/RRX-sir-J',
+ 'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [url for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1',
+ r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1',
webpage)]
@staticmethod
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 783efda..d82261e 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -51,7 +51,7 @@ class VevoIE(VevoBaseIE):
'artist': 'Hurts',
'genre': 'Pop',
},
- 'expected_warnings': ['Unable to download SMIL file'],
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, {
'note': 'v3 SMIL format',
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
@@ -67,7 +67,7 @@ class VevoIE(VevoBaseIE):
'artist': 'Cassadee Pope',
'genre': 'Country',
},
- 'expected_warnings': ['Unable to download SMIL file'],
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, {
'note': 'Age-limited video',
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
@@ -83,7 +83,7 @@ class VevoIE(VevoBaseIE):
'artist': 'Justin Timberlake',
'genre': 'Pop',
},
- 'expected_warnings': ['Unable to download SMIL file'],
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, {
'note': 'No video_info',
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
@@ -91,15 +91,33 @@ class VevoIE(VevoBaseIE):
'info_dict': {
'id': 'USUV71503000',
'ext': 'mp4',
- 'title': 'K Camp - Till I Die',
+ 'title': 'K Camp ft. T.I. - Till I Die',
'age_limit': 18,
'timestamp': 1449468000,
'upload_date': '20151207',
'uploader': 'K Camp',
'track': 'Till I Die',
'artist': 'K Camp',
- 'genre': 'Rap/Hip-Hop',
+ 'genre': 'Hip-Hop',
},
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Featured test',
+ 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
+ 'md5': 'd28675e5e8805035d949dc5cf161071d',
+ 'info_dict': {
+ 'id': 'USUV71402190',
+ 'ext': 'mp4',
+ 'title': 'Lemaitre ft. LoLo - Wait',
+ 'age_limit': 0,
+ 'timestamp': 1413432000,
+ 'upload_date': '20141016',
+ 'uploader': 'Lemaitre',
+ 'track': 'Wait',
+ 'artist': 'Lemaitre',
+ 'genre': 'Electronic',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, {
'note': 'Only available via webpage',
'url': 'http://www.vevo.com/watch/GBUV71600656',
@@ -242,8 +260,11 @@ class VevoIE(VevoBaseIE):
timestamp = parse_iso8601(video_info.get('releaseDate'))
artists = video_info.get('artists')
- if artists:
- artist = uploader = artists[0]['name']
+ for curr_artist in artists:
+ if curr_artist.get('role') == 'Featured':
+ featured_artist = curr_artist['name']
+ else:
+ artist = uploader = curr_artist['name']
view_count = int_or_none(video_info.get('views', {}).get('total'))
for video_version in video_versions:
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index e2b2ce0..8a00c8f 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -1,12 +1,93 @@
+# coding: utf-8
from __future__ import unicode_literals
import re
+import time
+import hashlib
+import json
+from .adobepass import AdobePassIE
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..compat import compat_HTTPError
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ str_or_none,
+ parse_duration,
+ ExtractorError,
+ extract_attributes,
+)
-class ViceIE(InfoExtractor):
+class ViceBaseIE(AdobePassIE):
+ def _extract_preplay_video(self, url, webpage):
+ watch_hub_data = extract_attributes(self._search_regex(
+ r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
+ video_id = watch_hub_data['vms-id']
+ title = watch_hub_data['video-title']
+
+ query = {}
+ is_locked = watch_hub_data.get('video-locked') == '1'
+ if is_locked:
+ resource = self._get_mvpd_resource(
+ 'VICELAND', title, video_id,
+ watch_hub_data.get('video-rating'))
+ query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
+
+ # signature generation algorithm is reverse engineered from signatureGenerator in
+ # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
+ # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
+ exp = int(time.time()) + 14400
+ query.update({
+ 'exp': exp,
+ 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
+ })
+
+ try:
+ host = 'www.viceland' if is_locked else self._PREPLAY_HOST
+ preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ error = json.loads(e.cause.read().decode())
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
+ raise
+
+ video_data = preplay['video']
+ base = video_data['base']
+ uplynk_preplay_url = preplay['preplayURL']
+ episode = video_data.get('episode', {})
+ channel = video_data.get('channel', {})
+
+ subtitles = {}
+ cc_url = preplay.get('ccURL')
+ if cc_url:
+ subtitles['en'] = [{
+ 'url': cc_url,
+ }]
+
+ return {
+ '_type': 'url_transparent',
+ 'url': uplynk_preplay_url,
+ 'id': video_id,
+ 'title': title,
+ 'description': base.get('body'),
+ 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
+ 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
+ 'timestamp': int_or_none(video_data.get('created_at')),
+ 'age_limit': parse_age_limit(video_data.get('video_rating')),
+ 'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
+ 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
+ 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
+ 'season_number': int_or_none(watch_hub_data.get('season')),
+ 'season_id': str_or_none(episode.get('season_id')),
+ 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
+ 'uploader_id': str_or_none(channel.get('id')),
+ 'subtitles': subtitles,
+ 'ie_key': 'UplynkPreplay',
+ }
+
+
+class ViceIE(ViceBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
_TESTS = [{
@@ -21,7 +102,7 @@ class ViceIE(InfoExtractor):
'add_ie': ['Ooyala'],
}, {
'url': 'http://www.vice.com/video/how-to-hack-a-car',
- 'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
+ 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': {
'id': '3jstaBeXgAs',
'ext': 'mp4',
@@ -33,6 +114,22 @@ class ViceIE(InfoExtractor):
},
'add_ie': ['Youtube'],
}, {
+ 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
+ 'md5': '',
+ 'info_dict': {
+ 'id': '5816510690b70e6c5fd39a56',
+ 'ext': 'mp4',
+ 'uploader': 'Waypoint',
+ 'title': 'The Signal From Tölva',
+ 'uploader_id': '57f7d621e05ca860fa9ccaf9',
+ 'timestamp': 1477941983938,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['UplynkPreplay'],
+ }, {
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'only_matching': True,
}, {
@@ -42,21 +139,21 @@ class ViceIE(InfoExtractor):
'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
'only_matching': True,
}]
+ _PREPLAY_HOST = 'video.vice'
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- try:
- embed_code = self._search_regex(
- r'embedCode=([^&\'"]+)', webpage,
- 'ooyala embed code', default=None)
- if embed_code:
- return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
- youtube_id = self._search_regex(
- r'data-youtube-id="([^"]+)"', webpage, 'youtube id')
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+ embed_code = self._search_regex(
+ r'embedCode=([^&\'"]+)', webpage,
+ 'ooyala embed code', default=None)
+ if embed_code:
+ return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
+ youtube_id = self._search_regex(
+ r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
+ if youtube_id:
return self.url_result(youtube_id, 'Youtube')
- except ExtractorError:
- raise ExtractorError('The page doesn\'t contain a video', expected=True)
+ return self._extract_preplay_video(urlh.geturl(), webpage)
class ViceShowIE(InfoExtractor):
diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py
index 8742b60..0eff055 100644
--- a/youtube_dl/extractor/viceland.py
+++ b/youtube_dl/extractor/viceland.py
@@ -1,23 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
-import time
-import hashlib
-import json
+from .vice import ViceBaseIE
-from .adobepass import AdobePassIE
-from ..compat import compat_HTTPError
-from ..utils import (
- int_or_none,
- parse_age_limit,
- str_or_none,
- parse_duration,
- ExtractorError,
- extract_attributes,
-)
-
-class VicelandIE(AdobePassIE):
+class VicelandIE(ViceBaseIE):
_VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
_TEST = {
'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
@@ -38,70 +25,9 @@ class VicelandIE(AdobePassIE):
},
'add_ie': ['UplynkPreplay'],
}
+ _PREPLAY_HOST = 'www.viceland'
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
- watch_hub_data = extract_attributes(self._search_regex(
- r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
- video_id = watch_hub_data['vms-id']
- title = watch_hub_data['video-title']
-
- query = {}
- if watch_hub_data.get('video-locked') == '1':
- resource = self._get_mvpd_resource(
- 'VICELAND', title, video_id,
- watch_hub_data.get('video-rating'))
- query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
-
- # signature generation algorithm is reverse engineered from signatureGenerator in
- # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
- # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
- exp = int(time.time()) + 14400
- query.update({
- 'exp': exp,
- 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
- })
-
- try:
- preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- error = json.loads(e.cause.read().decode())
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
- raise
-
- video_data = preplay['video']
- base = video_data['base']
- uplynk_preplay_url = preplay['preplayURL']
- episode = video_data.get('episode', {})
- channel = video_data.get('channel', {})
-
- subtitles = {}
- cc_url = preplay.get('ccURL')
- if cc_url:
- subtitles['en'] = [{
- 'url': cc_url,
- }]
-
- return {
- '_type': 'url_transparent',
- 'url': uplynk_preplay_url,
- 'id': video_id,
- 'title': title,
- 'description': base.get('body'),
- 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
- 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
- 'timestamp': int_or_none(video_data.get('created_at')),
- 'age_limit': parse_age_limit(video_data.get('video_rating')),
- 'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
- 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
- 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
- 'season_number': int_or_none(watch_hub_data.get('season')),
- 'season_id': str_or_none(episode.get('season_id')),
- 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
- 'uploader_id': str_or_none(channel.get('id')),
- 'subtitles': subtitles,
- 'ie_key': 'UplynkPreplay',
- }
+ return self._extract_preplay_video(url, webpage)
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 4351ac4..9c48701 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,11 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
-import time
-import hmac
import hashlib
+import hmac
import itertools
+import json
+import re
+import time
from .common import InfoExtractor
from ..utils import (
@@ -276,10 +277,14 @@ class VikiIE(VikiBaseIE):
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None))
for protocol, format_dict in stream_dict.items():
+            # rtmps URLs do not seem to work
+ if protocol == 'rtmps':
+ continue
+ format_url = format_dict['url']
if format_id == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
- format_dict['url'], video_id, 'mp4',
- entry_protocol='m3u8_native', preference=-1,
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native',
m3u8_id='m3u8-%s' % protocol, fatal=False)
# Despite CODECS metadata in m3u8 all video-only formats
# are actually video+audio
@@ -287,9 +292,23 @@ class VikiIE(VikiBaseIE):
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
f['acodec'] = None
formats.extend(m3u8_formats)
+ elif format_url.startswith('rtmp'):
+ mobj = re.search(
+ r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
+ format_url)
+ if not mobj:
+ continue
+ formats.append({
+ 'format_id': 'rtmp-%s' % format_id,
+ 'ext': 'flv',
+ 'url': mobj.group('url'),
+ 'play_path': mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'page_url': url,
+ })
else:
formats.append({
- 'url': format_dict['url'],
+ 'url': format_url,
'format_id': '%s-%s' % (format_id, protocol),
'height': height,
})
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index b566241..51c69a8 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -323,6 +323,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
'expected_warnings': ['Unable to download JSON metadata'],
},
{
+            # redirects to ondemand extractor and should be passed through it
+ # for successful extraction
+ 'url': 'https://vimeo.com/73445910',
+ 'info_dict': {
+ 'id': '73445910',
+ 'ext': 'mp4',
+ 'title': 'The Reluctant Revolutionary',
+ 'uploader': '10Ft Films',
+ 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
+ 'uploader_id': 'tenfootfilms',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
'only_matching': True,
},
@@ -414,7 +430,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
# Retrieve video webpage to extract further information
request = sanitized_Request(url, headers=headers)
try:
- webpage = self._download_webpage(request, video_id)
+ webpage, urlh = self._download_webpage_handle(request, video_id)
+            # Some URLs that redirect to ondemand can't be extracted with
+ # this extractor right away thus should be passed through
+ # ondemand extractor (e.g. https://vimeo.com/73445910)
+ if VimeoOndemandIE.suitable(urlh.geturl()):
+ return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key())
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read()
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index df43ba8..1990e70 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import collections
import re
-import json
import sys
from .common import InfoExtractor
@@ -369,8 +368,18 @@ class VKIE(VKBaseIE):
opts_url = 'http:' + opts_url
return self.url_result(opts_url)
- data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
- data = json.loads(data_json)
+ # vars does not look to be served anymore since 24.10.2016
+ data = self._parse_json(
+ self._search_regex(
+ r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'),
+ video_id, fatal=False)
+
+ # <!json> is served instead
+ if not data:
+ data = self._parse_json(
+ self._search_regex(
+ r'<!json>\s*({.+?})\s*<!>', info_page, 'json'),
+ video_id)['player']['params'][0]
title = unescapeHTML(data['md_title'])
diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index 8d671cc..acf9fda 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -17,7 +17,7 @@ from ..compat import compat_urllib_parse_urlencode
class VLiveIE(InfoExtractor):
IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
@@ -27,7 +27,20 @@ class VLiveIE(InfoExtractor):
'creator': "Girl's Day",
'view_count': int,
},
- }
+ }, {
+ 'url': 'http://www.vlive.tv/video/16937',
+ 'info_dict': {
+ 'id': '16937',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] 첸백시 걍방',
+ 'creator': 'EXO',
+ 'view_count': int,
+ 'subtitles': 'mincount:12',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -116,7 +129,7 @@ class VLiveIE(InfoExtractor):
subtitles = {}
for caption in playinfo.get('captions', {}).get('list', []):
- lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
+ lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
if lang and caption.get('source'):
subtitles[lang] = [{
'ext': 'vtt',
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
index c85b474..bbfa6e5 100644
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -31,7 +31,8 @@ class VodlockerIE(InfoExtractor):
if any(p in webpage for p in (
'>THIS FILE WAS DELETED<',
'>File Not Found<',
- 'The file you were looking for could not be found, sorry for any inconvenience.<')):
+ 'The file you were looking for could not be found, sorry for any inconvenience.<',
+ '>The file was removed')):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
fields = self._hidden_inputs(webpage)
diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py
new file mode 100644
index 0000000..b270f08
--- /dev/null
+++ b/youtube_dl/extractor/vzaar.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+)
+
+
+class VzaarIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://vzaar.com/videos/1152805',
+ 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
+ 'info_dict': {
+ 'id': '1152805',
+ 'ext': 'mp4',
+ 'title': 'sample video (public)',
+ },
+ }, {
+ 'url': 'https://view.vzaar.com/27272/player',
+ 'md5': '3b50012ac9bbce7f445550d54e0508f2',
+ 'info_dict': {
+ 'id': '27272',
+ 'ext': 'mp3',
+ 'title': 'MP3',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
+ source_url = video_data['sourceUrl']
+
+ info = {
+ 'id': video_id,
+ 'title': video_data['videoTitle'],
+ 'url': source_url,
+ 'thumbnail': self._proto_relative_url(video_data.get('poster')),
+ 'duration': float_or_none(video_data.get('videoDuration')),
+ }
+ if 'audio' in source_url:
+ info.update({
+ 'vcodec': 'none',
+ 'ext': 'mp3',
+ })
+ else:
+ info.update({
+ 'width': int_or_none(video_data.get('width')),
+ 'height': int_or_none(video_data.get('height')),
+ 'ext': 'mp4',
+ })
+ return info
diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py
new file mode 100644
index 0000000..7486cb3
--- /dev/null
+++ b/youtube_dl/extractor/webcaster.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ xpath_text,
+)
+
+
+class WebcasterIE(InfoExtractor):
+ _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)'
+ _TESTS = [{
+ # http://video.khl.ru/quotes/393859
+ 'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18',
+ 'md5': '0c162f67443f30916ff1c89425dcd4cd',
+ 'info_dict': {
+ 'id': 'c8cefd240aa593681c8d068cff59f407_hd',
+ 'ext': 'mp4',
+ 'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_xml(url, video_id)
+
+ title = xpath_text(video, './/event_name', 'event name', fatal=True)
+
+ def make_id(parts, separator):
+ return separator.join(filter(None, parts))
+
+ formats = []
+ for format_id in (None, 'noise'):
+ track_tag = make_id(('track', format_id), '_')
+ for track in video.findall('.//iphone/%s' % track_tag):
+ track_url = track.text
+ if not track_url:
+ continue
+ if determine_ext(track_url) == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ track_url, video_id, 'mp4',
+ entry_protocol='m3u8_native',
+ m3u8_id=make_id(('hls', format_id), '-'), fatal=False)
+ for f in m3u8_formats:
+ f.update({
+ 'source_preference': 0 if format_id == 'noise' else 1,
+ 'format_note': track.get('title'),
+ })
+ formats.extend(m3u8_formats)
+ self._sort_formats(formats)
+
+ thumbnail = xpath_text(video, './/image', 'thumbnail')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+
+
+class WebcasterFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104',
+ 'only_matching': True,
+ }
+
+ @staticmethod
+ def _extract_url(ie, webpage):
+ mobj = re.search(
+ r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+ for secure in (True, False):
+ video_url = ie._og_search_video_url(
+ webpage, secure=secure, default=None)
+ if video_url:
+ mobj = re.search(
+ r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)',
+ video_url)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ feed = self._download_xml(url, video_id)
+
+ video_url = xpath_text(
+ feed, ('video_hd', 'video'), 'video url', fatal=True)
+
+ return self.url_result(video_url, WebcasterIE.ie_key())
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 91f0a0d..4951414 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -201,6 +201,32 @@ class YahooIE(InfoExtractor):
},
'skip': 'redirect to https://www.yahoo.com/music',
},
+ {
+ # yahoo://article/
+ 'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
+ 'info_dict': {
+ 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+ 'ext': 'mp4',
+ 'title': "'True Story' Trailer",
+ 'description': 'True Story',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # ytwnews://cavideo/
+ 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
+ 'info_dict': {
+ 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
+ 'ext': 'mp4',
+ 'title': '單車天使 - 中文版預',
+ 'description': '中文版預',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
]
def _real_extract(self, url):
@@ -269,7 +295,8 @@ class YahooIE(InfoExtractor):
r'"first_videoid"\s*:\s*"([^"]+)"',
r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
r'<article[^>]data-uuid=["\']([^"\']+)',
- r'yahoo://article/view\?.*\buuid=([^&"\']+)',
+ r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
+ r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
]
video_id = self._search_regex(
CONTENT_ID_REGEXES, webpage, 'content ID')
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 545246b..bd24a28 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1796,7 +1796,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
)"""
- _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
+ _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
IE_NAME = 'youtube:playlist'
_TESTS = [{
@@ -2175,7 +2175,7 @@ class YoutubeUserIE(YoutubeChannelIE):
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com live streams'
- _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live'
+ _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
IE_NAME = 'youtube:live'
_TESTS = [{
@@ -2204,6 +2204,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/TheYoungTurks/live',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 9737f70..a8df4ae 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -198,12 +198,12 @@ class JSInterpreter(object):
return opfunc(x, y)
m = re.match(
- r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
+ r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
if m:
fname = m.group('func')
argvals = tuple([
int(v) if v.isdigit() else local_vars[v]
- for v in m.group('args').split(',')])
+ for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
if fname not in self._functions:
self._functions[fname] = self.extract_function(fname)
return self._functions[fname](argvals)
diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py
index 1048072..fece280 100644
--- a/youtube_dl/socks.py
+++ b/youtube_dl/socks.py
@@ -60,7 +60,7 @@ class ProxyError(IOError):
def __init__(self, code=None, msg=None):
if code is not None and msg is None:
- msg = self.CODES.get(code) and 'unknown error'
+ msg = self.CODES.get(code) or 'unknown error'
super(ProxyError, self).__init__(code, msg)
@@ -103,6 +103,7 @@ class ProxyType(object):
SOCKS4A = 1
SOCKS5 = 2
+
Proxy = collections.namedtuple('Proxy', (
'type', 'host', 'port', 'username', 'password', 'remote_dns'))
diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py
index 7cf490a..0c71585 100644
--- a/youtube_dl/swfinterp.py
+++ b/youtube_dl/swfinterp.py
@@ -115,6 +115,8 @@ def _u30(reader):
res = _read_int(reader)
assert res & 0xf0000000 == 0
return res
+
+
_u32 = _read_int
@@ -176,6 +178,7 @@ class _Undefined(object):
return 'undefined'
__repr__ = __str__
+
undefined = _Undefined()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2894167..9595bcf 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1691,6 +1691,10 @@ def url_basename(url):
return path.strip('/').split('/')[-1]
+def base_url(url):
+ return re.match(r'https?://[^?#&]+/', url).group()
+
+
class HEADRequest(compat_urllib_request.Request):
def get_method(self):
return 'HEAD'
@@ -1818,8 +1822,12 @@ def get_exe_version(exe, args=['--version'],
""" Returns the version of the specified executable,
or False if the executable is not present """
try:
+ # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+ # SIGTTOU if youtube-dl is run in the background.
+ # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
out, _ = subprocess.Popen(
[encodeArgument(exe)] + args,
+ stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
except OSError:
return False
@@ -2341,11 +2349,18 @@ def _match_one(filter_part, dct):
m = operator_rex.search(filter_part)
if m:
op = COMPARISON_OPERATORS[m.group('op')]
- if m.group('strval') is not None:
+ actual_value = dct.get(m.group('key'))
+    if (m.group('strval') is not None or
+        # If the original field is a string and matching comparison value is
+ # If the original field is a string and matching comparisonvalue is
+ # a number we should respect the origin of the original field
+ # and process comparison value as a string (see
+ # https://github.com/rg3/youtube-dl/issues/11082).
+ actual_value is not None and m.group('intval') is not None and
+ isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
- comparison_value = m.group('strval')
+ comparison_value = m.group('strval') or m.group('intval')
else:
try:
comparison_value = int(m.group('intval'))
@@ -2357,7 +2372,6 @@ def _match_one(filter_part, dct):
raise ValueError(
'Invalid integer value %r in filter part %r' % (
m.group('intval'), filter_part))
- actual_value = dct.get(m.group('key'))
if actual_value is None:
return m.group('none_inclusive')
return op(actual_value, comparison_value)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 583c829..1acb630 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.10.21.1'
+__version__ = '2016.12.01'