aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRogério Brito <rbrito@ime.usp.br>2015-07-22 00:58:06 -0300
committerRogério Brito <rbrito@ime.usp.br>2015-07-22 00:58:06 -0300
commit38bb9b1b0a044cabaf5691553815e334cd2e9213 (patch)
tree6b165657fe120f02ee5e813156069e7107382e08
parentfd1446fb8834d066905da673eef608c5030392ba (diff)
parent0652569e481a8cf0995a00635ef62143fa779add (diff)
downloadyoutube-dl-38bb9b1b0a044cabaf5691553815e334cd2e9213.zip
youtube-dl-38bb9b1b0a044cabaf5691553815e334cd2e9213.tar.gz
youtube-dl-38bb9b1b0a044cabaf5691553815e334cd2e9213.tar.bz2
Merge tag 'upstream/2015.07.21'
Upstream version 2015.07.21
-rw-r--r--README.md38
-rw-r--r--README.txt48
-rw-r--r--docs/supportedsites.md78
-rw-r--r--test/test_compat.py25
-rwxr-xr-xyoutube-dlbin901174 -> 954068 bytes
-rw-r--r--youtube-dl.162
-rw-r--r--youtube-dl.bash-completion2
-rw-r--r--youtube-dl.fish16
-rw-r--r--youtube-dl.zsh2
-rwxr-xr-xyoutube_dl/YoutubeDL.py35
-rw-r--r--youtube_dl/__init__.py7
-rw-r--r--youtube_dl/compat.py103
-rw-r--r--youtube_dl/downloader/__init__.py2
-rw-r--r--youtube_dl/downloader/dash.py66
-rw-r--r--youtube_dl/downloader/external.py11
-rw-r--r--youtube_dl/extractor/__init__.py73
-rw-r--r--youtube_dl/extractor/adobetv.py60
-rw-r--r--youtube_dl/extractor/appleconnect.py50
-rw-r--r--youtube_dl/extractor/ard.py203
-rw-r--r--youtube_dl/extractor/baidu.py1
-rw-r--r--youtube_dl/extractor/bbccouk.py33
-rw-r--r--youtube_dl/extractor/bet.py4
-rw-r--r--youtube_dl/extractor/bilibili.py13
-rw-r--r--youtube_dl/extractor/bliptv.py66
-rw-r--r--youtube_dl/extractor/brightcove.py34
-rw-r--r--youtube_dl/extractor/cbs.py23
-rw-r--r--youtube_dl/extractor/ceskatelevize.py3
-rw-r--r--youtube_dl/extractor/clipsyndicate.py14
-rw-r--r--youtube_dl/extractor/cnet.py19
-rw-r--r--youtube_dl/extractor/common.py66
-rw-r--r--youtube_dl/extractor/crunchyroll.py34
-rw-r--r--youtube_dl/extractor/ctsnews.py1
-rw-r--r--youtube_dl/extractor/dailymotion.py52
-rw-r--r--youtube_dl/extractor/dfb.py27
-rw-r--r--youtube_dl/extractor/discovery.py52
-rw-r--r--youtube_dl/extractor/douyutv.py1
-rw-r--r--youtube_dl/extractor/dramafever.py216
-rw-r--r--youtube_dl/extractor/drbonanza.py12
-rw-r--r--youtube_dl/extractor/drtuber.py21
-rw-r--r--youtube_dl/extractor/ehow.py6
-rw-r--r--youtube_dl/extractor/empflix.py31
-rw-r--r--youtube_dl/extractor/facebook.py4
-rw-r--r--youtube_dl/extractor/faz.py21
-rw-r--r--youtube_dl/extractor/fivetv.py88
-rw-r--r--youtube_dl/extractor/francetv.py91
-rw-r--r--youtube_dl/extractor/gamespot.py4
-rw-r--r--youtube_dl/extractor/generic.py220
-rw-r--r--youtube_dl/extractor/gfycat.py28
-rw-r--r--youtube_dl/extractor/gorillavid.py7
-rw-r--r--youtube_dl/extractor/hentaistigma.py11
-rw-r--r--youtube_dl/extractor/hostingbulk.py6
-rw-r--r--youtube_dl/extractor/howcast.py35
-rw-r--r--youtube_dl/extractor/howstuffworks.py6
-rw-r--r--youtube_dl/extractor/imdb.py2
-rw-r--r--youtube_dl/extractor/ina.py2
-rw-r--r--youtube_dl/extractor/infoq.py18
-rw-r--r--youtube_dl/extractor/instagram.py11
-rw-r--r--youtube_dl/extractor/iqiyi.py273
-rw-r--r--youtube_dl/extractor/izlesene.py18
-rw-r--r--youtube_dl/extractor/jeuxvideo.py9
-rw-r--r--youtube_dl/extractor/karaoketv.py4
-rw-r--r--youtube_dl/extractor/kickstarter.py15
-rw-r--r--youtube_dl/extractor/kuwo.py314
-rw-r--r--youtube_dl/extractor/letv.py1
-rw-r--r--youtube_dl/extractor/lifenews.py15
-rw-r--r--youtube_dl/extractor/liveleak.py16
-rw-r--r--youtube_dl/extractor/lynda.py8
-rw-r--r--youtube_dl/extractor/malemotion.py6
-rw-r--r--youtube_dl/extractor/metacafe.py3
-rw-r--r--youtube_dl/extractor/mitele.py3
-rw-r--r--youtube_dl/extractor/mixcloud.py6
-rw-r--r--youtube_dl/extractor/mofosex.py4
-rw-r--r--youtube_dl/extractor/myspass.py3
-rw-r--r--youtube_dl/extractor/myvi.py60
-rw-r--r--youtube_dl/extractor/myvideo.py11
-rw-r--r--youtube_dl/extractor/nationalgeographic.py7
-rw-r--r--youtube_dl/extractor/neteasemusic.py459
-rw-r--r--youtube_dl/extractor/newstube.py2
-rw-r--r--youtube_dl/extractor/nextmedia.py3
-rw-r--r--youtube_dl/extractor/nfl.py6
-rw-r--r--youtube_dl/extractor/niconico.py3
-rw-r--r--youtube_dl/extractor/noco.py6
-rw-r--r--youtube_dl/extractor/nowtv.py2
-rw-r--r--youtube_dl/extractor/npo.py162
-rw-r--r--youtube_dl/extractor/nrk.py24
-rw-r--r--youtube_dl/extractor/odnoklassniki.py4
-rw-r--r--youtube_dl/extractor/onionstudios.py76
-rw-r--r--youtube_dl/extractor/openfilm.py4
-rw-r--r--youtube_dl/extractor/pbs.py43
-rw-r--r--youtube_dl/extractor/photobucket.py4
-rw-r--r--youtube_dl/extractor/pinkbike.py96
-rw-r--r--youtube_dl/extractor/planetaplay.py3
-rw-r--r--youtube_dl/extractor/played.py4
-rw-r--r--youtube_dl/extractor/playvid.py7
-rw-r--r--youtube_dl/extractor/pornhub.py28
-rw-r--r--youtube_dl/extractor/pornovoisines.py4
-rw-r--r--youtube_dl/extractor/primesharetv.py9
-rw-r--r--youtube_dl/extractor/promptfile.py5
-rw-r--r--youtube_dl/extractor/prosiebensat1.py30
-rw-r--r--youtube_dl/extractor/qqmusic.py127
-rw-r--r--youtube_dl/extractor/quickvid.py1
-rw-r--r--youtube_dl/extractor/rds.py73
-rw-r--r--youtube_dl/extractor/rtbf.py20
-rw-r--r--youtube_dl/extractor/rtlnl.py49
-rw-r--r--youtube_dl/extractor/ruutu.py119
-rw-r--r--youtube_dl/extractor/safari.py8
-rw-r--r--youtube_dl/extractor/sbs.py40
-rw-r--r--youtube_dl/extractor/shared.py4
-rw-r--r--youtube_dl/extractor/smotri.py55
-rw-r--r--youtube_dl/extractor/snagfilms.py171
-rw-r--r--youtube_dl/extractor/sohu.py54
-rw-r--r--youtube_dl/extractor/soundcloud.py5
-rw-r--r--youtube_dl/extractor/spankwire.py12
-rw-r--r--youtube_dl/extractor/spiegeltv.py47
-rw-r--r--youtube_dl/extractor/sunporno.py2
-rw-r--r--youtube_dl/extractor/tagesschau.py10
-rw-r--r--youtube_dl/extractor/teamcoco.py29
-rw-r--r--youtube_dl/extractor/theplatform.py23
-rw-r--r--youtube_dl/extractor/thesixtyone.py18
-rw-r--r--youtube_dl/extractor/thisamericanlife.py40
-rw-r--r--youtube_dl/extractor/tlc.py15
-rw-r--r--youtube_dl/extractor/tnaflix.py279
-rw-r--r--youtube_dl/extractor/tube8.py2
-rw-r--r--youtube_dl/extractor/tumblr.py27
-rw-r--r--youtube_dl/extractor/turbo.py4
-rw-r--r--youtube_dl/extractor/tvc.py109
-rw-r--r--youtube_dl/extractor/tvplay.py17
-rw-r--r--youtube_dl/extractor/twitch.py69
-rw-r--r--youtube_dl/extractor/twitter.py72
-rw-r--r--youtube_dl/extractor/udemy.py35
-rw-r--r--youtube_dl/extractor/udn.py1
-rw-r--r--youtube_dl/extractor/vbox7.py21
-rw-r--r--youtube_dl/extractor/veehd.py3
-rw-r--r--youtube_dl/extractor/vice.py37
-rw-r--r--youtube_dl/extractor/videomega.py28
-rw-r--r--youtube_dl/extractor/viki.py42
-rw-r--r--youtube_dl/extractor/vimeo.py22
-rw-r--r--youtube_dl/extractor/vimple.py46
-rw-r--r--youtube_dl/extractor/vk.py166
-rw-r--r--youtube_dl/extractor/vodlocker.py9
-rw-r--r--youtube_dl/extractor/vube.py1
-rw-r--r--youtube_dl/extractor/webofstories.py41
-rw-r--r--youtube_dl/extractor/xbef.py6
-rw-r--r--youtube_dl/extractor/xhamster.py34
-rw-r--r--youtube_dl/extractor/xnxx.py6
-rw-r--r--youtube_dl/extractor/xtube.py6
-rw-r--r--youtube_dl/extractor/xuite.py1
-rw-r--r--youtube_dl/extractor/xvideos.py31
-rw-r--r--youtube_dl/extractor/yam.py1
-rw-r--r--youtube_dl/extractor/yinyuetai.py56
-rw-r--r--youtube_dl/extractor/ynet.py4
-rw-r--r--youtube_dl/extractor/youku.py307
-rw-r--r--youtube_dl/extractor/youtube.py241
-rw-r--r--youtube_dl/options.py21
-rw-r--r--youtube_dl/postprocessor/common.py10
-rw-r--r--youtube_dl/postprocessor/embedthumbnail.py5
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py209
-rw-r--r--youtube_dl/update.py2
-rw-r--r--youtube_dl/utils.py481
-rw-r--r--youtube_dl/version.py2
160 files changed, 6001 insertions, 1299 deletions
diff --git a/README.md b/README.md
index f3d83c8..ac54d7b 100644
--- a/README.md
+++ b/README.md
@@ -52,8 +52,9 @@ which means you can modify it, redistribute it or use it however you like.
-i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
--dump-user-agent Display the current browser identification
- --list-extractors List all supported extractors and the URLs they would handle
+ --list-extractors List all supported extractors
--extractor-descriptions Output descriptions of all supported extractors
+ --force-generic-extractor Force extraction to use the generic extractor
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
@@ -74,7 +75,7 @@ which means you can modify it, redistribute it or use it however you like.
## Video Selection:
--playlist-start NUMBER Playlist video to start at (default is 1)
--playlist-end NUMBER Playlist video to end at (default is last)
- --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
+ --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
--match-title REGEX Download only matching titles (regex or caseless sub-string)
@@ -107,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
--playlist-reverse Download playlist videos in reverse order
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
- --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
+ --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
--external-downloader-args ARGS Give these arguments to the external downloader
## Filesystem Options:
@@ -189,8 +190,8 @@ which means you can modify it, redistribute it or use it however you like.
--all-formats Download all available video formats
--prefer-free-formats Prefer free video formats unless a specific one is requested
-F, --list-formats List all available formats
- --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
- --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
+ --youtube-skip-dash-manifest Do not download the DASH manifests and related data on YouTube videos
+ --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
merge is required
## Subtitle Options:
@@ -213,7 +214,8 @@ which means you can modify it, redistribute it or use it however you like.
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
5)
- --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
+ --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)
+ --postprocessor-args ARGS Give these arguments to the postprocessor
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
--embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
@@ -223,7 +225,7 @@ which means you can modify it, redistribute it or use it however you like.
parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
%(title)s" matches a title like "Coldplay - Paradise"
--xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
- --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
+ --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
fix file if we can, warn otherwise)
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
@@ -236,6 +238,26 @@ which means you can modify it, redistribute it or use it however you like.
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
+### Authentication with `.netrc` file ###
+
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in shell command history. You can achieve this using [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
+```
+touch $HOME/.netrc
+chmod a-rwx,u+rw $HOME/.netrc
+```
+After that you can add credentials for extractor in the following format, where *extractor* is the name of extractor in lowercase:
+```
+machine <extractor> login <login> password <password>
+```
+For example:
+```
+machine youtube login myaccount@gmail.com password my_youtube_password
+machine twitch login my_twitch_account_name password my_twitch_password
+```
+To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or to place it in [configuration file](#configuration).
+
+On Windows you may also need to setup `%HOME%` environment variable manually.
+
# OUTPUT TEMPLATE
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are:
@@ -379,7 +401,7 @@ In February 2015, the new YouTube player contained a character sequence in a str
### HTTP Error 429: Too Many Requests or 402: Payment Required
-These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--network-address` options](#network-options) to select another IP address.
+These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
### SyntaxError: Non-ASCII character ###
diff --git a/README.txt b/README.txt
index bc4f294..e40f1d4 100644
--- a/README.txt
+++ b/README.txt
@@ -68,8 +68,9 @@ OPTIONS
-i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
--dump-user-agent Display the current browser identification
- --list-extractors List all supported extractors and the URLs they would handle
+ --list-extractors List all supported extractors
--extractor-descriptions Output descriptions of all supported extractors
+ --force-generic-extractor Force extraction to use the generic extractor
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
@@ -94,7 +95,7 @@ Video Selection:
--playlist-start NUMBER Playlist video to start at (default is 1)
--playlist-end NUMBER Playlist video to end at (default is last)
- --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
+ --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
--match-title REGEX Download only matching titles (regex or caseless sub-string)
@@ -129,7 +130,7 @@ Download Options:
--playlist-reverse Download playlist videos in reverse order
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
- --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
+ --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
--external-downloader-args ARGS Give these arguments to the external downloader
@@ -221,8 +222,8 @@ Video Format Options:
--all-formats Download all available video formats
--prefer-free-formats Prefer free video formats unless a specific one is requested
-F, --list-formats List all available formats
- --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
- --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
+ --youtube-skip-dash-manifest Do not download the DASH manifests and related data on YouTube videos
+ --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
merge is required
@@ -251,7 +252,8 @@ Post-processing Options:
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
5)
- --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
+ --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)
+ --postprocessor-args ARGS Give these arguments to the postprocessor
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
--embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
@@ -261,7 +263,7 @@ Post-processing Options:
parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
%(title)s" matches a title like "Coldplay - Paradise"
--xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
- --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
+ --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
fix file if we can, warn otherwise)
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
@@ -282,6 +284,36 @@ Windows, the configuration file locations are
%APPDATA%\youtube-dl\config.txt and
C:\Users\<user name>\youtube-dl.conf.
+Authentication with .netrc file
+
+You may also want to configure automatic credentials storage for
+extractors that support authentication (by providing login and password
+with --username and --password) in order not to pass credentials as
+command line arguments on every youtube-dl execution and prevent
+tracking plain text passwords in shell command history. You can achieve
+this using .netrc file on per extractor basis. For that you will need to
+create .netrc file in your $HOME and restrict permissions to read/write
+by you only:
+
+ touch $HOME/.netrc
+ chmod a-rwx,u+rw $HOME/.netrc
+
+After that you can add credentials for extractor in the following
+format, where _extractor_ is the name of extractor in lowercase:
+
+ machine <extractor> login <login> password <password>
+
+For example:
+
+ machine youtube login myaccount@gmail.com password my_youtube_password
+ machine twitch login my_twitch_account_name password my_twitch_password
+
+To activate authentication with .netrc file you should pass --netrc to
+youtube-dl or to place it in configuration file.
+
+On Windows you may also need to setup %HOME% environment variable
+manually.
+
OUTPUT TEMPLATE
@@ -556,7 +588,7 @@ HTTP Error 429: Too Many Requests or 402: Payment Required
These two error codes indicate that the service is blocking your IP
address because of overuse. Contact the service and ask them to unblock
your IP address, or - if you have acquired a whitelisted IP address
-already - use the --proxy or --network-address options to select another
+already - use the --proxy or --source-address options to select another
IP address.
SyntaxError: Non-ASCII character
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index d147b53..7344513 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -17,6 +17,7 @@
- **AcademicEarth:Course**
- **AddAnime**
- **AdobeTV**
+ - **AdobeTVVideo**
- **AdultSwim**
- **Aftenposten**
- **Aftonbladet**
@@ -27,7 +28,8 @@
- **anitube.se**
- **AnySex**
- **Aparat**
- - **AppleDaily**
+ - **AppleConnect**
+ - **AppleDaily**: 臺灣蘋果日報
- **AppleTrailers**
- **archive.org**: archive.org videos
- **ARD**
@@ -44,7 +46,7 @@
- **audiomack**
- **audiomack:album**
- **Azubu**
- - **BaiduVideo**
+ - **BaiduVideo**: 百度视频
- **bambuser**
- **bambuser:channel**
- **Bandcamp**
@@ -105,11 +107,12 @@
- **Crunchyroll**
- **crunchyroll:playlist**
- **CSpan**: C-SPAN
- - **CtsNews**
+ - **CtsNews**: 華視新聞
- **culturebox.francetvinfo.fr**
- **dailymotion**
- **dailymotion:playlist**
- **dailymotion:user**
+ - **DailymotionCloud**
- **daum.net**
- **DBTV**
- **DctpTv**
@@ -119,7 +122,9 @@
- **Discovery**
- **divxstage**: DivxStage
- **Dotsub**
- - **DouyuTV**
+ - **DouyuTV**: 斗鱼
+ - **dramafever**
+ - **dramafever:series**
- **DRBonanza**
- **Dropbox**
- **DrTuber**
@@ -153,6 +158,7 @@
- **fernsehkritik.tv**
- **fernsehkritik.tv:postecke**
- **Firstpost**
+ - **FiveTV**
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FootyRoom**
@@ -217,6 +223,7 @@
- **instagram:user**: Instagram user profile
- **InternetVideoArchive**
- **IPrima**
+ - **iqiyi**: 爱奇艺
- **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations
- **Izlesene**
@@ -237,9 +244,15 @@
- **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью
- **Ku6**
+ - **kuwo:album**: 酷我音乐 - 专辑
+ - **kuwo:category**: 酷我音乐 - 分类
+ - **kuwo:chart**: 酷我音乐 - 排行榜
+ - **kuwo:mv**: 酷我音乐 - MV
+ - **kuwo:singer**: 酷我音乐 - 歌手
+ - **kuwo:song**: 酷我音乐
- **la7.tv**
- **Laola1Tv**
- - **Letv**
+ - **Letv**: 乐视网
- **LetvPlaylist**
- **LetvTv**
- **Libsyn**
@@ -277,6 +290,7 @@
- **Motherless**
- **Motorsport**: motorsport.com
- **MovieClips**
+ - **MovieFap**
- **Moviezine**
- **movshare**: MovShare
- **MPORA**
@@ -290,6 +304,7 @@
- **MySpace**
- **MySpace:album**
- **MySpass**
+ - **Myvi**
- **myvideo**
- **MyVidster**
- **N-JOY**
@@ -305,11 +320,18 @@
- **NDTV**
- **NerdCubedFeed**
- **Nerdist**
+ - **netease:album**: 网易云音乐 - 专辑
+ - **netease:djradio**: 网易云音乐 - 电台
+ - **netease:mv**: 网易云音乐 - MV
+ - **netease:playlist**: 网易云音乐 - 歌单
+ - **netease:program**: 网易云音乐 - 电台节目
+ - **netease:singer**: 网易云音乐 - 歌手
+ - **netease:song**: 网易云音乐
- **Netzkino**
- **Newgrounds**
- **Newstube**
- - **NextMedia**
- - **NextMediaActionNews**
+ - **NextMedia**: 蘋果日報
+ - **NextMediaActionNews**: 蘋果日報 - 動新聞
- **nfb**: National Film Board of Canada
- **nfl.com**
- **nhl.com**
@@ -325,13 +347,14 @@
- **Nowness**
- **NowTV**
- **nowvideo**: NowVideo
- - **npo.nl**
+ - **npo**: npo.nl and ntr.nl
+ - **npo**: npo.nl and ntr.nl
- **npo.nl:live**
- **npo.nl:radio**
- **npo.nl:radio:fragment**
- **NRK**
- **NRKPlaylist**
- - **NRKTV**
+ - **NRKTV**: NRK TV and NRK Radio
- **ntv.ru**
- **Nuvid**
- **NYTimes**
@@ -340,6 +363,7 @@
- **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
+ - **OnionStudios**
- **Ooyala**
- **OoyalaExternal**
- **OpenFilm**
@@ -353,6 +377,7 @@
- **PhilharmonieDeParis**: Philharmonie de Paris
- **Phoenix**
- **Photobucket**
+ - **Pinkbike**
- **Pladform**
- **PlanetaPlay**
- **play.fm**
@@ -373,10 +398,11 @@
- **prosiebensat1**: ProSiebenSat.1 Digital
- **Puls4**
- **Pyvideo**
- - **qqmusic**
- - **qqmusic:album**
- - **qqmusic:singer**
- - **qqmusic:toplist**
+ - **qqmusic**: QQ音乐
+ - **qqmusic:album**: QQ音乐 - 专辑
+ - **qqmusic:playlist**: QQ音乐 - 歌单
+ - **qqmusic:singer**: QQ音乐 - 歌手
+ - **qqmusic:toplist**: QQ音乐 - 排行榜
- **QuickVid**
- **R7**
- **radio.de**
@@ -385,6 +411,7 @@
- **RadioJavan**
- **Rai**
- **RBMARadio**
+ - **RDS**: RDS.ca
- **RedTube**
- **Restudy**
- **ReverbNation**
@@ -407,6 +434,7 @@
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
+ - **Ruutu**
- **safari**: safaribooksonline.com online video
- **safari:course**: safaribooksonline.com online courses
- **Sandia**: Sandia National Laboratories
@@ -431,6 +459,8 @@
- **smotri:broadcast**: Smotri.com broadcasts
- **smotri:community**: Smotri.com community videos
- **smotri:user**: Smotri.com user videos
+ - **SnagFilms**
+ - **SnagFilmsEmbed**
- **Snotr**
- **Sohu**
- **soompi**
@@ -457,6 +487,7 @@
- **SportBox**
- **SportBoxEmbed**
- **SportDeutschland**
+ - **Sportschau**
- **Srf**
- **SRMediathek**: Saarländischer Rundfunk
- **SSA**
@@ -482,7 +513,6 @@
- **TechTalks**
- **techtv.mit.edu**
- **ted**
- - **tegenlicht.vpro.nl**
- **TeleBruxelles**
- **telecinco.es**
- **TeleMB**
@@ -493,6 +523,7 @@
- **TheOnion**
- **ThePlatform**
- **TheSixtyOne**
+ - **ThisAmericanLife**
- **ThisAV**
- **THVideo**
- **THVideoPlaylist**
@@ -519,6 +550,8 @@
- **TV2**
- **TV2Article**
- **TV4**: tv4.se and tv4play.se
+ - **TVC**
+ - **TVCArticle**
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvp.pl**
- **tvp.pl:Series**
@@ -531,10 +564,11 @@
- **twitch:stream**
- **twitch:video**
- **twitch:vod**
+ - **TwitterCard**
- **Ubu**
- **udemy**
- **udemy:course**
- - **UDNEmbed**
+ - **UDNEmbed**: 聯合影音
- **Ultimedia**
- **Unistra**
- **Urort**: NRK P3 Urørt
@@ -579,8 +613,8 @@
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- - **vk.com**
- - **vk.com:user-videos**: vk.com:All of a user's videos
+ - **vk**: VK
+ - **vk:uservideos**: VK - User's Videos
- **Vodlocker**
- **VoiceRepublic**
- **Vporn**
@@ -596,32 +630,36 @@
- **wdr:mobile**
- **WDRMaus**: Sendung mit der Maus
- **WebOfStories**
+ - **WebOfStoriesPlaylist**
- **Weibo**
- **Wimp**
- **Wistia**
+ - **WNL**
- **WorldStarHipHop**
- **wrzuta.pl**
- **WSJ**: Wall Street Journal
- **XBef**
- **XboxClips**
- **XHamster**
+ - **XHamsterEmbed**
- **XMinus**
- **XNXX**
- **Xstream**
- **XTube**
- **XTubeUser**: XTube user profile
- - **Xuite**
+ - **Xuite**: 隨意窩Xuite影音
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- - **Yam**
+ - **Yam**: 蕃薯藤yam天空部落
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
- **YesJapan**
+ - **yinyuetai:video**: 音悦Tai
- **Ynet**
- **YouJizz**
- - **Youku**
+ - **youku**: 优酷
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com
diff --git a/test/test_compat.py b/test/test_compat.py
index 1eb454e..c3ba8ad 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -14,6 +14,8 @@ from youtube_dl.utils import get_filesystem_encoding
from youtube_dl.compat import (
compat_getenv,
compat_expanduser,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
)
@@ -42,5 +44,28 @@ class TestCompat(unittest.TestCase):
dir(youtube_dl.compat))) - set(['unicode_literals'])
self.assertEqual(all_names, sorted(present_names))
+ def test_compat_urllib_parse_unquote(self):
+ self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
+ self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
+ self.assertEqual(compat_urllib_parse_unquote(''), '')
+ self.assertEqual(compat_urllib_parse_unquote('%'), '%')
+ self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
+ self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
+ self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
+ self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
+ self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
+ self.assertEqual(
+ compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
+%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
+ '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
+%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
+ self.assertEqual(
+ compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
+ '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
+
+ def test_compat_urllib_parse_unquote_plus(self):
+ self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
+ self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube-dl b/youtube-dl
index 79f0e06..aeb2b54 100755
--- a/youtube-dl
+++ b/youtube-dl
Binary files differ
diff --git a/youtube-dl.1 b/youtube-dl.1
index 5b15884..cde2abf 100644
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -24,8 +24,9 @@ redistribute it or use it however you like.
\-i,\ \-\-ignore\-errors\ \ \ \ \ \ \ \ \ \ \ \ \ \ Continue\ on\ download\ errors,\ for\ example\ to\ skip\ unavailable\ videos\ in\ a\ playlist
\-\-abort\-on\-error\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Abort\ downloading\ of\ further\ videos\ (in\ the\ playlist\ or\ the\ command\ line)\ if\ an\ error\ occurs
\-\-dump\-user\-agent\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Display\ the\ current\ browser\ identification
-\-\-list\-extractors\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they\ would\ handle
+\-\-list\-extractors\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors
\-\-extractor\-descriptions\ \ \ \ \ \ \ \ \ Output\ descriptions\ of\ all\ supported\ extractors
+\-\-force\-generic\-extractor\ \ \ \ \ \ \ \ Force\ extraction\ to\ use\ the\ generic\ extractor
\-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For\ example\ "gvsearch2:"\ downloads\ two\ videos\ from\ google\ videos\ for\ youtube\-dl\ "large\ apple".
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ value\ "auto"\ to\ let\ youtube\-dl\ guess\ ("auto_warning"\ to\ emit\ a\ warning\ when\ guessing).\ "error"\ just\ throws\ an\ error.\ The
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default\ value\ "fixup_error"\ repairs\ broken\ URLs,\ but\ emits\ an\ error\ if\ this\ is\ not\ possible\ instead\ of\ searching.
@@ -54,7 +55,7 @@ redistribute it or use it however you like.
\f[C]
\-\-playlist\-start\ NUMBER\ \ \ \ \ \ \ \ \ \ Playlist\ video\ to\ start\ at\ (default\ is\ 1)
\-\-playlist\-end\ NUMBER\ \ \ \ \ \ \ \ \ \ \ \ Playlist\ video\ to\ end\ at\ (default\ is\ last)
-\-\-playlist\-items\ ITEM_SPEC\ \ \ \ \ \ \ Playlist\ video\ items\ to\ download.\ Specify\ indices\ of\ the\ videos\ in\ the\ playlist\ seperated\ by\ commas\ like:\ "\-\-playlist\-items\ 1,2,5,8"
+\-\-playlist\-items\ ITEM_SPEC\ \ \ \ \ \ \ Playlist\ video\ items\ to\ download.\ Specify\ indices\ of\ the\ videos\ in\ the\ playlist\ separated\ by\ commas\ like:\ "\-\-playlist\-items\ 1,2,5,8"
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ if\ you\ want\ to\ download\ videos\ indexed\ 1,\ 2,\ 5,\ 8\ in\ the\ playlist.\ You\ can\ specify\ range:\ "\-\-playlist\-items\ 1\-3,7,10\-13",\ it\ will
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ the\ videos\ at\ index\ 1,\ 2,\ 3,\ 7,\ 10,\ 11,\ 12\ and\ 13.
\-\-match\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ matching\ titles\ (regex\ or\ caseless\ sub\-string)
@@ -91,7 +92,7 @@ redistribute it or use it however you like.
\-\-playlist\-reverse\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ playlist\ videos\ in\ reverse\ order
\-\-xattr\-set\-filesize\ \ \ \ \ \ \ \ \ \ \ \ \ Set\ file\ xattribute\ ytdl.filesize\ with\ expected\ filesize\ (experimental)
\-\-hls\-prefer\-native\ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ native\ HLS\ downloader\ instead\ of\ ffmpeg\ (experimental)
-\-\-external\-downloader\ COMMAND\ \ \ \ Use\ the\ specified\ external\ downloader.\ Currently\ supports\ aria2c,curl,wget
+\-\-external\-downloader\ COMMAND\ \ \ \ Use\ the\ specified\ external\ downloader.\ Currently\ supports\ aria2c,curl,httpie,wget
\-\-external\-downloader\-args\ ARGS\ \ Give\ these\ arguments\ to\ the\ external\ downloader
\f[]
.fi
@@ -193,8 +194,8 @@ redistribute it or use it however you like.
\-\-all\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ all\ available\ video\ formats
\-\-prefer\-free\-formats\ \ \ \ \ \ \ \ \ \ \ \ Prefer\ free\ video\ formats\ unless\ a\ specific\ one\ is\ requested
\-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ available\ formats
-\-\-youtube\-skip\-dash\-manifest\ \ \ \ \ Do\ not\ download\ the\ DASH\ manifest\ on\ YouTube\ videos
-\-\-merge\-output\-format\ FORMAT\ \ \ \ \ If\ a\ merge\ is\ required\ (e.g.\ bestvideo+bestaudio),\ output\ to\ given\ container\ format.\ One\ of\ mkv,\ mp4,\ ogg,\ webm,\ flv.Ignored\ if\ no
+\-\-youtube\-skip\-dash\-manifest\ \ \ \ \ Do\ not\ download\ the\ DASH\ manifests\ and\ related\ data\ on\ YouTube\ videos
+\-\-merge\-output\-format\ FORMAT\ \ \ \ \ If\ a\ merge\ is\ required\ (e.g.\ bestvideo+bestaudio),\ output\ to\ given\ container\ format.\ One\ of\ mkv,\ mp4,\ ogg,\ webm,\ flv.\ Ignored\ if\ no
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ merge\ is\ required
\f[]
.fi
@@ -229,7 +230,8 @@ redistribute it or use it however you like.
\-\-audio\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Specify\ audio\ format:\ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ "opus",\ or\ "wav";\ "best"\ by\ default
\-\-audio\-quality\ QUALITY\ \ \ \ \ \ \ \ \ \ Specify\ ffmpeg/avconv\ audio\ quality,\ insert\ a\ value\ between\ 0\ (better)\ and\ 9\ (worse)\ for\ VBR\ or\ a\ specific\ bitrate\ like\ 128K\ (default
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 5)
-\-\-recode\-video\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if\ necessary\ (currently\ supported:\ mp4|flv|ogg|webm|mkv)
+\-\-recode\-video\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if\ necessary\ (currently\ supported:\ mp4|flv|ogg|webm|mkv|avi)
+\-\-postprocessor\-args\ ARGS\ \ \ \ \ \ \ \ Give\ these\ arguments\ to\ the\ postprocessor
\-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Keep\ the\ video\ file\ on\ disk\ after\ the\ post\-processing;\ the\ video\ is\ erased\ by\ default
\-\-no\-post\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ overwrite\ post\-processed\ files;\ the\ post\-processed\ files\ are\ overwritten\ by\ default
\-\-embed\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Embed\ subtitles\ in\ the\ video\ (only\ for\ mkv\ and\ mp4\ videos)
@@ -239,7 +241,7 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ parameters\ replace\ existing\ values.\ Additional\ templates:\ %(album)s,\ %(artist)s.\ Example:\ \-\-metadata\-from\-title\ "%(artist)s\ \-
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(title)s"\ matches\ a\ title\ like\ "Coldplay\ \-\ Paradise"
\-\-xattrs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ metadata\ to\ the\ video\ file\[aq]s\ xattrs\ (using\ dublin\ core\ and\ xdg\ standards)
-\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Automatically\ correct\ known\ faults\ of\ the\ file.\ One\ of\ never\ (do\ nothing),\ warn\ (only\ emit\ a\ warning),\ detect_or_warn(the\ default;
+\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Automatically\ correct\ known\ faults\ of\ the\ file.\ One\ of\ never\ (do\ nothing),\ warn\ (only\ emit\ a\ warning),\ detect_or_warn\ (the\ default;
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ fix\ file\ if\ we\ can,\ warn\ otherwise)
\-\-prefer\-avconv\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ avconv\ over\ ffmpeg\ for\ running\ the\ postprocessors\ (default)
\-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the\ postprocessors
@@ -258,6 +260,50 @@ and not copy the mtime) into \f[C]/etc/youtube\-dl.conf\f[] and/or
On Windows, the configuration file locations are
\f[C]%APPDATA%\\youtube\-dl\\config.txt\f[] and
\f[C]C:\\Users\\<user\ name>\\youtube\-dl.conf\f[].
+.SS Authentication with \f[C]\&.netrc\f[] file
+.PP
+You may also want to configure automatic credentials storage for
+extractors that support authentication (by providing login and password
+with \f[C]\-\-username\f[] and \f[C]\-\-password\f[]) in order not to
+pass credentials as command line arguments on every youtube\-dl
+execution and prevent tracking plain text passwords in shell command
+history.
+You can achieve this using \f[C]\&.netrc\f[]
+file (http://stackoverflow.com/tags/.netrc/info) on per extractor basis.
+For that you will need to create \f[C]\&.netrc\f[] file in your
+\f[C]$HOME\f[] and restrict permissions to read/write by you only:
+.IP
+.nf
+\f[C]
+touch\ $HOME/.netrc
+chmod\ a\-rwx,u+rw\ $HOME/.netrc
+\f[]
+.fi
+.PP
+After that you can add credentials for extractor in the following
+format, where \f[I]extractor\f[] is the name of extractor in lowercase:
+.IP
+.nf
+\f[C]
+machine\ <extractor>\ login\ <login>\ password\ <password>
+\f[]
+.fi
+.PP
+For example:
+.IP
+.nf
+\f[C]
+machine\ youtube\ login\ myaccount\@gmail.com\ password\ my_youtube_password
+machine\ twitch\ login\ my_twitch_account_name\ password\ my_twitch_password
+\f[]
+.fi
+.PP
+To activate authentication with \f[C]\&.netrc\f[] file you should pass
+\f[C]\-\-netrc\f[] to youtube\-dl or to place it in configuration
+file (#configuration).
+.PP
+On Windows you may also need to setup \f[C]%HOME%\f[] environment
+variable manually.
.SH OUTPUT TEMPLATE
.PP
The \f[C]\-o\f[] option allows users to indicate a template for the
@@ -571,7 +617,7 @@ These two error codes indicate that the service is blocking your IP
address because of overuse.
Contact the service and ask them to unblock your IP address, or \- if
you have acquired a whitelisted IP address already \- use the
-\f[C]\-\-proxy\f[] or \f[C]\-\-network\-address\f[]
+\f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[]
options (#network-options) to select another IP address.
.SS SyntaxError: Non\-ASCII character
.PP
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion
index 8367bdb..893d7e7 100644
--- a/youtube-dl.bash-completion
+++ b/youtube-dl.bash-completion
@@ -4,7 +4,7 @@ __youtube_dl()
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles"
+ opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
diropts="--cache-dir"
diff --git a/youtube-dl.fish b/youtube-dl.fish
index 82f424b..eb9828d 100644
--- a/youtube-dl.fish
+++ b/youtube-dl.fish
@@ -5,8 +5,9 @@ complete --command youtube-dl --long-option update --short-option U --descriptio
complete --command youtube-dl --long-option ignore-errors --short-option i --description 'Continue on download errors, for example to skip unavailable videos in a playlist'
complete --command youtube-dl --long-option abort-on-error --description 'Abort downloading of further videos (in the playlist or the command line) if an error occurs'
complete --command youtube-dl --long-option dump-user-agent --description 'Display the current browser identification'
-complete --command youtube-dl --long-option list-extractors --description 'List all supported extractors and the URLs they would handle'
+complete --command youtube-dl --long-option list-extractors --description 'List all supported extractors'
complete --command youtube-dl --long-option extractor-descriptions --description 'Output descriptions of all supported extractors'
+complete --command youtube-dl --long-option force-generic-extractor --description 'Force extraction to use the generic extractor'
complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
complete --command youtube-dl --long-option ignore-config --description 'Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)'
complete --command youtube-dl --long-option flat-playlist --description 'Do not extract the videos of a playlist, only list them.'
@@ -19,7 +20,7 @@ complete --command youtube-dl --long-option force-ipv6 --short-option 6 --descri
complete --command youtube-dl --long-option cn-verification-proxy --description 'Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
complete --command youtube-dl --long-option playlist-start --description 'Playlist video to start at (default is %default)'
complete --command youtube-dl --long-option playlist-end --description 'Playlist video to end at (default is last)'
-complete --command youtube-dl --long-option playlist-items --description 'Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
+complete --command youtube-dl --long-option playlist-items --description 'Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
complete --command youtube-dl --long-option match-title --description 'Download only matching titles (regex or caseless sub-string)'
complete --command youtube-dl --long-option reject-title --description 'Skip download for matching titles (regex or caseless sub-string)'
complete --command youtube-dl --long-option max-downloads --description 'Abort after downloading NUMBER files'
@@ -44,7 +45,7 @@ complete --command youtube-dl --long-option test
complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
complete --command youtube-dl --long-option xattr-set-filesize --description 'Set file xattribute ytdl.filesize with expected filesize (experimental)'
complete --command youtube-dl --long-option hls-prefer-native --description 'Use the native HLS downloader instead of ffmpeg (experimental)'
-complete --command youtube-dl --long-option external-downloader --description 'Use the specified external downloader. Currently supports aria2c,curl,wget'
+complete --command youtube-dl --long-option external-downloader --description 'Use the specified external downloader. Currently supports aria2c,curl,httpie,wget'
complete --command youtube-dl --long-option external-downloader-args --description 'Give these arguments to the external downloader'
complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
complete --command youtube-dl --long-option id --description 'Use only video ID in file name'
@@ -108,8 +109,8 @@ complete --command youtube-dl --long-option all-formats --description 'Download
complete --command youtube-dl --long-option prefer-free-formats --description 'Prefer free video formats unless a specific one is requested'
complete --command youtube-dl --long-option list-formats --short-option F --description 'List all available formats'
complete --command youtube-dl --long-option youtube-include-dash-manifest
-complete --command youtube-dl --long-option youtube-skip-dash-manifest --description 'Do not download the DASH manifest on YouTube videos'
-complete --command youtube-dl --long-option merge-output-format --description 'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no merge is required'
+complete --command youtube-dl --long-option youtube-skip-dash-manifest --description 'Do not download the DASH manifests and related data on YouTube videos'
+complete --command youtube-dl --long-option merge-output-format --description 'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no merge is required'
complete --command youtube-dl --long-option write-sub --description 'Write subtitle file'
complete --command youtube-dl --long-option write-auto-sub --description 'Write automatic subtitle file (YouTube only)'
complete --command youtube-dl --long-option all-subs --description 'Download all the available subtitles of the video'
@@ -124,7 +125,8 @@ complete --command youtube-dl --long-option video-password --description 'Video
complete --command youtube-dl --long-option extract-audio --short-option x --description 'Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)'
complete --command youtube-dl --long-option audio-format --description 'Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default'
complete --command youtube-dl --long-option audio-quality --description 'Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)'
-complete --command youtube-dl --long-option recode-video --description 'Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)' --arguments 'mp4 flv ogg webm mkv' --exclusive
+complete --command youtube-dl --long-option recode-video --description 'Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)' --arguments 'mp4 flv ogg webm mkv' --exclusive
+complete --command youtube-dl --long-option postprocessor-args --description 'Give these arguments to the postprocessor'
complete --command youtube-dl --long-option keep-video --short-option k --description 'Keep the video file on disk after the post-processing; the video is erased by default'
complete --command youtube-dl --long-option no-post-overwrites --description 'Do not overwrite post-processed files; the post-processed files are overwritten by default'
complete --command youtube-dl --long-option embed-subs --description 'Embed subtitles in the video (only for mkv and mp4 videos)'
@@ -132,7 +134,7 @@ complete --command youtube-dl --long-option embed-thumbnail --description 'Embed
complete --command youtube-dl --long-option add-metadata --description 'Write metadata to the video file'
complete --command youtube-dl --long-option metadata-from-title --description 'Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise"'
complete --command youtube-dl --long-option xattrs --description 'Write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
-complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; fix file if we can, warn otherwise)'
+complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise)'
complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
complete --command youtube-dl --long-option ffmpeg-location --description 'Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.'
diff --git a/youtube-dl.zsh b/youtube-dl.zsh
index 7dc3ad4..73c95b0 100644
--- a/youtube-dl.zsh
+++ b/youtube-dl.zsh
@@ -19,7 +19,7 @@ __youtube_dl() {
elif [[ ${prev} == "--recode-video" ]]; then
_arguments '*: :(mp4 flv ogg webm mkv)'
else
- _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)'
+ _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)'
fi
;;
esac
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index aa6ec9d..00af78e 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -119,7 +119,7 @@ class YoutubeDL(object):
username: Username for authentication purposes.
password: Password for authentication purposes.
- videopassword: Password for acces a video.
+ videopassword: Password for accessing a video.
usenetrc: Use netrc for authentication instead.
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
@@ -139,6 +139,7 @@ class YoutubeDL(object):
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors.
+ force_generic_extractor: Force downloader to use the generic extractor
nooverwrites: Prevent overwriting files.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
@@ -261,6 +262,8 @@ class YoutubeDL(object):
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
otherwise prefer avconv.
+ postprocessor_args: A list of additional command-line arguments for the
+ postprocessor.
"""
params = None
@@ -626,13 +629,16 @@ class YoutubeDL(object):
info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={},
- process=True):
+ process=True, force_generic_extractor=False):
'''
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
'''
+ if not ie_key and force_generic_extractor:
+ ie_key = 'Generic'
+
if ie_key:
ies = [self.get_info_extractor(ie_key)]
else:
@@ -1004,7 +1010,7 @@ class YoutubeDL(object):
t.get('preference'), t.get('width'), t.get('height'),
t.get('id'), t.get('url')))
for i, t in enumerate(thumbnails):
- if 'width' in t and 'height' in t:
+ if t.get('width') and t.get('height'):
t['resolution'] = '%dx%d' % (t['width'], t['height'])
if t.get('id') is None:
t['id'] = '%d' % i
@@ -1016,13 +1022,13 @@ class YoutubeDL(object):
info_dict['display_id'] = info_dict['id']
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
- # Working around negative timestamps in Windows
- # (see http://bugs.python.org/issue1646728)
- if info_dict['timestamp'] < 0 and os.name == 'nt':
- info_dict['timestamp'] = 0
- upload_date = datetime.datetime.utcfromtimestamp(
- info_dict['timestamp'])
- info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
+ info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ except (ValueError, OverflowError, OSError):
+ pass
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
@@ -1033,12 +1039,6 @@ class YoutubeDL(object):
info_dict['id'], info_dict.get('subtitles'),
info_dict.get('automatic_captions'))
- # This extractors handle format selection themselves
- if info_dict['extractor'] in ['Youku']:
- if download:
- self.process_info(info_dict)
- return info_dict
-
# We now pick which formats have to be downloaded
if info_dict.get('formats') is None:
# There's only one format available
@@ -1499,7 +1499,8 @@ class YoutubeDL(object):
for url in url_list:
try:
# It also downloads the videos
- res = self.extract_info(url)
+ res = self.extract_info(
+ url, force_generic_extractor=self.params.get('force_generic_extractor', False))
except UnavailableVideoError:
self.report_error('unable to download video')
except MaxDownloadsReached:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index ace1785..55b22c8 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -169,7 +169,7 @@ def _real_main(argv=None):
if not opts.audioquality.isdigit():
parser.error('invalid audio quality specified')
if opts.recodevideo is not None:
- if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
+ if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
parser.error('invalid video recode format specified')
if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
@@ -263,6 +263,9 @@ def _real_main(argv=None):
external_downloader_args = None
if opts.external_downloader_args:
external_downloader_args = shlex.split(opts.external_downloader_args)
+ postprocessor_args = None
+ if opts.postprocessor_args:
+ postprocessor_args = shlex.split(opts.postprocessor_args)
match_filter = (
None if opts.match_filter is None
else match_filter_func(opts.match_filter))
@@ -293,6 +296,7 @@ def _real_main(argv=None):
'autonumber_size': opts.autonumber_size,
'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors,
+ 'force_generic_extractor': opts.force_generic_extractor,
'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites,
'retries': opts_retries,
@@ -366,6 +370,7 @@ def _real_main(argv=None):
'ffmpeg_location': opts.ffmpeg_location,
'hls_prefer_native': opts.hls_prefer_native,
'external_downloader_args': external_downloader_args,
+ 'postprocessor_args': postprocessor_args,
'cn_verification_proxy': opts.cn_verification_proxy,
}
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index f952921..0c57c7a 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -9,6 +9,7 @@ import shutil
import socket
import subprocess
import sys
+import itertools
try:
@@ -74,42 +75,74 @@ except ImportError:
import BaseHTTPServer as compat_http_server
try:
+ from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
-except ImportError:
- def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
- if string == '':
+ from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+except ImportError: # Python 2
+ _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
+ else re.compile('([\x00-\x7f]+)'))
+
+ # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
+ # implementations from cpython 3.4.3's stdlib. Python 2's version
+ # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
+
+ def compat_urllib_parse_unquote_to_bytes(string):
+ """unquote_to_bytes('abc%20def') -> b'abc def'."""
+ # Note: strings are encoded as UTF-8. This is only an issue if it contains
+ # unescaped non-ASCII characters, which URIs should not.
+ if not string:
+ # Is it a string-like object?
+ string.split
+ return b''
+ if isinstance(string, unicode):
+ string = string.encode('utf-8')
+ bits = string.split(b'%')
+ if len(bits) == 1:
return string
- res = string.split('%')
- if len(res) == 1:
+ res = [bits[0]]
+ append = res.append
+ for item in bits[1:]:
+ try:
+ append(compat_urllib_parse._hextochr[item[:2]])
+ append(item[2:])
+ except KeyError:
+ append(b'%')
+ append(item)
+ return b''.join(res)
+
+ def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
+ """Replace %xx escapes by their single-character equivalent. The optional
+ encoding and errors parameters specify how to decode percent-encoded
+ sequences into Unicode characters, as accepted by the bytes.decode()
+ method.
+ By default, percent-encoded sequences are decoded with UTF-8, and invalid
+ sequences are replaced by a placeholder character.
+
+ unquote('abc%20def') -> 'abc def'.
+ """
+ if '%' not in string:
+ string.split
return string
if encoding is None:
encoding = 'utf-8'
if errors is None:
errors = 'replace'
- # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
- pct_sequence = b''
- string = res[0]
- for item in res[1:]:
- try:
- if not item:
- raise ValueError
- pct_sequence += item[:2].decode('hex')
- rest = item[2:]
- if not rest:
- # This segment was just a single percent-encoded character.
- # May be part of a sequence of code units, so delay decoding.
- # (Stored in pct_sequence).
- continue
- except ValueError:
- rest = '%' + item
- # Encountered non-percent-encoded characters. Flush the current
- # pct_sequence.
- string += pct_sequence.decode(encoding, errors) + rest
- pct_sequence = b''
- if pct_sequence:
- # Flush the final pct_sequence
- string += pct_sequence.decode(encoding, errors)
- return string
+ bits = _asciire.split(string)
+ res = [bits[0]]
+ append = res.append
+ for i in range(1, len(bits), 2):
+ append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
+ append(bits[i + 1])
+ return ''.join(res)
+
+ def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
+ """Like unquote(), but also replace plus signs by spaces, as required for
+ unquoting HTML form values.
+
+ unquote_plus('%7e/abc+def') -> '~/abc def'
+ """
+ string = string.replace('+', ' ')
+ return compat_urllib_parse_unquote(string, encoding, errors)
try:
compat_str = unicode # Python 2
@@ -388,6 +421,15 @@ else:
pass
return _terminal_size(columns, lines)
+try:
+ itertools.count(start=0, step=1)
+ compat_itertools_count = itertools.count
+except TypeError: # Python 2.6
+ def compat_itertools_count(start=0, step=1):
+ n = start
+ while True:
+ yield n
+ n += step
__all__ = [
'compat_HTTPError',
@@ -401,6 +443,7 @@ __all__ = [
'compat_html_entities',
'compat_http_client',
'compat_http_server',
+ 'compat_itertools_count',
'compat_kwargs',
'compat_ord',
'compat_parse_qs',
@@ -411,6 +454,8 @@ __all__ = [
'compat_urllib_error',
'compat_urllib_parse',
'compat_urllib_parse_unquote',
+ 'compat_urllib_parse_unquote_plus',
+ 'compat_urllib_parse_unquote_to_bytes',
'compat_urllib_parse_urlparse',
'compat_urllib_request',
'compat_urlparse',
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index f110830..dccc592 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -8,6 +8,7 @@ from .hls import NativeHlsFD
from .http import HttpFD
from .rtsp import RtspFD
from .rtmp import RtmpFD
+from .dash import DashSegmentsFD
from ..utils import (
determine_protocol,
@@ -20,6 +21,7 @@ PROTOCOL_MAP = {
'mms': RtspFD,
'rtsp': RtspFD,
'f4m': F4mFD,
+ 'http_dash_segments': DashSegmentsFD,
}
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
new file mode 100644
index 0000000..a4685d3
--- /dev/null
+++ b/youtube_dl/downloader/dash.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import FileDownloader
+from ..compat import compat_urllib_request
+
+
+class DashSegmentsFD(FileDownloader):
+ """
+ Download segments in a DASH manifest
+ """
+ def real_download(self, filename, info_dict):
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+ base_url = info_dict['url']
+ segment_urls = info_dict['segment_urls']
+
+ is_test = self.params.get('test', False)
+ remaining_bytes = self._TEST_FILE_SIZE if is_test else None
+ byte_counter = 0
+
+ def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
+ self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
+ req = compat_urllib_request.Request(target_url)
+ if remaining_bytes is not None:
+ req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
+
+ data = self.ydl.urlopen(req).read()
+
+ if remaining_bytes is not None:
+ data = data[:remaining_bytes]
+
+ outf.write(data)
+ return len(data)
+
+ def combine_url(base_url, target_url):
+ if re.match(r'^https?://', target_url):
+ return target_url
+ return '%s/%s' % (base_url, target_url)
+
+ with open(tmpfilename, 'wb') as outf:
+ append_url_to_file(
+ outf, combine_url(base_url, info_dict['initialization_url']),
+ 'initialization segment')
+ for i, segment_url in enumerate(segment_urls):
+ segment_len = append_url_to_file(
+ outf, combine_url(base_url, segment_url),
+ 'segment %d / %d' % (i + 1, len(segment_urls)),
+ remaining_bytes)
+ byte_counter += segment_len
+ if remaining_bytes is not None:
+ remaining_bytes -= segment_len
+ if remaining_bytes <= 0:
+ break
+
+ self.try_rename(tmpfilename, filename)
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': filename,
+ 'status': 'finished',
+ })
+
+ return True
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 7ca2d31..1d5cc99 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -109,6 +109,14 @@ class Aria2cFD(ExternalFD):
cmd += ['--', info_dict['url']]
return cmd
+
+class HttpieFD(ExternalFD):
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['%s:%s' % (key, val)]
+ return cmd
+
_BY_NAME = dict(
(klass.get_basename(), klass)
for name, klass in globals().items()
@@ -123,5 +131,6 @@ def list_external_downloaders():
def get_external_downloader(external_downloader):
""" Given the name of the executable, see whether we support the given
downloader . """
- bn = os.path.basename(external_downloader)
+ # Drop .exe extension on Windows
+ bn = os.path.splitext(os.path.basename(external_downloader))[0]
return _BY_NAME[bn]
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 67eb960..50da088 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -4,7 +4,10 @@ from .abc import ABCIE
from .abc7news import Abc7NewsIE
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
-from .adobetv import AdobeTVIE
+from .adobetv import (
+ AdobeTVIE,
+ AdobeTVVideoIE,
+)
from .adultswim import AdultSwimIE
from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
@@ -16,9 +19,14 @@ from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
-from .ard import ARDIE, ARDMediathekIE
+from .ard import (
+ ARDIE,
+ ARDMediathekIE,
+ SportschauIE,
+)
from .arte import (
ArteTvIE,
ArteTVPlus7IE,
@@ -103,6 +111,7 @@ from .dailymotion import (
DailymotionIE,
DailymotionPlaylistIE,
DailymotionUserIE,
+ DailymotionCloudIE,
)
from .daum import DaumIE
from .dbtv import DBTVIE
@@ -112,6 +121,10 @@ from .dfb import DFBIE
from .dhm import DHMIE
from .dotsub import DotsubIE
from .douyutv import DouyuTVIE
+from .dramafever import (
+ DramaFeverIE,
+ DramaFeverSeriesIE,
+)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
@@ -136,7 +149,6 @@ from .ellentv import (
)
from .elpais import ElPaisIE
from .embedly import EmbedlyIE
-from .empflix import EMPFlixIE
from .engadget import EngadgetIE
from .eporner import EpornerIE
from .eroprofile import EroProfileIE
@@ -152,6 +164,7 @@ from .fc2 import FC2IE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
+from .fivetv import FiveTVIE
from .fktv import (
FKTVIE,
FKTVPosteckeIE,
@@ -229,6 +242,7 @@ from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
+from .iqiyi import IqiyiIE
from .ivi import (
IviIE,
IviCompilationIE
@@ -251,6 +265,14 @@ from .keek import KeekIE
from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
+from .kuwo import (
+ KuwoIE,
+ KuwoAlbumIE,
+ KuwoChartIE,
+ KuwoSingerIE,
+ KuwoCategoryIE,
+ KuwoMvIE,
+)
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
from .letv import (
@@ -314,6 +336,7 @@ from .musicvault import MusicVaultIE
from .muzu import MuzuTVIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
+from .myvi import MyviIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .nationalgeographic import NationalGeographicIE
@@ -333,6 +356,15 @@ from .ndtv import NDTVIE
from .netzkino import NetzkinoIE
from .nerdcubed import NerdCubedFeedIE
from .nerdist import NerdistIE
+from .neteasemusic import (
+ NetEaseMusicIE,
+ NetEaseMusicAlbumIE,
+ NetEaseMusicSingerIE,
+ NetEaseMusicListIE,
+ NetEaseMusicMvIE,
+ NetEaseMusicProgramIE,
+ NetEaseMusicDjRadioIE,
+)
from .newgrounds import NewgroundsIE
from .newstube import NewstubeIE
from .nextmedia import (
@@ -362,7 +394,8 @@ from .npo import (
NPOLiveIE,
NPORadioIE,
NPORadioFragmentIE,
- TegenlichtVproIE,
+ VPROIE,
+ WNLIE
)
from .nrk import (
NRKIE,
@@ -378,6 +411,7 @@ from .nytimes import (
from .nuvid import NuvidIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
+from .onionstudios import OnionStudiosIE
from .ooyala import (
OoyalaIE,
OoyalaExternalIE,
@@ -395,6 +429,7 @@ from .pbs import PBSIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
+from .pinkbike import PinkbikeIE
from .planetaplay import PlanetaPlayIE
from .pladform import PladformIE
from .played import PlayedIE
@@ -421,6 +456,7 @@ from .qqmusic import (
QQMusicSingerIE,
QQMusicAlbumIE,
QQMusicToplistIE,
+ QQMusicPlaylistIE,
)
from .quickvid import QuickVidIE
from .r7 import R7IE
@@ -430,6 +466,7 @@ from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
from .rbmaradio import RBMARadioIE
+from .rds import RDSIE
from .redtube import RedTubeIE
from .restudy import RestudyIE
from .reverbnation import ReverbNationIE
@@ -453,6 +490,7 @@ from .rutube import (
RutubePersonIE,
)
from .rutv import RUTVIE
+from .ruutu import RuutuIE
from .sandia import SandiaIE
from .safari import (
SafariIE,
@@ -480,6 +518,10 @@ from .smotri import (
SmotriUserIE,
SmotriBroadcastIE,
)
+from .snagfilms import (
+ SnagFilmsIE,
+ SnagFilmsEmbedIE,
+)
from .snotr import SnotrIE
from .sohu import SohuIE
from .soompi import (
@@ -553,6 +595,7 @@ from .tf1 import TF1IE
from .theonion import TheOnionIE
from .theplatform import ThePlatformIE
from .thesixtyone import TheSixtyOneIE
+from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE
@@ -560,7 +603,11 @@ from .tmz import (
TMZIE,
TMZArticleIE,
)
-from .tnaflix import TNAFlixIE
+from .tnaflix import (
+ TNAFlixIE,
+ EMPFlixIE,
+ MovieFapIE,
+)
from .thvideo import (
THVideoIE,
THVideoPlaylistIE
@@ -582,6 +629,10 @@ from .tv2 import (
TV2ArticleIE,
)
from .tv4 import TV4IE
+from .tvc import (
+ TVCIE,
+ TVCArticleIE,
+)
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
@@ -600,6 +651,7 @@ from .twitch import (
TwitchBookmarksIE,
TwitchStreamIE,
)
+from .twitter import TwitterCardIE
from .ubu import UbuIE
from .udemy import (
UdemyIE,
@@ -676,7 +728,10 @@ from .wdr import (
WDRMobileIE,
WDRMausIE,
)
-from .webofstories import WebOfStoriesIE
+from .webofstories import (
+ WebOfStoriesIE,
+ WebOfStoriesPlaylistIE,
+)
from .weibo import WeiboIE
from .wimp import WimpIE
from .wistia import WistiaIE
@@ -685,7 +740,10 @@ from .wrzuta import WrzutaIE
from .wsj import WSJIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
-from .xhamster import XHamsterIE
+from .xhamster import (
+ XHamsterIE,
+ XHamsterEmbedIE,
+)
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xstream import XstreamIE
@@ -704,6 +762,7 @@ from .yandexmusic import (
YandexMusicPlaylistIE,
)
from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
from .youku import YoukuIE
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
index 97d1285..5e43adc 100644
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@@ -5,6 +5,8 @@ from ..utils import (
parse_duration,
unified_strdate,
str_to_int,
+ float_or_none,
+ ISO639Utils,
)
@@ -69,3 +71,61 @@ class AdobeTVIE(InfoExtractor):
'view_count': view_count,
'formats': formats,
}
+
+
+class AdobeTVVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
+
+ _TEST = {
+ # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
+ 'url': 'https://video.tv.adobe.com/v/2456/',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player_params = self._parse_json(self._search_regex(
+ r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
+ video_id)
+
+ formats = [{
+ 'url': source['src'],
+ 'width': source.get('width'),
+ 'height': source.get('height'),
+ 'tbr': source.get('bitrate'),
+ } for source in player_params['sources']]
+
+ # For both metadata and downloaded files the duration varies among
+ # formats. I just pick the max one
+ duration = max(filter(None, [
+ float_or_none(source.get('duration'), scale=1000)
+ for source in player_params['sources']]))
+
+ subtitles = {}
+ for translation in player_params.get('translations', []):
+ lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
+ if lang_id not in subtitles:
+ subtitles[lang_id] = []
+ subtitles[lang_id].append({
+ 'url': translation['vttPath'],
+ 'ext': 'vtt',
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': player_params['title'],
+ 'description': self._og_search_description(webpage),
+ 'duration': duration,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py
new file mode 100644
index 0000000..ea7a703
--- /dev/null
+++ b/youtube_dl/extractor/appleconnect.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ str_to_int,
+ ExtractorError
+)
+
+
+class AppleConnectIE(InfoExtractor):
+ _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
+ _TEST = {
+ 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+ 'md5': '10d0f2799111df4cb1c924520ca78f98',
+ 'info_dict': {
+ 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+ 'ext': 'm4v',
+ 'title': 'Energy',
+ 'uploader': 'Drake',
+ 'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
+ 'upload_date': '20150710',
+ 'timestamp': 1436545535,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ try:
+ video_json = self._html_search_regex(
+ r'class="auc-video-data">(\{.*?\})', webpage, 'json')
+ except ExtractorError:
+ raise ExtractorError('This post doesn\'t contain a video', expected=True)
+
+ video_data = self._parse_json(video_json, video_id)
+ timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
+ like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+
+ return {
+ 'id': video_id,
+ 'url': video_data['sslSrc'],
+ 'title': video_data['title'],
+ 'description': video_data['description'],
+ 'uploader': video_data['artistName'],
+ 'thumbnail': video_data['artworkUrl'],
+ 'timestamp': timestamp,
+ 'like_count': like_count,
+ }
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 6a35ea4..6f46578 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -8,6 +8,7 @@ from .generic import GenericIE
from ..utils import (
determine_ext,
ExtractorError,
+ get_element_by_attribute,
qualities,
int_or_none,
parse_duration,
@@ -22,19 +23,125 @@ class ARDMediathekIE(InfoExtractor):
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TESTS = [{
- 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
- 'only_matching': True,
+ 'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
+ 'info_dict': {
+ 'id': '29582122',
+ 'ext': 'mp4',
+ 'title': 'Ich liebe das Leben trotzdem',
+ 'description': 'md5:45e4c225c72b27993314b31a84a5261c',
+ 'duration': 4557,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}, {
- 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
+ 'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
+ 'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
'info_dict': {
- 'id': '22490580',
+ 'id': '29522730',
'ext': 'mp4',
- 'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
- 'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
+ 'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
+ 'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
+ 'duration': 5252,
},
- 'skip': 'Blocked outside of Germany',
+ }, {
+ # audio
+ 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
+ 'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
+ 'info_dict': {
+ 'id': '28488308',
+ 'ext': 'mp3',
+ 'title': 'Tod eines Fußballers',
+ 'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
+ 'duration': 3240,
+ },
+ }, {
+ 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
+ 'only_matching': True,
}]
+ def _extract_media_info(self, media_info_url, webpage, video_id):
+ media_info = self._download_json(
+ media_info_url, video_id, 'Downloading media JSON')
+
+ formats = self._extract_formats(media_info, video_id)
+
+ if not formats:
+ if '"fsk"' in webpage:
+ raise ExtractorError(
+ 'This video is only available after 20:00', expected=True)
+ elif media_info.get('_geoblocked'):
+ raise ExtractorError('This video is not available due to geo restriction', expected=True)
+
+ self._sort_formats(formats)
+
+ duration = int_or_none(media_info.get('_duration'))
+ thumbnail = media_info.get('_previewImage')
+
+ subtitles = {}
+ subtitle_url = media_info.get('_subtitleUrl')
+ if subtitle_url:
+ subtitles['de'] = [{
+ 'ext': 'srt',
+ 'url': subtitle_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_formats(self, media_info, video_id):
+ type_ = media_info.get('_type')
+ media_array = media_info.get('_mediaArray', [])
+ formats = []
+ for num, media in enumerate(media_array):
+ for stream in media.get('_mediaStreamArray', []):
+ stream_urls = stream.get('_stream')
+ if not stream_urls:
+ continue
+ if not isinstance(stream_urls, list):
+ stream_urls = [stream_urls]
+ quality = stream.get('_quality')
+ server = stream.get('_server')
+ for stream_url in stream_urls:
+ ext = determine_ext(stream_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
+ video_id, preference=-1, f4m_id='hds'))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', preference=1, m3u8_id='hls'))
+ else:
+ if server and server.startswith('rtmp'):
+ f = {
+ 'url': server,
+ 'play_path': stream_url,
+ 'format_id': 'a%s-rtmp-%s' % (num, quality),
+ }
+ elif stream_url.startswith('http'):
+ f = {
+ 'url': stream_url,
+ 'format_id': 'a%s-%s-%s' % (num, ext, quality)
+ }
+ else:
+ continue
+ m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
+ if m:
+ f.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ if type_ == 'audio':
+ f['vcodec'] = 'none'
+ formats.append(f)
+ return formats
+
def _real_extract(self, url):
# determine video id from url
m = re.match(self._VALID_URL, url)
@@ -92,46 +199,22 @@ class ARDMediathekIE(InfoExtractor):
'format_id': fid,
'url': furl,
})
+ self._sort_formats(formats)
+ info = {
+ 'formats': formats,
+ }
else: # request JSON file
- media_info = self._download_json(
- 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
- # The second element of the _mediaArray contains the standard http urls
- streams = media_info['_mediaArray'][1]['_mediaStreamArray']
- if not streams:
- if '"fsk"' in webpage:
- raise ExtractorError('This video is only available after 20:00')
-
- formats = []
- for s in streams:
- if type(s['_stream']) == list:
- for index, url in enumerate(s['_stream'][::-1]):
- quality = s['_quality'] + index
- formats.append({
- 'quality': quality,
- 'url': url,
- 'format_id': '%s-%s' % (determine_ext(url), quality)
- })
- continue
-
- format = {
- 'quality': s['_quality'],
- 'url': s['_stream'],
- }
-
- format['format_id'] = '%s-%s' % (
- determine_ext(format['url']), format['quality'])
+ info = self._extract_media_info(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
- formats.append(format)
-
- self._sort_formats(formats)
-
- return {
+ info.update({
'id': video_id,
'title': title,
'description': description,
- 'formats': formats,
'thumbnail': thumbnail,
- }
+ })
+
+ return info
class ARDIE(InfoExtractor):
@@ -189,3 +272,41 @@ class ARDIE(InfoExtractor):
'upload_date': upload_date,
'thumbnail': thumbnail,
}
+
+
+class SportschauIE(ARDMediathekIE):
+ IE_NAME = 'Sportschau'
+ _VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
+ _TESTS = [{
+ 'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
+ 'info_dict': {
+ 'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
+ 'ext': 'mp4',
+ 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ base_url = mobj.group('baseurl')
+
+ webpage = self._download_webpage(url, video_id)
+ title = get_element_by_attribute('class', 'headline', webpage)
+ description = self._html_search_meta('description', webpage, 'description')
+
+ info = self._extract_media_info(
+ base_url + '-mc_defaultQuality-h.json', webpage, video_id)
+
+ info.update({
+ 'title': title,
+ 'description': description,
+ })
+
+ return info
diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py
index 906895c..e37ee44 100644
--- a/youtube_dl/extractor/baidu.py
+++ b/youtube_dl/extractor/baidu.py
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
class BaiduVideoIE(InfoExtractor):
+ IE_DESC = '百度视频'
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
_TESTS = [{
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py
index 249bc6b..5825d28 100644
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -130,6 +130,20 @@ class BBCCoUkIE(InfoExtractor):
},
'skip': 'geolocation',
}, {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
+ 'info_dict': {
+ 'id': 'b05zmgw1',
+ 'ext': 'flv',
+ 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
+ 'title': 'Royal Academy Summer Exhibition',
+ 'duration': 3540,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'geolocation',
+ }, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
}, {
@@ -237,26 +251,11 @@ class BBCCoUkIE(InfoExtractor):
for connection in self._extract_connections(media):
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
- ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
- srt = ''
-
- def _extract_text(p):
- if p.text is not None:
- stripped_text = p.text.strip()
- if stripped_text:
- return stripped_text
- return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
- for pos, p in enumerate(ps):
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
subtitles[lang] = [
{
'url': connection.get('href'),
'ext': 'ttml',
},
- {
- 'data': srt,
- 'ext': 'srt',
- },
]
return subtitles
@@ -267,7 +266,7 @@ class BBCCoUkIE(InfoExtractor):
programme_id, 'Downloading media selection XML')
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
- media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
+ media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
else:
raise
@@ -362,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
formats, subtitles = self._download_media_selector(programme_id)
title = self._og_search_title(webpage)
description = self._search_regex(
- r'<p class="medium-description">([^<]+)</p>',
+ r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
webpage, 'description', fatal=False)
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py
index 26b9345..03dad46 100644
--- a/youtube_dl/extractor/bet.py
+++ b/youtube_dl/extractor/bet.py
@@ -1,7 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
xpath_text,
xpath_with_ns,
@@ -57,7 +57,7 @@ class BetIE(InfoExtractor):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- media_url = compat_urllib_parse.unquote(self._search_regex(
+ media_url = compat_urllib_parse_unquote(self._search_regex(
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
webpage, 'media URL'))
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 2103ed7..ecc17eb 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -41,8 +41,15 @@ class BiliBiliIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
- raise ExtractorError('The video does not exist or was deleted', expected=True)
+ if '(此视频不存在或被删除)' in webpage:
+ raise ExtractorError(
+ 'The video does not exist or was deleted', expected=True)
+
+ if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage:
+ raise ExtractorError(
+ 'The video is not available in your region due to copyright reasons',
+ expected=True)
+
video_code = self._search_regex(
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
@@ -105,7 +112,7 @@ class BiliBiliIE(InfoExtractor):
'filesize': int_or_none(
lq_durl.find('./size'), get_attr='text'),
}]
- if hq_durl:
+ if hq_durl is not None:
formats.append({
'format_id': 'hq',
'quality': 2,
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index fb56cd7..c329628 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_str,
compat_urllib_request,
compat_urlparse,
)
@@ -14,6 +13,8 @@ from ..utils import (
int_or_none,
parse_iso8601,
unescapeHTML,
+ xpath_text,
+ xpath_with_ns,
)
@@ -23,10 +24,10 @@ class BlipTVIE(InfoExtractor):
_TESTS = [
{
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
- 'md5': 'c6934ad0b6acf2bd920720ec888eb812',
+ 'md5': '80baf1ec5c3d2019037c1c707d676b9f',
'info_dict': {
'id': '5779306',
- 'ext': 'mov',
+ 'ext': 'm4v',
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
'timestamp': 1323138843,
@@ -100,6 +101,20 @@ class BlipTVIE(InfoExtractor):
'vcodec': 'none',
}
},
+ {
+ # missing duration
+ 'url': 'http://blip.tv/rss/flash/6700880',
+ 'info_dict': {
+ 'id': '6684191',
+ 'ext': 'm4v',
+ 'title': 'Cowboy Bebop: Gateway Shuffle Review',
+ 'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
+ 'timestamp': 1386639757,
+ 'upload_date': '20131210',
+ 'uploader': 'sfdebris',
+ 'uploader_id': '706520',
+ }
+ }
]
@staticmethod
@@ -128,35 +143,34 @@ class BlipTVIE(InfoExtractor):
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
- def blip(s):
- return '{http://blip.tv/dtd/blip/1.0}%s' % s
-
- def media(s):
- return '{http://search.yahoo.com/mrss/}%s' % s
-
- def itunes(s):
- return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
+ def _x(p):
+ return xpath_with_ns(p, {
+ 'blip': 'http://blip.tv/dtd/blip/1.0',
+ 'media': 'http://search.yahoo.com/mrss/',
+ 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
+ })
item = rss.find('channel/item')
- video_id = item.find(blip('item_id')).text
- title = item.find('./title').text
- description = clean_html(compat_str(item.find(blip('puredescription')).text))
- timestamp = parse_iso8601(item.find(blip('datestamp')).text)
- uploader = item.find(blip('user')).text
- uploader_id = item.find(blip('userid')).text
- duration = int(item.find(blip('runtime')).text)
- media_thumbnail = item.find(media('thumbnail'))
- thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
- categories = [category.text for category in item.findall('category')]
+ video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
+ title = xpath_text(item, 'title', 'title', fatal=True)
+ description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
+ timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
+ uploader = xpath_text(item, _x('blip:user'), 'uploader')
+ uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
+ duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
+ media_thumbnail = item.find(_x('media:thumbnail'))
+ thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
+ else xpath_text(item, 'image', 'thumbnail'))
+ categories = [category.text for category in item.findall('category') if category is not None]
formats = []
subtitles_urls = {}
- media_group = item.find(media('group'))
- for media_content in media_group.findall(media('content')):
+ media_group = item.find(_x('media:group'))
+ for media_content in media_group.findall(_x('media:content')):
url = media_content.get('url')
- role = media_content.get(blip('role'))
+ role = media_content.get(_x('blip:role'))
msg = self._download_webpage(
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
video_id, 'Resolving URL for %s' % role)
@@ -175,8 +189,8 @@ class BlipTVIE(InfoExtractor):
'url': real_url,
'format_id': role,
'format_note': media_type,
- 'vcodec': media_content.get(blip('vcodec')) or 'none',
- 'acodec': media_content.get(blip('acodec')),
+ 'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
+ 'acodec': media_content.get(_x('blip:acodec')),
'filesize': media_content.get('filesize'),
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 4f60d53..4721c22 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -13,6 +13,7 @@ from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
+ compat_xml_parse_error,
)
from ..utils import (
determine_ext,
@@ -119,7 +120,7 @@ class BrightcoveIE(InfoExtractor):
try:
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
- except xml.etree.ElementTree.ParseError:
+ except compat_xml_parse_error:
return
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
@@ -156,6 +157,28 @@ class BrightcoveIE(InfoExtractor):
linkBase = find_param('linkBaseURL')
if linkBase is not None:
params['linkBaseURL'] = linkBase
+ return cls._make_brightcove_url(params)
+
+ @classmethod
+ def _build_brighcove_url_from_js(cls, object_js):
+ # The layout of JS is as follows:
+ # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
+ # // build Brightcove <object /> XML
+ # }
+ m = re.search(
+ r'''(?x)customBC.\createVideo\(
+ .*? # skipping width and height
+ ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
+ ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
+ # in length, however it's appended to itself
+ # in places, so truncate
+ ["\'](?P<videoID>\d+)["\'] # @videoPlayer
+ ''', object_js)
+ if m:
+ return cls._make_brightcove_url(m.groupdict())
+
+ @classmethod
+ def _make_brightcove_url(cls, params):
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data
@@ -172,7 +195,7 @@ class BrightcoveIE(InfoExtractor):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(
- r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
+ r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
webpage)
if url_m:
url = unescapeHTML(url_m.group(1))
@@ -188,7 +211,12 @@ class BrightcoveIE(InfoExtractor):
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?>\s*</object>''',
webpage)
- return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
+ if matches:
+ return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
+
+ return list(filter(None, [
+ cls._build_brighcove_url_from_js(custom_bc)
+ for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py
index 1ceb9d8..75fffb1 100644
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -4,12 +4,13 @@ from .common import InfoExtractor
class CBSIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P<id>[^/]+)/.*'
+ _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
'info_dict': {
'id': '4JUVEwq3wUT7',
+ 'display_id': 'connect-chat-feat-garth-brooks',
'ext': 'flv',
'title': 'Connect Chat feat. Garth Brooks',
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
@@ -24,6 +25,7 @@ class CBSIE(InfoExtractor):
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
'info_dict': {
'id': 'WWF_5KqY3PK1',
+ 'display_id': 'st-vincent',
'ext': 'flv',
'title': 'Live on Letterman - St. Vincent',
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
@@ -34,12 +36,23 @@ class CBSIE(InfoExtractor):
'skip_download': True,
},
'_skip': 'Blocked outside the US',
+ }, {
+ 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
real_id = self._search_regex(
- r"video\.settings\.pid\s*=\s*'([^']+)';",
+ [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
webpage, 'real video ID')
- return self.url_result('theplatform:%s' % real_id)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': 'theplatform:%s' % real_id,
+ 'display_id': display_id,
+ }
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 65f6be6..dda5836 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..utils import (
@@ -88,7 +89,7 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
- req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
+ req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist = self._download_json(req, video_id)
diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py
index d07d544..8306d6f 100644
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
@@ -10,9 +8,9 @@ from ..utils import (
class ClipsyndicateIE(InfoExtractor):
- _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+ _VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
'info_dict': {
@@ -22,11 +20,13 @@ class ClipsyndicateIE(InfoExtractor):
'duration': 612,
'thumbnail': 're:^https?://.+\.jpg',
},
- }
+ }, {
+ 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
js_player = self._download_webpage(
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
video_id, 'Downlaoding player')
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py
index 3145b30..5dd69bf 100644
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@@ -11,7 +11,7 @@ from ..utils import (
class CNETIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
'info_dict': {
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
@@ -25,7 +25,20 @@ class CNETIE(InfoExtractor):
'params': {
'skip_download': 'requires rtmpdump',
}
- }
+ }, {
+ 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
+ 'info_dict': {
+ 'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
+ 'ext': 'flv',
+ 'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
+ 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
+ 'uploader': 'Ashley Esqueda',
+ 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
+ },
+ 'params': {
+ 'skip_download': True, # requires rtmpdump
+ },
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -42,7 +55,7 @@ class CNETIE(InfoExtractor):
raise ExtractorError('Cannot find video data')
mpx_account = data['config']['players']['default']['mpx_account']
- vid = vdata['files']['rtmp']
+ vid = vdata['files'].get('rtmp', vdata['files']['hds'])
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
video_id = vdata['id']
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index cecf917..b9014fc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -22,18 +22,20 @@ from ..compat import (
compat_str,
)
from ..utils import (
+ NO_DEFAULT,
age_restricted,
bug_reports_message,
clean_html,
compiled_regex_type,
+ determine_ext,
ExtractorError,
+ fix_xml_ampersands,
float_or_none,
int_or_none,
RegexNotFoundError,
sanitize_filename,
unescapeHTML,
)
-_NO_DEFAULT = object()
class InfoExtractor(object):
@@ -523,7 +525,7 @@ class InfoExtractor(object):
video_info['description'] = playlist_description
return video_info
- def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
+ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
@@ -549,7 +551,7 @@ class InfoExtractor(object):
return next(g for g in mobj.groups() if g is not None)
else:
return mobj.group(group)
- elif default is not _NO_DEFAULT:
+ elif default is not NO_DEFAULT:
return default
elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name)
@@ -557,7 +559,7 @@ class InfoExtractor(object):
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
- def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
+ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
"""
@@ -705,6 +707,25 @@ class InfoExtractor(object):
return self._html_search_meta('twitter:player', html,
'twitter card player')
+ @staticmethod
+ def _hidden_inputs(html):
+ return dict([
+ (input.group('name'), input.group('value')) for input in re.finditer(
+ r'''(?x)
+ <input\s+
+ type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
+ name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
+ (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
+ value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
+ ''', html)
+ ])
+
+ def _form_hidden_inputs(self, form_id, html):
+ form = self._search_regex(
+ r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ html, '%s form' % form_id, group='form')
+ return self._hidden_inputs(form)
+
def _sort_formats(self, formats, field_preference=None):
if not formats:
raise ExtractorError('No video formats found')
@@ -815,10 +836,14 @@ class InfoExtractor(object):
self.to_screen(msg)
time.sleep(timeout)
- def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip()):
manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest',
- 'Unable to download f4m manifest')
+ 'Unable to download f4m manifest',
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
+ transform_source=transform_source)
formats = []
manifest_version = '1.0'
@@ -828,8 +853,19 @@ class InfoExtractor(object):
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
- manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
- (media_el.attrib.get('href') or media_el.attrib.get('url')))
+ media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
+ if not media_url:
+ continue
+ manifest_url = (
+ media_url if media_url.startswith('http://') or media_url.startswith('https://')
+ else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
+ # If media_url is itself a f4m manifest do the recursive extraction
+ # since bitrates in parent manifest (this one) and media_url manifest
+ # may differ leading to inability to resolve the format by requested
+ # bitrate in f4m downloader
+ if determine_ext(manifest_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id))
+ continue
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
@@ -846,7 +882,8 @@ class InfoExtractor(object):
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None,
- m3u8_id=None):
+ m3u8_id=None, note=None, errnote=None,
+ fatal=True):
formats = [{
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
@@ -865,8 +902,11 @@ class InfoExtractor(object):
m3u8_doc = self._download_webpage(
m3u8_url, video_id,
- note='Downloading m3u8 information',
- errnote='Failed to download m3u8 information')
+ note=note or 'Downloading m3u8 information',
+ errnote=errnote or 'Failed to download m3u8 information',
+ fatal=fatal)
+ if m3u8_doc is False:
+ return m3u8_doc
last_info = None
last_media = None
kv_rex = re.compile(
@@ -956,7 +996,7 @@ class InfoExtractor(object):
def _parse_smil_video(self, video, video_id, base, rtmp_count):
src = video.get('src')
if not src:
- return ([], rtmp_count)
+ return [], rtmp_count
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
@@ -969,7 +1009,7 @@ class InfoExtractor(object):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
- return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
+ return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 41f0c73..d1b6d73 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -12,6 +12,7 @@ from math import pow, sqrt, floor
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -27,7 +28,7 @@ from ..aes import (
class CrunchyrollIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_NETRC_MACHINE = 'crunchyroll'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -46,6 +47,22 @@ class CrunchyrollIE(InfoExtractor):
'skip_download': True,
},
}, {
+ 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
+ 'info_dict': {
+ 'id': '589804',
+ 'ext': 'flv',
+ 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
+ 'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'Danny Choo Network',
+ 'upload_date': '20120213',
+ },
+ 'params': {
+ # rtmp
+ 'skip_download': True,
+ },
+
+ }, {
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
'only_matching': True,
}]
@@ -238,7 +255,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
- playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
+ playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -251,16 +268,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
- streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
- # urlencode doesn't work!
- streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
+ streamdata_req = compat_urllib_request.Request(
+ 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
+ % (stream_id, stream_format, stream_quality),
+ compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
streamdata = self._download_xml(
streamdata_req, video_id,
note='Downloading media info for %s' % video_format)
- video_url = streamdata.find('./host').text
- video_play_path = streamdata.find('./file').text
+ stream_info = streamdata.find('./{default}preload/stream_info')
+ video_url = stream_info.find('./host').text
+ video_play_path = stream_info.find('./file').text
formats.append({
'url': video_url,
'play_path': video_play_path,
diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py
index 0226f80..45049bf 100644
--- a/youtube_dl/extractor/ctsnews.py
+++ b/youtube_dl/extractor/ctsnews.py
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601, ExtractorError
class CtsNewsIE(InfoExtractor):
+ IE_DESC = '華視新聞'
# https connection failed (Connection reset)
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
_TESTS = [{
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 70aa433..1a41c0d 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -53,6 +53,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'uploader': 'IGN',
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
'upload_date': '20150306',
+ 'duration': 74,
}
},
# Vevo video
@@ -164,6 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
'view_count': view_count,
+ 'duration': info['duration']
}
def _get_subtitles(self, video_id, webpage):
@@ -251,3 +253,53 @@ class DailymotionUserIE(DailymotionPlaylistIE):
'title': full_user,
'entries': self._extract_entries(user),
}
+
+
+class DailymotionCloudIE(DailymotionBaseInfoExtractor):
+ _VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/'
+ _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
+ _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
+
+ _TESTS = [{
+ # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
+ # Tested at FranceTvInfo_2
+ 'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
+ 'only_matching': True,
+ }, {
+ # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
+ 'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_dmcloud_url(self, webpage):
+ mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, webpage)
+ if mobj:
+ return mobj.group(1)
+
+ mobj = re.search(
+ r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % self._VALID_EMBED_URL,
+ webpage)
+ if mobj:
+ return mobj.group(1)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ request = self._build_request(url)
+ webpage = self._download_webpage(request, video_id)
+
+ title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
+
+ video_info = self._parse_json(self._search_regex(
+ r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)
+
+ # TODO: parse ios_url, which is in fact a manifest
+ video_url = video_info['mp4_url']
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': video_info.get('thumbnail_url'),
+ }
diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py
index 8049779..263532c 100644
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@@ -3,42 +3,47 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import unified_strdate
class DFBIE(InfoExtractor):
IE_NAME = 'tv.dfb.de'
- _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
+ _VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
_TEST = {
- 'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
+ 'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
# The md5 is different each time
'info_dict': {
- 'id': '9070',
+ 'id': '11633',
+ 'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
'ext': 'flv',
- 'title': 'Highlights des Empfangs in Berlin',
- 'upload_date': '20140716',
+ 'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
+ 'upload_date': '20150714',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(url, display_id)
player_info = self._download_xml(
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
- video_id)
+ display_id)
video_info = player_info.find('video')
- f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
+ f4m_info = self._download_xml(
+ self._proto_relative_url(video_info.find('url').text.strip()), display_id)
token_el = f4m_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
+ formats = self._extract_f4m_formats(manifest_url, display_id)
return {
'id': video_id,
+ 'display_id': display_id,
'title': video_info.find('title').text,
- 'url': manifest_url,
- 'ext': 'flv',
'thumbnail': self._og_search_thumbnail(webpage),
- 'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
+ 'upload_date': unified_strdate(video_info.find('time_date').text),
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index d3e6675..d6723ec 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -2,19 +2,19 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
+ parse_duration,
parse_iso8601,
- int_or_none,
)
+from ..compat import compat_str
class DiscoveryIE(InfoExtractor):
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
- 'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
'info_dict': {
- 'id': 'mission-impossible-outtakes',
- 'ext': 'flv',
+ 'id': '20769',
+ 'ext': 'mp4',
'title': 'Mission Impossible Outtakes',
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
' each other -- to the point of confusing Jamie\'s dog -- and '
@@ -24,22 +24,36 @@ class DiscoveryIE(InfoExtractor):
'timestamp': 1303099200,
'upload_date': '20110418',
},
- }
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ }
+ }, {
+ 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
+ 'info_dict': {
+ 'id': 'mythbusters-the-simpsons',
+ 'title': 'MythBusters: The Simpsons',
+ },
+ 'playlist_count': 9,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ info = self._download_json(url + '?flat=1', video_id)
- info = self._parse_json(self._search_regex(
- r'(?s)<script type="application/ld\+json">(.*?)</script>',
- webpage, 'video info'), video_id)
+ video_title = info.get('playlist_title') or info.get('video_title')
- return {
- 'id': video_id,
- 'title': info['name'],
- 'url': info['contentURL'],
- 'description': info.get('description'),
- 'thumbnail': info.get('thumbnailUrl'),
- 'timestamp': parse_iso8601(info.get('uploadDate')),
- 'duration': int_or_none(info.get('duration')),
- }
+ entries = [{
+ 'id': compat_str(video_info['id']),
+ 'formats': self._extract_m3u8_formats(
+ video_info['src'], video_id, ext='mp4',
+ note='Download m3u8 information for video %d' % (idx + 1)),
+ 'title': video_info['title'],
+ 'description': video_info.get('description'),
+ 'duration': parse_duration(video_info.get('video_length')),
+ 'webpage_url': video_info.get('href'),
+ 'thumbnail': video_info.get('thumbnailURL'),
+ 'alt_title': video_info.get('secondary_title'),
+ 'timestamp': parse_iso8601(video_info.get('publishedDate')),
+ } for idx, video_info in enumerate(info['playlist'])]
+
+ return self.playlist_result(entries, video_id, video_title)
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py
index 479430c..373b3b4 100644
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dl/extractor/douyutv.py
@@ -9,6 +9,7 @@ from ..compat import (compat_str, compat_basestring)
class DouyuTVIE(InfoExtractor):
+ IE_DESC = '斗鱼'
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'http://www.douyutv.com/iseven',
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
new file mode 100644
index 0000000..38e6597
--- /dev/null
+++ b/youtube_dl/extractor/dramafever.py
@@ -0,0 +1,216 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_urllib_parse,
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class DramaFeverBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
+ _NETRC_MACHINE = 'dramafever'
+
+ _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
+
+ _consumer_secret = None
+
+ def _get_consumer_secret(self):
+ mainjs = self._download_webpage(
+ 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
+ None, 'Downloading main.js', fatal=False)
+ if not mainjs:
+ return self._CONSUMER_SECRET
+ return self._search_regex(
+ r"var\s+cs\s*=\s*'([^']+)'", mainjs,
+ 'consumer secret', default=self._CONSUMER_SECRET)
+
+ def _real_initialize(self):
+ self._login()
+ self._consumer_secret = self._get_consumer_secret()
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'username': username,
+ 'password': password,
+ }
+
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ response = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ if all(logout_pattern not in response
+ for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
+ error = self._html_search_regex(
+ r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class DramaFeverIE(DramaFeverBaseIE):
+ IE_NAME = 'dramafever'
+ _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
+ _TEST = {
+ 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
+ 'info_dict': {
+ 'id': '4512.1',
+ 'ext': 'flv',
+ 'title': 'Cooking with Shin 4512.1',
+ 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'timestamp': 1404336058,
+ 'upload_date': '20140702',
+ 'duration': 343,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('/', '.')
+
+ try:
+ feed = self._download_json(
+ 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
+ video_id, 'Downloading episode JSON')['channel']['item']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ raise ExtractorError(
+ 'Currently unavailable in your country.', expected=True)
+ raise
+
+ media_group = feed.get('media-group', {})
+
+ formats = []
+ for media_content in media_group['media-content']:
+ src = media_content.get('@attributes', {}).get('url')
+ if not src:
+ continue
+ ext = determine_ext(src)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src, video_id, f4m_id='hds'))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', m3u8_id='hls'))
+ else:
+ formats.append({
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ title = media_group.get('media-title')
+ description = media_group.get('media-description')
+ duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
+ thumbnail = self._proto_relative_url(
+ media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
+ timestamp = parse_iso8601(feed.get('pubDate'), ' ')
+
+ subtitles = {}
+ for media_subtitle in media_group.get('media-subTitle', []):
+ lang = media_subtitle.get('@attributes', {}).get('lang')
+ href = media_subtitle.get('@attributes', {}).get('href')
+ if not lang or not href:
+ continue
+ subtitles[lang] = [{
+ 'ext': 'ttml',
+ 'url': href,
+ }]
+
+ series_id, episode_number = video_id.split('.')
+ episode_info = self._download_json(
+ # We only need a single episode info, so restricting page size to one episode
+ # and dealing with page number as with episode number
+ r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
+ % (self._consumer_secret, series_id, episode_number),
+ video_id, 'Downloading episode info JSON', fatal=False)
+ if episode_info:
+ value = episode_info.get('value')
+ if value:
+ subfile = value[0].get('subfile') or value[0].get('new_subfile')
+ if subfile and subfile != 'http://www.dramafever.com/st/':
+ subtitles.setdefault('English', []).append({
+ 'ext': 'srt',
+ 'url': subfile,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class DramaFeverSeriesIE(DramaFeverBaseIE):
+ IE_NAME = 'dramafever:series'
+ _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
+ _TESTS = [{
+ 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
+ 'info_dict': {
+ 'id': '4512',
+ 'title': 'Cooking with Shin',
+ 'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'http://www.dramafever.com/drama/124/IRIS/',
+ 'info_dict': {
+ 'id': '124',
+ 'title': 'IRIS',
+ 'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
+ },
+ 'playlist_count': 20,
+ }]
+
+ _PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+
+ series = self._download_json(
+ 'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
+ % (self._consumer_secret, series_id),
+ series_id, 'Downloading series JSON')['series'][series_id]
+
+ title = clean_html(series['name'])
+ description = clean_html(series.get('description') or series.get('description_short'))
+
+ entries = []
+ for page_num in itertools.count(1):
+ episodes = self._download_json(
+ 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
+ % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
+ series_id, 'Downloading episodes JSON page #%d' % page_num)
+ for episode in episodes.get('value', []):
+ episode_url = episode.get('episode_url')
+ if not episode_url:
+ continue
+ entries.append(self.url_result(
+ compat_urlparse.urljoin(url, episode_url),
+ 'DramaFever', episode.get('guid')))
+ if page_num == episodes['num_pages']:
+ break
+
+ return self.playlist_result(entries, series_id, title, description)
diff --git a/youtube_dl/extractor/drbonanza.py b/youtube_dl/extractor/drbonanza.py
index 7626219..8b98b01 100644
--- a/youtube_dl/extractor/drbonanza.py
+++ b/youtube_dl/extractor/drbonanza.py
@@ -15,7 +15,6 @@ class DRBonanzaIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
- 'md5': 'fe330252ddea607635cf2eb2c99a0af3',
'info_dict': {
'id': '65517',
'ext': 'mp4',
@@ -26,6 +25,9 @@ class DRBonanzaIE(InfoExtractor):
'upload_date': '20110120',
'duration': 3664,
},
+ 'params': {
+ 'skip_download': True, # requires rtmp
+ },
}, {
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
'md5': '6dfe039417e76795fb783c52da3de11d',
@@ -93,6 +95,11 @@ class DRBonanzaIE(InfoExtractor):
'format_id': file['Type'].replace('Video', ''),
'preference': preferencemap.get(file['Type'], -10),
})
+ if format['url'].startswith('rtmp'):
+ rtmp_url = format['url']
+ format['rtmp_live'] = True # --resume does not work
+ if '/bonanza/' in rtmp_url:
+ format['play_path'] = rtmp_url.split('/bonanza/')[1]
formats.append(format)
elif file['Type'] == "Thumb":
thumbnail = file['Location']
@@ -111,9 +118,6 @@ class DRBonanzaIE(InfoExtractor):
description = '%s\n%s\n%s\n' % (
info['Description'], info['Actors'], info['Colophon'])
- for f in formats:
- f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
- f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
self._sort_formats(formats)
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
index 37c5c18..639f918 100644
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@@ -36,25 +36,24 @@ class DrTuberIE(InfoExtractor):
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
- [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
+ [r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'],
webpage, 'title')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"',
webpage, 'thumbnail', fatal=False)
- like_count = str_to_int(self._html_search_regex(
- r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
- webpage, 'like count', fatal=False))
- dislike_count = str_to_int(self._html_search_regex(
- r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
- webpage, 'like count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r'<span class="comments_count">([\d,\.]+)</span>',
- webpage, 'comment count', fatal=False))
+ def extract_count(id_, name):
+ return str_to_int(self._html_search_regex(
+ r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
+ webpage, '%s count' % name, fatal=False))
+
+ like_count = extract_count('rate_likes', 'like')
+ dislike_count = extract_count('rate_dislikes', 'dislike')
+ comment_count = extract_count('comments_count', 'comment')
cats_str = self._search_regex(
- r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False)
+ r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False)
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
return {
diff --git a/youtube_dl/extractor/ehow.py b/youtube_dl/extractor/ehow.py
index 9cb1bf3..b1cd4f5 100644
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dl/extractor/ehow.py
@@ -1,9 +1,7 @@
from __future__ import unicode_literals
-from ..compat import (
- compat_urllib_parse,
-)
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
class EHowIE(InfoExtractor):
@@ -26,7 +24,7 @@ class EHowIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
- final_url = compat_urllib_parse.unquote(video_url)
+ final_url = compat_urllib_parse_unquote(video_url)
uploader = self._html_search_meta('uploader', webpage)
title = self._og_search_title(webpage).replace(' | eHow', '')
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py
deleted file mode 100644
index 4827022..0000000
--- a/youtube_dl/extractor/empflix.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from __future__ import unicode_literals
-
-from .tnaflix import TNAFlixIE
-
-
-class EMPFlixIE(TNAFlixIE):
- _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
-
- _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
- _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
- _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
-
- _TESTS = [
- {
- 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
- 'md5': 'b1bc15b6412d33902d6e5952035fcabc',
- 'info_dict': {
- 'id': '33051',
- 'display_id': 'Amateur-Finger-Fuck',
- 'ext': 'mp4',
- 'title': 'Amateur Finger Fuck',
- 'description': 'Amateur solo finger fucking.',
- 'thumbnail': 're:https?://.*\.jpg$',
- 'age_limit': 18,
- }
- },
- {
- 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
- 'only_matching': True,
- }
- ]
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 82dc27b..e17bb9a 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -9,7 +9,7 @@ from ..compat import (
compat_http_client,
compat_str,
compat_urllib_error,
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -136,7 +136,7 @@ class FacebookIE(InfoExtractor):
else:
raise ExtractorError('Cannot parse data')
data = dict(json.loads(m.group(1)))
- params_raw = compat_urllib_parse.unquote(data['params'])
+ params_raw = compat_urllib_parse_unquote(data['params'])
params = json.loads(params_raw)
video_data = params['video_data'][0]
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index 3c39ca4..cebdd01 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -6,9 +6,9 @@ from .common import InfoExtractor
class FazIE(InfoExtractor):
IE_NAME = 'faz.net'
- _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
'info_dict': {
'id': '12610585',
@@ -16,7 +16,22 @@ class FazIE(InfoExtractor):
'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
'description': 'md5:1453fbf9a0d041d985a47306192ea253',
},
- }
+ }, {
+ 'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/foobarblafasel-13659345.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py
new file mode 100644
index 0000000..13fbc4d
--- /dev/null
+++ b/youtube_dl/extractor/fivetv.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class FiveTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ http://
+ (?:www\.)?5-tv\.ru/
+ (?:
+ (?:[^/]+/)+(?P<id>\d+)|
+ (?P<path>[^/?#]+)(?:[/?#])?
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'http://5-tv.ru/news/96814/',
+ 'md5': 'bbff554ad415ecf5416a2f48c22d9283',
+ 'info_dict': {
+ 'id': '96814',
+ 'ext': 'mp4',
+ 'title': 'Россияне выбрали имя для общенациональной платежной системы',
+ 'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 180,
+ },
+ }, {
+ 'url': 'http://5-tv.ru/video/1021729/',
+ 'info_dict': {
+ 'id': '1021729',
+ 'ext': 'mp4',
+ 'title': '3D принтер',
+ 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 180,
+ },
+ }, {
+ 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
+ 'info_dict': {
+ 'id': 'glavnoe',
+ 'ext': 'mp4',
+ 'title': 'Итоги недели с 8 по 14 июня 2015 года',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/films/1507502/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/programs/broadcast/508713/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/angel/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('path')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
+ webpage, 'video url')
+
+ title = self._og_search_title(webpage, default=None) or self._search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title')
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, 'duration', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ }
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index edf555b..75723c0 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -6,18 +6,15 @@ import re
import json
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlparse,
- compat_urlparse,
-)
+from ..compat import compat_urlparse
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
- float_or_none,
parse_duration,
determine_ext,
)
+from .dailymotion import DailymotionCloudIE
class FranceTVBaseInfoExtractor(InfoExtractor):
@@ -58,12 +55,12 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
# See https://github.com/rg3/youtube-dl/issues/3963
# m3u8 urls work fine
continue
- video_url_parsed = compat_urllib_parse_urlparse(video_url)
f4m_url = self._download_webpage(
- 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
+ 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url:
- formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
+ formats.extend(self._extract_f4m_formats(
+ f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
elif video_url.startswith('rtmp'):
@@ -86,7 +83,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
'title': info['titre'],
'description': clean_html(info['synopsis']),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
- 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
+ 'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
'timestamp': int_or_none(info['diffusion']['timestamp']),
'formats': formats,
}
@@ -131,12 +128,26 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
'skip_download': 'HLS (reqires ffmpeg)'
},
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
+ }, {
+ 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
+ 'md5': 'f485bda6e185e7d15dbc69b72bae993e',
+ 'info_dict': {
+ 'id': '556e03339473995ee145930c',
+ 'ext': 'mp4',
+ 'title': 'Les entreprises familiales : le secret de la réussite',
+ 'thumbnail': 're:^https?://.*\.jpe?g$',
+ }
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
+
+ dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
+ if dmcloud_url:
+ return self.url_result(dmcloud_url, 'DailymotionCloud')
+
video_id, catalogue = self._search_regex(
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
return self._extract_video(video_id, catalogue)
@@ -145,11 +156,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
class FranceTVIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetv'
IE_DESC = 'France 2, 3, 4, 5 and Ô'
- _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
- (?:
- emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
- | (emissions?|jt)/(?P<key>[^/?]+)
- )'''
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?france[2345o]\.fr/
+ (?:
+ emissions/[^/]+/(?:videos|diffusions)|
+ emission/[^/]+|
+ videos|
+ jt
+ )
+ /|
+ embed\.francetv\.fr/\?ue=
+ )
+ (?P<id>[^/?]+)
+ '''
_TESTS = [
# france2
@@ -206,24 +227,46 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
},
# franceo
{
- 'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
- 'md5': '52f0bfe202848b15915a2f39aaa8981b',
+ 'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
+ 'md5': '47d5816d3b24351cdce512ad7ab31da8',
'info_dict': {
- 'id': '108634970',
+ 'id': '125377621',
'ext': 'flv',
- 'title': 'Infô Afrique',
- 'description': 'md5:ebf346da789428841bee0fd2a935ea55',
- 'upload_date': '20140915',
- 'timestamp': 1410822000,
+ 'title': 'Infô soir',
+ 'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
+ 'upload_date': '20150718',
+ 'timestamp': 1437241200,
+ 'duration': 414,
+ },
+ },
+ {
+ # francetv embed
+ 'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
+ 'info_dict': {
+ 'id': 'EV_30231',
+ 'ext': 'flv',
+ 'title': 'Alcaline, le concert avec Calogero',
+ 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+ 'upload_date': '20150226',
+ 'timestamp': 1424989860,
+ 'duration': 5400,
},
},
+ {
+ 'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.franceo.fr/videos/125377617',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
video_id, catalogue = self._html_search_regex(
- r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
+ r'href="http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
webpage, 'video ID').split('@')
return self._extract_video(video_id, catalogue)
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 2d33fa7..b3f1baf 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -5,7 +5,7 @@ import json
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
@@ -75,7 +75,7 @@ class GameSpotIE(InfoExtractor):
return {
'id': data_video['guid'],
'display_id': page_id,
- 'title': compat_urllib_parse.unquote(data_video['title']),
+ 'title': compat_urllib_parse_unquote(data_video['title']),
'formats': formats,
'description': self._html_search_meta('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage),
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 96ca398..cd133a1 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -8,7 +8,6 @@ import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import (
- compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
compat_urlparse,
@@ -34,13 +33,21 @@ from .brightcove import BrightcoveIE
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
+from .tvc import TVCIE
from .sportbox import SportBoxEmbedIE
from .smotri import SmotriIE
+from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
from .senateisvp import SenateISVPIE
from .bliptv import BlipTVIE
from .svt import SVTIE
+from .pornhub import PornHubIE
+from .xhamster import XHamsterEmbedIE
+from .vimeo import VimeoIE
+from .dailymotion import DailymotionCloudIE
+from .onionstudios import OnionStudiosIE
+from .snagfilms import SnagFilmsEmbedIE
class GenericIE(InfoExtractor):
@@ -291,6 +298,15 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
+ # TVC embed
+ {
+ 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
+ 'info_dict': {
+ 'id': '55304',
+ 'ext': 'mp4',
+ 'title': 'Дошкольное воспитание',
+ },
+ },
# SportBox embed
{
'url': 'http://www.vestifinance.ru/articles/25753',
@@ -322,6 +338,26 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
+ # Myvi.ru embed
+ {
+ 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
+ 'info_dict': {
+ 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
+ 'ext': 'mp4',
+ 'title': 'Ужастики, русский трейлер (2015)',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 153,
+ }
+ },
+ # XHamster embed
+ {
+ 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
+ 'info_dict': {
+ 'id': 'showthread',
+ 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
+ },
+ 'playlist_mincount': 7,
+ },
# Embedded TED video
{
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
@@ -371,6 +407,26 @@ class GenericIE(InfoExtractor):
'skip_download': 'Requires rtmpdump'
}
},
+ # francetv embed
+ {
+ 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
+ 'info_dict': {
+ 'id': 'EV_30231',
+ 'ext': 'mp4',
+ 'title': 'Alcaline, le concert avec Calogero',
+ 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+ 'upload_date': '20150226',
+ 'timestamp': 1424989860,
+ 'duration': 5400,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Forbidden'
+ ]
+ },
# Condé Nast embed
{
'url': 'http://www.wired.com/2014/04/honda-asimo/',
@@ -644,6 +700,18 @@ class GenericIE(InfoExtractor):
'title': 'John Carlson Postgame 2/25/15',
},
},
+ # Kaltura embed (different embed code)
+ {
+ 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
+ 'info_dict': {
+ 'id': '1_a52wc67y',
+ 'ext': 'flv',
+ 'upload_date': '20150127',
+ 'uploader_id': 'PremierMedia',
+ 'timestamp': int,
+ 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
+ },
+ },
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -789,6 +857,62 @@ class GenericIE(InfoExtractor):
# rtmpe downloads
'skip_download': True,
}
+ },
+ # Brightcove URL in single quotes
+ {
+ 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
+ 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
+ 'info_dict': {
+ 'id': '4255764656001',
+ 'ext': 'mp4',
+ 'title': 'SN Presents: Russell Martin, World Citizen',
+ 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
+ 'uploader': 'Rogers Sportsnet',
+ },
+ },
+ # Dailymotion Cloud video
+ {
+ 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
+ 'md5': '49444254273501a64675a7e68c502681',
+ 'info_dict': {
+ 'id': '5585de919473990de4bee11b',
+ 'ext': 'mp4',
+ 'title': 'Le débat',
+ 'thumbnail': 're:^https?://.*\.jpe?g$',
+ }
+ },
+ # OnionStudios embed
+ {
+ 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
+ 'info_dict': {
+ 'id': '2855',
+ 'ext': 'mp4',
+ 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'uploader': 'ClickHole',
+ 'uploader_id': 'clickhole',
+ }
+ },
+ # SnagFilms embed
+ {
+ 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ }
+ },
+ # AdobeTVVideo embed
+ {
+ 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
}
]
@@ -956,7 +1080,9 @@ class GenericIE(InfoExtractor):
}
if not self._downloader.params.get('test', False) and not is_intentional:
- self._downloader.report_warning('Falling back on generic information extractor.')
+ force = self._downloader.params.get('force_generic_extractor', False)
+ self._downloader.report_warning(
+ '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
if not full_response:
request = compat_urllib_request.Request(url)
@@ -1008,7 +1134,7 @@ class GenericIE(InfoExtractor):
# Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
# Unescaping the whole page allows to handle those cases in a generic way
- webpage = compat_urllib_parse.unquote(webpage)
+ webpage = compat_urllib_parse_unquote(webpage)
# it's tempting to parse this further, but you would
# have to take into account all the variations like
@@ -1061,23 +1187,20 @@ class GenericIE(InfoExtractor):
# Look for embedded rtl.nl player
matches = re.findall(
- r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
+ r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
webpage)
if matches:
return _playlist_from_matches(matches, ie='RtlNl')
- # Look for embedded (iframe) Vimeo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
- if mobj:
- player_url = unescapeHTML(mobj.group('url'))
- surl = smuggle_url(player_url, {'Referer': url})
- return self.url_result(surl)
- # Look for embedded (swf embed) Vimeo player
- mobj = re.search(
- r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
- if mobj:
- return self.url_result(mobj.group(1))
+ vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
+ if vimeo_url is not None:
+ return self.url_result(vimeo_url)
+
+ vid_me_embed_url = self._search_regex(
+ r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+ webpage, 'vid.me embed', default=None)
+ if vid_me_embed_url is not None:
+ return self.url_result(vid_me_embed_url, 'Vidme')
# Look for embedded YouTube player
matches = re.findall(r'''(?x)
@@ -1271,7 +1394,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
if mobj is not None:
- return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
+ return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -1289,11 +1412,32 @@ class GenericIE(InfoExtractor):
if rutv_url:
return self.url_result(rutv_url, 'RUTV')
+ # Look for embedded TVC player
+ tvc_url = TVCIE._extract_url(webpage)
+ if tvc_url:
+ return self.url_result(tvc_url, 'TVC')
+
# Look for embedded SportBox player
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
+ # Look for embedded PornHub player
+ pornhub_url = PornHubIE._extract_url(webpage)
+ if pornhub_url:
+ return self.url_result(pornhub_url, 'PornHub')
+
+ # Look for embedded XHamster player
+ xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
+ if xhamster_urls:
+ return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
+
+ # Look for embedded Tvigle player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Tvigle')
+
# Look for embedded TED player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
@@ -1313,11 +1457,23 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+ # Look for embedded francetv player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
# Look for embedded smotri.com player
smotri_url = SmotriIE._extract_url(webpage)
if smotri_url:
return self.url_result(smotri_url, 'Smotri')
+ # Look for embedded Myvi.ru player
+ myvi_url = MyviIE._extract_url(webpage)
+ if myvi_url:
+ return self.url_result(myvi_url)
+
# Look for embeded soundcloud player
mobj = re.search(
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
@@ -1397,8 +1553,8 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
- mobj = re.search(
- r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
+ mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
+ re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
@@ -1455,6 +1611,30 @@ class GenericIE(InfoExtractor):
if senate_isvp_url:
return self.url_result(senate_isvp_url, 'SenateISVP')
+ # Look for Dailymotion Cloud videos
+ dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
+ if dmcloud_url:
+ return self.url_result(dmcloud_url, 'DailymotionCloud')
+
+ # Look for OnionStudios embeds
+ onionstudios_url = OnionStudiosIE._extract_url(webpage)
+ if onionstudios_url:
+ return self.url_result(onionstudios_url)
+
+ # Look for SnagFilms embeds
+ snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
+ if snagfilms_url:
+ return self.url_result(snagfilms_url)
+
+ # Look for AdobeTVVideo embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group(1))),
+ 'AdobeTVVideo')
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
@@ -1534,7 +1714,7 @@ class GenericIE(InfoExtractor):
entries = []
for video_url in found:
video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
+ video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
# Sometimes, jwplayer extraction will result in a YouTube URL
if YoutubeIE.suitable(video_url):
diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py
index 397f1d4..884700c 100644
--- a/youtube_dl/extractor/gfycat.py
+++ b/youtube_dl/extractor/gfycat.py
@@ -6,12 +6,13 @@ from ..utils import (
int_or_none,
float_or_none,
qualities,
+ ExtractorError,
)
class GfycatIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
+ _TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
'id': 'DeadlyDecisiveGermanpinscher',
@@ -27,14 +28,33 @@ class GfycatIE(InfoExtractor):
'categories': list,
'age_limit': 0,
}
- }
+ }, {
+ 'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
+ 'info_dict': {
+ 'id': 'JauntyTimelyAmazontreeboa',
+ 'ext': 'mp4',
+ 'title': 'JauntyTimelyAmazontreeboa',
+ 'timestamp': 1411720126,
+ 'upload_date': '20140926',
+ 'uploader': 'anonymous',
+ 'duration': 3.52,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'categories': list,
+ 'age_limit': 0,
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
gfy = self._download_json(
'http://gfycat.com/cajax/get/%s' % video_id,
- video_id, 'Downloading video info')['gfyItem']
+ video_id, 'Downloading video info')
+ if 'error' in gfy:
+ raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
+ gfy = gfy['gfyItem']
title = gfy.get('title') or gfy['gfyName']
description = gfy.get('description')
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py
index 6147596..f006f0c 100644
--- a/youtube_dl/extractor/gorillavid.py
+++ b/youtube_dl/extractor/gorillavid.py
@@ -78,12 +78,7 @@ class GorillaVidIE(InfoExtractor):
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
- fields = dict(re.findall(r'''(?x)<input\s+
- type="hidden"\s+
- name="([^"]+)"\s+
- (?:id="[^"]+"\s+)?
- value="([^"]*)"
- ''', webpage))
+ fields = self._hidden_inputs(webpage)
if fields['op'] == 'download1':
countdown = int_or_none(self._search_regex(
diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py
index 63d87b7..f5aa73d 100644
--- a/youtube_dl/extractor/hentaistigma.py
+++ b/youtube_dl/extractor/hentaistigma.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
@@ -19,20 +17,19 @@ class HentaiStigmaIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
- r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
+ r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
webpage, 'title')
wrap_url = self._html_search_regex(
- r'<iframe src="([^"]+mp4)"', webpage, 'wrapper url')
+ r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
wrap_webpage = self._download_webpage(wrap_url, video_id)
video_url = self._html_search_regex(
- r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url')
+ r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
return {
'id': video_id,
diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py
index 704d028..a3154cf 100644
--- a/youtube_dl/extractor/hostingbulk.py
+++ b/youtube_dl/extractor/hostingbulk.py
@@ -58,11 +58,7 @@ class HostingBulkIE(InfoExtractor):
r'<img src="([^"]+)".+?class="pic"',
webpage, 'thumbnail', fatal=False)
- fields = dict(re.findall(r'''(?x)<input\s+
- type="hidden"\s+
- name="([^"]+)"\s+
- value="([^"]*)"
- ''', webpage))
+ fields = self._hidden_inputs(webpage)
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
request.add_header('Content-type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py
index 3f7d666..16677f1 100644
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@@ -1,8 +1,7 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import parse_iso8601
class HowcastIE(InfoExtractor):
@@ -13,29 +12,31 @@ class HowcastIE(InfoExtractor):
'info_dict': {
'id': '390161',
'ext': 'mp4',
- 'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
'title': 'How to Tie a Square Knot Properly',
- }
+ 'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
+ 'timestamp': 1276081287,
+ 'upload_date': '20100609',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ video_id = self._match_id(url)
- video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- self.report_extraction(video_id)
-
- video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
- webpage, 'video URL')
-
- video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
- webpage, 'description', fatal=False)
+ embed_code = self._search_regex(
+ r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b',
+ webpage, 'ooyala embed code')
return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % embed_code,
'id': video_id,
- 'url': video_url,
- 'title': self._og_search_title(webpage),
- 'description': video_description,
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'timestamp': parse_iso8601(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp')),
}
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
index e973391..663e663 100644
--- a/youtube_dl/extractor/howstuffworks.py
+++ b/youtube_dl/extractor/howstuffworks.py
@@ -10,7 +10,7 @@ from ..utils import (
class HowStuffWorksIE(InfoExtractor):
- _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
+ _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
_TESTS = [
{
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
@@ -46,6 +46,10 @@ class HowStuffWorksIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
},
},
+ {
+ 'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index f29df36..4bb574c 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -46,7 +46,7 @@ class ImdbIE(InfoExtractor):
format_info = info['videoPlayerObject']['video']
formats.append({
'format_id': f_id,
- 'url': format_info['url'],
+ 'url': format_info['videoInfoList'][0]['videoUrl'],
})
return {
diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py
index 0847074..65712ab 100644
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dl/extractor/ina.py
@@ -7,7 +7,7 @@ from .common import InfoExtractor
class InaIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
_TEST = {
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index f25f436..71cfd12 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -4,14 +4,15 @@ import base64
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
)
class InfoQIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
+ _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
'info_dict': {
@@ -20,7 +21,10 @@ class InfoQIE(InfoExtractor):
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
'title': 'A Few of My Favorite [Python] Things',
},
- }
+ }, {
+ 'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -35,14 +39,14 @@ class InfoQIE(InfoExtractor):
# Extract video URL
encoded_id = self._search_regex(
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
- real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
+ real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
playpath = 'mp4:' + real_id
video_filename = playpath.split('/')[-1]
video_id, extension = video_filename.split('.')
http_base = self._search_regex(
- r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
+ r'EXPRESSINSTALL_SWF\s*=\s*[^"]*"((?:https?:)?//[^/"]+/)', webpage,
'HTTP base URL')
formats = [{
@@ -52,7 +56,7 @@ class InfoQIE(InfoExtractor):
'play_path': playpath,
}, {
'format_id': 'http',
- 'url': http_base + real_id,
+ 'url': compat_urlparse.urljoin(url, http_base) + real_id,
}]
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index b107557..3d78f78 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ limit_length,
+)
class InstagramIE(InfoExtractor):
@@ -100,11 +103,13 @@ class InstagramUserIE(InfoExtractor):
thumbnails_el = it.get('images', {})
thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
- title = it.get('caption', {}).get('text', it['id'])
+ # In some cases caption is null, which corresponds to None
+ # in python. As a result, it.get('caption', {}) gives None
+ title = (it.get('caption') or {}).get('text', it['id'])
entries.append({
'id': it['id'],
- 'title': title,
+ 'title': limit_length(title, 80),
'formats': formats,
'thumbnail': thumbnail,
'webpage_url': it.get('link'),
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py
new file mode 100644
index 0000000..afb7f4e
--- /dev/null
+++ b/youtube_dl/extractor/iqiyi.py
@@ -0,0 +1,273 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import math
+import random
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import ExtractorError
+
+
+class IqiyiIE(InfoExtractor):
+ IE_NAME = 'iqiyi'
+ IE_DESC = '爱奇艺'
+
+ _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
+ 'md5': '2cb594dc2781e6c941a110d8f358118b',
+ 'info_dict': {
+ 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
+ 'title': '美国德州空中惊现奇异云团 酷似UFO',
+ 'ext': 'f4v',
+ }
+ }, {
+ 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb',
+ 'title': '名侦探柯南第752集',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
+ 'ext': 'f4v',
+ 'title': '名侦探柯南第752集',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
+ 'ext': 'f4v',
+ 'title': '名侦探柯南第752集',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
+ 'ext': 'f4v',
+ 'title': '名侦探柯南第752集',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
+ 'ext': 'f4v',
+ 'title': '名侦探柯南第752集',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
+ 'ext': 'f4v',
+ 'title': '名侦探柯南第752集',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
+ 'ext': 'f4v',
+ 'title': '名侦探柯南第752集',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
+ 'ext': 'f4v',
+ 'title': '名侦探柯南第752集',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
+ 'ext': 'f4v',
+ 'title': '名侦探柯南第752集',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _FORMATS_MAP = [
+ ('1', 'h6'),
+ ('2', 'h5'),
+ ('3', 'h4'),
+ ('4', 'h3'),
+ ('5', 'h2'),
+ ('10', 'h1'),
+ ]
+
+ def construct_video_urls(self, data, video_id, _uuid):
+ def do_xor(x, y):
+ a = y % 3
+ if a == 1:
+ return x ^ 121
+ if a == 2:
+ return x ^ 72
+ return x ^ 103
+
+ def get_encode_code(l):
+ a = 0
+ b = l.split('-')
+ c = len(b)
+ s = ''
+ for i in range(c - 1, -1, -1):
+ a = do_xor(int(b[c - i - 1], 16), i)
+ s += chr(a)
+ return s[::-1]
+
+ def get_path_key(x, format_id, segment_index):
+ mg = ')(*&^flash@#$%a'
+ tm = self._download_json(
+ 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
+ note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
+ )['t']
+ t = str(int(math.floor(int(tm) / (600.0))))
+ return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
+
+ video_urls_dict = {}
+ for format_item in data['vp']['tkl'][0]['vs']:
+ if 0 < int(format_item['bid']) <= 10:
+ format_id = self.get_format(format_item['bid'])
+ else:
+ continue
+
+ video_urls = []
+
+ video_urls_info = format_item['fs']
+ if not format_item['fs'][0]['l'].startswith('/'):
+ t = get_encode_code(format_item['fs'][0]['l'])
+ if t.endswith('mp4'):
+ video_urls_info = format_item['flvs']
+
+ for segment_index, segment in enumerate(video_urls_info):
+ vl = segment['l']
+ if not vl.startswith('/'):
+ vl = get_encode_code(vl)
+ key = get_path_key(
+ vl.split('/')[-1].split('.')[0], format_id, segment_index)
+ filesize = segment['b']
+ base_url = data['vp']['du'].split('/')
+ base_url.insert(-1, key)
+ base_url = '/'.join(base_url)
+ param = {
+ 'su': _uuid,
+ 'qyid': uuid.uuid4().hex,
+ 'client': '',
+ 'z': '',
+ 'bt': '',
+ 'ct': '',
+ 'tn': str(int(time.time()))
+ }
+ api_video_url = base_url + vl + '?' + \
+ compat_urllib_parse.urlencode(param)
+ js = self._download_json(
+ api_video_url, video_id,
+ note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
+ video_url = js['l']
+ video_urls.append(
+ (video_url, filesize))
+
+ video_urls_dict[format_id] = video_urls
+ return video_urls_dict
+
+ def get_format(self, bid):
+ matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)]
+ return matched_format_ids[0] if len(matched_format_ids) else None
+
+ def get_bid(self, format_id):
+ matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id]
+ return matched_bids[0] if len(matched_bids) else None
+
+ def get_raw_data(self, tvid, video_id, enc_key, _uuid):
+ tm = str(int(time.time()))
+ param = {
+ 'key': 'fvip',
+ 'src': hashlib.md5(b'youtube-dl').hexdigest(),
+ 'tvId': tvid,
+ 'vid': video_id,
+ 'vinfo': 1,
+ 'tm': tm,
+ 'enc': hashlib.md5(
+ (enc_key + tm + tvid).encode('utf8')).hexdigest(),
+ 'qyid': _uuid,
+ 'tn': random.random(),
+ 'um': 0,
+ 'authkey': hashlib.md5(
+ (tm + tvid).encode('utf8')).hexdigest()
+ }
+
+ api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
+ compat_urllib_parse.urlencode(param)
+ raw_data = self._download_json(api_url, video_id)
+ return raw_data
+
+ def get_enc_key(self, swf_url, video_id):
+ enc_key = '8e29ab5666d041c3a1ea76e06dabdffb'
+ return enc_key
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(
+ url, 'temp_id', note='download video page')
+ tvid = self._search_regex(
+ r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
+ video_id = self._search_regex(
+ r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
+ swf_url = self._search_regex(
+ r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
+ _uuid = uuid.uuid4().hex
+
+ enc_key = self.get_enc_key(swf_url, video_id)
+
+ raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
+
+ if raw_data['code'] != 'A000000':
+ raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
+
+ if not raw_data['data']['vp']['tkl']:
+ raise ExtractorError('No support iQiqy VIP video')
+
+ data = raw_data['data']
+
+ title = data['vi']['vn']
+
+ # generate video_urls_dict
+ video_urls_dict = self.construct_video_urls(
+ data, video_id, _uuid)
+
+ # construct info
+ entries = []
+ for format_id in video_urls_dict:
+ video_urls = video_urls_dict[format_id]
+ for i, video_url_info in enumerate(video_urls):
+ if len(entries) < i + 1:
+ entries.append({'formats': []})
+ entries[i]['formats'].append(
+ {
+ 'url': video_url_info[0],
+ 'filesize': video_url_info[-1],
+ 'format_id': format_id,
+ 'preference': int(self.get_bid(format_id))
+ }
+ )
+
+ for i in range(len(entries)):
+ self._sort_formats(entries[i]['formats'])
+ entries[i].update(
+ {
+ 'id': '%s_part%d' % (video_id, i + 1),
+ 'title': title,
+ }
+ )
+
+ if len(entries) > 1:
+ info = {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'title': title,
+ 'entries': entries,
+ }
+ else:
+ info = entries[0]
+ info['id'] = video_id
+ info['title'] = title
+
+ return info
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py
index 99a1361..bc226fa 100644
--- a/youtube_dl/extractor/izlesene.py
+++ b/youtube_dl/extractor/izlesene.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
determine_ext,
float_or_none,
@@ -30,7 +31,7 @@ class IzleseneIE(InfoExtractor):
'description': 'md5:253753e2655dde93f59f74b572454f6d',
'thumbnail': 're:^http://.*\.jpg',
'uploader_id': 'pelikzzle',
- 'timestamp': 1404302298,
+ 'timestamp': int,
'upload_date': '20140702',
'duration': 95.395,
'age_limit': 0,
@@ -46,7 +47,7 @@ class IzleseneIE(InfoExtractor):
'description': 'Tarkan Dortmund 2006 Konseri',
'thumbnail': 're:^http://.*\.jpg',
'uploader_id': 'parlayankiz',
- 'timestamp': 1163322193,
+ 'timestamp': int,
'upload_date': '20061112',
'duration': 253.666,
'age_limit': 0,
@@ -67,9 +68,9 @@ class IzleseneIE(InfoExtractor):
uploader = self._html_search_regex(
r"adduserUsername\s*=\s*'([^']+)';",
- webpage, 'uploader', fatal=False, default='')
+ webpage, 'uploader', fatal=False)
timestamp = parse_iso8601(self._html_search_meta(
- 'uploadDate', webpage, 'upload date', fatal=False))
+ 'uploadDate', webpage, 'upload date'))
duration = float_or_none(self._html_search_regex(
r'"videoduration"\s*:\s*"([^"]+)"',
@@ -86,8 +87,7 @@ class IzleseneIE(InfoExtractor):
# Might be empty for some videos.
streams = self._html_search_regex(
- r'"qualitylevel"\s*:\s*"([^"]+)"',
- webpage, 'streams', fatal=False, default='')
+ r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='')
formats = []
if streams:
@@ -95,15 +95,15 @@ class IzleseneIE(InfoExtractor):
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
formats.append({
'format_id': '%sp' % quality if quality else 'sd',
- 'url': url,
+ 'url': compat_urllib_parse_unquote(url),
'ext': ext,
})
else:
stream_url = self._search_regex(
- r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
+ r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL')
formats.append({
'format_id': 'sd',
- 'url': stream_url,
+ 'url': compat_urllib_parse_unquote(stream_url),
'ext': ext,
})
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index d0720ff..1df084d 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -8,9 +8,9 @@ from .common import InfoExtractor
class JeuxVideoIE(InfoExtractor):
- _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
+ _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
'info_dict': {
@@ -19,7 +19,10 @@ class JeuxVideoIE(InfoExtractor):
'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
},
- }
+ }, {
+ 'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py
index e3b43ff..06daf5a 100644
--- a/youtube_dl/extractor/karaoketv.py
+++ b/youtube_dl/extractor/karaoketv.py
@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
js_to_json,
)
@@ -24,7 +24,7 @@ class KaraoketvIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
page_video_url = self._og_search_video_url(webpage, video_id)
- config_json = compat_urllib_parse.unquote_plus(self._search_regex(
+ config_json = compat_urllib_parse_unquote_plus(self._search_regex(
r'config=(.*)', page_video_url, 'configuration'))
urls_info_json = self._download_json(
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py
index 7d4b570..1d391e6 100644
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@@ -28,6 +28,14 @@ class KickStarterIE(InfoExtractor):
'uploader': 'Pebble Technology',
'title': 'Pebble iOS Notifications',
}
+ }, {
+ 'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
+ 'info_dict': {
+ 'id': '1420158244',
+ 'ext': 'mp4',
+ 'title': 'Power Drive 2000',
+ },
+ 'expected_warnings': ['OpenGraph description'],
}]
def _real_extract(self, url):
@@ -48,10 +56,15 @@ class KickStarterIE(InfoExtractor):
'title': title,
}
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ if thumbnail is None:
+ thumbnail = self._html_search_regex(
+ r'<img[^>]+class="[^"]+\s*poster\s*[^"]+"[^>]+src="([^"]+)"',
+ webpage, 'thumbnail image', fatal=False)
return {
'id': video_id,
'url': video_url,
'title': title,
'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'thumbnail': thumbnail,
}
diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py
new file mode 100644
index 0000000..1077846
--- /dev/null
+++ b/youtube_dl/extractor/kuwo.py
@@ -0,0 +1,314 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_id,
+ clean_html,
+ ExtractorError,
+ remove_start,
+)
+
+
+class KuwoBaseIE(InfoExtractor):
+ _FORMATS = [
+ {'format': 'ape', 'ext': 'ape', 'preference': 100},
+ {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80},
+ {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70},
+ {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60},
+ {'format': 'wma', 'ext': 'wma', 'preference': 20},
+ {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
+ ]
+
+ def _get_formats(self, song_id):
+ formats = []
+ for file_format in self._FORMATS:
+ song_url = self._download_webpage(
+ 'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' %
+ (file_format['ext'], file_format.get('br', ''), song_id),
+ song_id, note='Download %s url info' % file_format['format'],
+ )
+ if song_url.startswith('http://') or song_url.startswith('https://'):
+ formats.append({
+ 'url': song_url,
+ 'format_id': file_format['format'],
+ 'format': file_format['format'],
+ 'preference': file_format['preference'],
+ 'abr': file_format.get('abr'),
+ })
+ self._sort_formats(formats)
+ return formats
+
+
+class KuwoIE(KuwoBaseIE):
+ IE_NAME = 'kuwo:song'
+ IE_DESC = '酷我音乐'
+ _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/'
+ _TESTS = [{
+ 'url': 'http://www.kuwo.cn/yinyue/635632/',
+ 'info_dict': {
+ 'id': '635632',
+ 'ext': 'ape',
+ 'title': '爱我别走',
+ 'creator': '张震岳',
+ 'upload_date': '20080122',
+ 'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
+ },
+ }, {
+ 'url': 'http://www.kuwo.cn/yinyue/6446136/',
+ 'info_dict': {
+ 'id': '6446136',
+ 'ext': 'mp3',
+ 'title': '心',
+ 'creator': 'IU',
+ 'upload_date': '20150518',
+ },
+ 'params': {
+ 'format': 'mp3-320'
+ },
+ }]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, song_id, note='Download song detail info',
+ errnote='Unable to get song detail info')
+
+ song_name = self._html_search_regex(
+ r'<h1[^>]+title="([^"]+)">', webpage, 'song name')
+ singer_name = self._html_search_regex(
+ r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
+ webpage, 'singer name', fatal=False)
+ lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
+ if lrc_content == '暂无': # indicates no lyrics
+ lrc_content = None
+
+ formats = self._get_formats(song_id)
+
+ album_id = self._html_search_regex(
+ r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
+ webpage, 'album id', fatal=False)
+
+ publish_time = None
+ if album_id is not None:
+ album_info_page = self._download_webpage(
+ 'http://www.kuwo.cn/album/%s/' % album_id, song_id,
+ note='Download album detail info',
+ errnote='Unable to get album detail info')
+
+ publish_time = self._html_search_regex(
+ r'发行时间:(\d{4}-\d{2}-\d{2})', album_info_page,
+ 'publish time', fatal=False)
+ if publish_time:
+ publish_time = publish_time.replace('-', '')
+
+ return {
+ 'id': song_id,
+ 'title': song_name,
+ 'creator': singer_name,
+ 'upload_date': publish_time,
+ 'description': lrc_content,
+ 'formats': formats,
+ }
+
+
+class KuwoAlbumIE(InfoExtractor):
+ IE_NAME = 'kuwo:album'
+ IE_DESC = '酷我音乐 - 专辑'
+ _VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/'
+ _TEST = {
+ 'url': 'http://www.kuwo.cn/album/502294/',
+ 'info_dict': {
+ 'id': '502294',
+ 'title': 'M',
+ 'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c',
+ },
+ 'playlist_count': 2,
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, album_id, note='Download album info',
+ errnote='Unable to get album info')
+
+ album_name = self._html_search_regex(
+ r'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', webpage,
+ 'album name')
+ album_intro = remove_start(
+ clean_html(get_element_by_id('intro', webpage)),
+ '%s简介:' % album_name)
+
+ entries = [
+ self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+ r'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"',
+ webpage)
+ ]
+ return self.playlist_result(entries, album_id, album_name, album_intro)
+
+
+class KuwoChartIE(InfoExtractor):
+ IE_NAME = 'kuwo:chart'
+ IE_DESC = '酷我音乐 - 排行榜'
+ _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
+ _TEST = {
+ 'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
+ 'info_dict': {
+ 'id': '香港中文龙虎榜',
+ 'title': '香港中文龙虎榜',
+ 'description': 're:\d{4}第\d{2}期',
+ },
+ 'playlist_mincount': 10,
+ }
+
+ def _real_extract(self, url):
+ chart_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, chart_id, note='Download chart info',
+ errnote='Unable to get chart info')
+
+ chart_name = self._html_search_regex(
+ r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')
+
+ chart_desc = self._html_search_regex(
+ r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')
+
+ entries = [
+ self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+ r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage)
+ ]
+ return self.playlist_result(entries, chart_id, chart_name, chart_desc)
+
+
+class KuwoSingerIE(InfoExtractor):
+ IE_NAME = 'kuwo:singer'
+ IE_DESC = '酷我音乐 - 歌手'
+ _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
+ 'info_dict': {
+ 'id': 'bruno+mars',
+ 'title': 'Bruno Mars',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
+ 'info_dict': {
+ 'id': 'Ali',
+ 'title': 'Ali',
+ },
+ 'playlist_mincount': 95,
+ }]
+
+ def _real_extract(self, url):
+ singer_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, singer_id, note='Download singer info',
+ errnote='Unable to get singer info')
+
+ singer_name = self._html_search_regex(
+ r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name'
+ )
+
+ entries = []
+ first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True
+ for page_num in itertools.count(1):
+ webpage = self._download_webpage(
+ 'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num),
+ singer_id, note='Download song list page #%d' % page_num,
+ errnote='Unable to get song list page #%d' % page_num)
+
+ entries.extend([
+ self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+ r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/',
+ webpage)
+ ][:10 if first_page_only else None])
+
+ if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage):
+ break
+
+ return self.playlist_result(entries, singer_id, singer_name)
+
+
+class KuwoCategoryIE(InfoExtractor):
+ IE_NAME = 'kuwo:category'
+ IE_DESC = '酷我音乐 - 分类'
+ _VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
+ _TEST = {
+ 'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
+ 'info_dict': {
+ 'id': '86375',
+ 'title': '八十年代精选',
+ 'description': '这些都是属于八十年代的回忆!',
+ },
+ 'playlist_count': 30,
+ }
+
+ def _real_extract(self, url):
+ category_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, category_id, note='Download category info',
+ errnote='Unable to get category info')
+
+ category_name = self._html_search_regex(
+ r'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage, 'category name')
+
+ category_desc = remove_start(
+ get_element_by_id('intro', webpage).strip(),
+ '%s简介:' % category_name)
+
+ jsonm = self._parse_json(self._html_search_regex(
+ r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
+
+ entries = [
+ self.url_result('http://www.kuwo.cn/yinyue/%s/' % song['musicrid'], 'Kuwo')
+ for song in jsonm['musiclist']
+ ]
+ return self.playlist_result(entries, category_id, category_name, category_desc)
+
+
+class KuwoMvIE(KuwoBaseIE):
+ IE_NAME = 'kuwo:mv'
+ IE_DESC = '酷我音乐 - MV'
+ _VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
+ _TEST = {
+ 'url': 'http://www.kuwo.cn/mv/6480076/',
+ 'info_dict': {
+ 'id': '6480076',
+ 'ext': 'mkv',
+ 'title': '我们家MV',
+ 'creator': '2PM',
+ },
+ }
+ _FORMATS = KuwoBaseIE._FORMATS + [
+ {'format': 'mkv', 'ext': 'mkv', 'preference': 250},
+ {'format': 'mp4', 'ext': 'mp4', 'preference': 200},
+ ]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, song_id, note='Download mv detail info: %s' % song_id,
+ errnote='Unable to get mv detail info: %s' % song_id)
+
+ mobj = re.search(
+ r'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"',
+ webpage)
+ if mobj:
+ song_name = mobj.group('song')
+ singer_name = mobj.group('singer')
+ else:
+ raise ExtractorError('Unable to find song or singer names')
+
+ formats = self._get_formats(song_id)
+
+ return {
+ 'id': song_id,
+ 'title': song_name,
+ 'creator': singer_name,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index da896ca..ba2ae80 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -19,6 +19,7 @@ from ..utils import (
class LetvIE(InfoExtractor):
+ IE_DESC = '乐视网'
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
_TESTS = [{
diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py
index 42cb6e3..f8cbca7 100644
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
from ..utils import (
determine_ext,
int_or_none,
+ remove_end,
unified_strdate,
ExtractorError,
)
@@ -39,7 +40,6 @@ class LifeNewsIE(InfoExtractor):
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
'upload_date': '20150402',
- 'uploader': 'embed.life.ru',
}
}, {
'url': 'http://lifenews.ru/news/153461',
@@ -50,7 +50,6 @@ class LifeNewsIE(InfoExtractor):
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'upload_date': '20150505',
- 'uploader': 'embed.life.ru',
}
}, {
'url': 'http://lifenews.ru/video/13035',
@@ -72,20 +71,20 @@ class LifeNewsIE(InfoExtractor):
if not videos and not iframe_link:
raise ExtractorError('No media links available for %s' % video_id)
- title = self._og_search_title(webpage)
- TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
- if title.endswith(TITLE_SUFFIX):
- title = title[:-len(TITLE_SUFFIX)]
+ title = remove_end(
+ self._og_search_title(webpage),
+ ' - Первый по срочным новостям — LIFE | NEWS')
description = self._og_search_description(webpage)
view_count = self._html_search_regex(
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
comment_count = self._html_search_regex(
- r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
+ r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
+ webpage, 'comment count', fatal=False)
upload_date = self._html_search_regex(
- r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
+ r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
if upload_date is not None:
upload_date = unified_strdate(upload_date)
diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py
index 3582206..857edfd 100644
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@@ -40,6 +40,17 @@ class LiveLeakIE(InfoExtractor):
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
'age_limit': 18,
}
+ }, {
+ # Covers https://github.com/rg3/youtube-dl/pull/5983
+ 'url': 'http://www.liveleak.com/view?i=801_1409392012',
+ 'md5': '0b3bec2d888c20728ca2ad3642f0ef15',
+ 'info_dict': {
+ 'id': '801_1409392012',
+ 'ext': 'mp4',
+ 'description': "Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.",
+ 'uploader': 'bony333',
+ 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
+ }
}]
def _real_extract(self, url):
@@ -85,7 +96,10 @@ class LiveLeakIE(InfoExtractor):
'url': s['file'],
} for i, s in enumerate(sources)]
for i, s in enumerate(sources):
- orig_url = s['file'].replace('.h264_base.mp4', '')
+ # Removing '.h264_*.mp4' gives the raw video, which is essentially
+ # the same video without the LiveLeak logo at the top (see
+ # https://github.com/rg3/youtube-dl/pull/4768)
+ orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file'])
if s['file'] != orig_url:
formats.append({
'format_id': 'original-%s' % i,
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index cfd3b14..a00f6e5 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -30,13 +30,13 @@ class LyndaBaseIE(InfoExtractor):
return
login_form = {
- 'username': username,
- 'password': password,
+ 'username': username.encode('utf-8'),
+ 'password': password.encode('utf-8'),
'remember': 'false',
'stayPut': 'false'
}
request = compat_urllib_request.Request(
- self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
@@ -65,7 +65,7 @@ class LyndaBaseIE(InfoExtractor):
'stayPut': 'false',
}
request = compat_urllib_request.Request(
- self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
+ self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
login_page = self._download_webpage(
request, None,
'Confirming log in and log out from another device')
diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py
index 0b85a59..92511a6 100644
--- a/youtube_dl/extractor/malemotion.py
+++ b/youtube_dl/extractor/malemotion.py
@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
class MalemotionIE(InfoExtractor):
@@ -24,7 +22,7 @@ class MalemotionIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_url = compat_urllib_parse.unquote(self._search_regex(
+ video_url = compat_urllib_parse_unquote(self._search_regex(
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
video_title = self._html_search_regex(
r'<title>(.*?)</title', webpage, 'title')
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index 8bc333b..6e2e73a 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -155,7 +156,7 @@ class MetacafeIE(InfoExtractor):
video_url = None
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
if mobj is not None:
- mediaURL = compat_urllib_parse.unquote(mobj.group(1))
+ mediaURL = compat_urllib_parse_unquote(mobj.group(1))
video_ext = mediaURL[-3:]
# Extract gdaKey if available
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 7091f33..852d722 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -5,6 +5,7 @@ import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
@@ -48,7 +49,7 @@ class MiTeleIE(InfoExtractor):
domain = 'http://' + domain
info_url = compat_urlparse.urljoin(
domain,
- compat_urllib_parse.unquote(embed_data['flashvars']['host'])
+ compat_urllib_parse_unquote(embed_data['flashvars']['host'])
)
info_el = self._download_xml(info_url, episode).find('./video/info')
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 425a4cc..d47aece 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -3,9 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
HEADRequest,
@@ -60,7 +58,7 @@ class MixcloudIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
uploader = mobj.group(1)
cloudcast_name = mobj.group(2)
- track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
+ track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
webpage = self._download_webpage(url, track_id)
diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py
index 2cec12d..9bf99a5 100644
--- a/youtube_dl/extractor/mofosex.py
+++ b/youtube_dl/extractor/mofosex.py
@@ -5,9 +5,9 @@ import re
from .common import InfoExtractor
from ..compat import (
+ compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
- compat_urllib_parse,
)
@@ -34,7 +34,7 @@ class MofosexIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
- video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
+ video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index 5b9b9fb..4557a2b 100644
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -35,7 +35,8 @@ class MySpassIE(InfoExtractor):
# get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id
- metadata = self._download_xml(metadata_url, video_id)
+ metadata = self._download_xml(
+ metadata_url, video_id, transform_source=lambda s: s.strip())
# extract values from metadata
url_flv_el = metadata.find('url_flv')
diff --git a/youtube_dl/extractor/myvi.py b/youtube_dl/extractor/myvi.py
new file mode 100644
index 0000000..4c65be1
--- /dev/null
+++ b/youtube_dl/extractor/myvi.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .vimple import SprutoBaseIE
+
+
+class MyviIE(SprutoBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ myvi\.(?:ru/player|tv)/
+ (?:
+ (?:
+ embed/html|
+ flash|
+ api/Video/Get
+ )/|
+ content/preloader\.swf\?.*\bid=
+ )
+ (?P<id>[\da-zA-Z_-]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
+ 'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
+ 'info_dict': {
+ 'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43',
+ 'ext': 'mp4',
+ 'title': 'хозяин жизни',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 25,
+ },
+ }, {
+ 'url': 'http://myvi.ru/player/content/preloader.swf?id=oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wOYf1WFpPfc_bWTKGVf_Zafr0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://myvi.ru/player/api/Video/Get/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://myvi.tv/embed/html/oTGTNWdyz4Zwy_u1nraolwZ1odenTd9WkTnRfIL9y8VOgHYqOHApE575x4_xxS9Vn0?ap=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_url(cls, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ spruto = self._download_json(
+ 'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
+
+ return self._extract_spruto(spruto, video_id)
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
index 5e754fc..c96f472 100644
--- a/youtube_dl/extractor/myvideo.py
+++ b/youtube_dl/extractor/myvideo.py
@@ -10,6 +10,7 @@ from .common import InfoExtractor
from ..compat import (
compat_ord,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -107,7 +108,7 @@ class MyVideoIE(InfoExtractor):
if not a == '_encxml':
params[a] = b
else:
- encxml = compat_urllib_parse.unquote(b)
+ encxml = compat_urllib_parse_unquote(b)
if not params.get('domain'):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
@@ -135,7 +136,7 @@ class MyVideoIE(InfoExtractor):
video_url = None
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
if mobj:
- video_url = compat_urllib_parse.unquote(mobj.group(1))
+ video_url = compat_urllib_parse_unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self.report_warning(
'Rewriting URL to use unencrypted rtmp:// ...',
@@ -147,10 +148,10 @@ class MyVideoIE(InfoExtractor):
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError('unable to extract url')
- video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
+ video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
- video_file = compat_urllib_parse.unquote(video_file)
+ video_file = compat_urllib_parse_unquote(video_file)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
@@ -159,7 +160,7 @@ class MyVideoIE(InfoExtractor):
video_playpath = ''
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
- video_swfobj = compat_urllib_parse.unquote(video_swfobj)
+ video_swfobj = compat_urllib_parse_unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, 'title')
diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py
index c18640c..f793b72 100644
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@@ -25,8 +25,11 @@ class NationalGeographicIE(InfoExtractor):
name = url_basename(url)
webpage = self._download_webpage(url, name)
- feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
- guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
+ feed_url = self._search_regex(
+ r'data-feed-url="([^"]+)"', webpage, 'feed url')
+ guid = self._search_regex(
+ r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
+ webpage, 'guid')
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py
new file mode 100644
index 0000000..a8e0a64
--- /dev/null
+++ b/youtube_dl/extractor/neteasemusic.py
@@ -0,0 +1,459 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from hashlib import md5
+from base64 import b64encode
+from datetime import datetime
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_request,
+ compat_urllib_parse,
+ compat_str,
+ compat_itertools_count,
+)
+
+
+class NetEaseMusicBaseIE(InfoExtractor):
+ _FORMATS = ['bMusic', 'mMusic', 'hMusic']
+ _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
+ _API_BASE = 'http://music.163.com/api/'
+
+ @classmethod
+ def _encrypt(cls, dfsid):
+ salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
+ string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
+ salt_len = len(salt_bytes)
+ for i in range(len(string_bytes)):
+ string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
+ m = md5()
+ m.update(bytes(string_bytes))
+ result = b64encode(m.digest()).decode('ascii')
+ return result.replace('/', '_').replace('+', '-')
+
+ @classmethod
+ def extract_formats(cls, info):
+ formats = []
+ for song_format in cls._FORMATS:
+ details = info.get(song_format)
+ if not details:
+ continue
+ formats.append({
+ 'url': 'http://m1.music.126.net/%s/%s.%s' %
+ (cls._encrypt(details['dfsId']), details['dfsId'],
+ details['extension']),
+ 'ext': details.get('extension'),
+ 'abr': details.get('bitrate', 0) / 1000,
+ 'format_id': song_format,
+ 'filesize': details.get('size'),
+ 'asr': details.get('sr')
+ })
+ return formats
+
+ @classmethod
+ def convert_milliseconds(cls, ms):
+ return int(round(ms / 1000.0))
+
+ def query_api(self, endpoint, video_id, note):
+ req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
+ req.add_header('Referer', self._API_BASE)
+ return self._download_json(req, video_id, note)
+
+
+class NetEaseMusicIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:song'
+ IE_DESC = '网易云音乐'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://music.163.com/#/song?id=32102397',
+ 'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
+ 'info_dict': {
+ 'id': '32102397',
+ 'ext': 'mp3',
+ 'title': 'Bad Blood (feat. Kendrick Lamar)',
+ 'creator': 'Taylor Swift / Kendrick Lamar',
+ 'upload_date': '20150517',
+ 'timestamp': 1431878400,
+ 'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
+ },
+ }, {
+ 'note': 'No lyrics translation.',
+ 'url': 'http://music.163.com/#/song?id=29822014',
+ 'info_dict': {
+ 'id': '29822014',
+ 'ext': 'mp3',
+ 'title': '听见下雨的声音',
+ 'creator': '周杰伦',
+ 'upload_date': '20141225',
+ 'timestamp': 1419523200,
+ 'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
+ },
+ }, {
+ 'note': 'No lyrics.',
+ 'url': 'http://music.163.com/song?id=17241424',
+ 'info_dict': {
+ 'id': '17241424',
+ 'ext': 'mp3',
+ 'title': 'Opus 28',
+ 'creator': 'Dustin O\'Halloran',
+ 'upload_date': '20080211',
+ 'timestamp': 1202745600,
+ },
+ }, {
+ 'note': 'Has translated name.',
+ 'url': 'http://music.163.com/#/song?id=22735043',
+ 'info_dict': {
+ 'id': '22735043',
+ 'ext': 'mp3',
+ 'title': '소원을 말해봐 (Genie)',
+ 'creator': '少女时代',
+ 'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
+ 'upload_date': '20100127',
+ 'timestamp': 1264608000,
+ 'alt_title': '说出愿望吧(Genie)',
+ }
+ }]
+
+ def _process_lyrics(self, lyrics_info):
+ original = lyrics_info.get('lrc', {}).get('lyric')
+ translated = lyrics_info.get('tlyric', {}).get('lyric')
+
+ if not translated:
+ return original
+
+ lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
+ original_ts_texts = re.findall(lyrics_expr, original)
+ translation_ts_dict = dict(
+ (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
+ )
+ lyrics = '\n'.join([
+ '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
+ for time_stamp, text in original_ts_texts
+ ])
+ return lyrics
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+
+ params = {
+ 'id': song_id,
+ 'ids': '[%s]' % song_id
+ }
+ info = self.query_api(
+ 'song/detail?' + compat_urllib_parse.urlencode(params),
+ song_id, 'Downloading song info')['songs'][0]
+
+ formats = self.extract_formats(info)
+ self._sort_formats(formats)
+
+ lyrics_info = self.query_api(
+ 'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
+ song_id, 'Downloading lyrics data')
+ lyrics = self._process_lyrics(lyrics_info)
+
+ alt_title = None
+ if info.get('transNames'):
+ alt_title = '/'.join(info.get('transNames'))
+
+ return {
+ 'id': song_id,
+ 'title': info['name'],
+ 'alt_title': alt_title,
+ 'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
+ 'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
+ 'thumbnail': info.get('album', {}).get('picUrl'),
+ 'duration': self.convert_milliseconds(info.get('duration', 0)),
+ 'description': lyrics,
+ 'formats': formats,
+ }
+
+
+class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:album'
+ IE_DESC = '网易云音乐 - 专辑'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://music.163.com/#/album?id=220780',
+ 'info_dict': {
+ 'id': '220780',
+ 'title': 'B\'day',
+ },
+ 'playlist_count': 23,
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ info = self.query_api(
+ 'album/%s?id=%s' % (album_id, album_id),
+ album_id, 'Downloading album data')['album']
+
+ name = info['name']
+ desc = info.get('description')
+ entries = [
+ self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+ 'NetEaseMusic', song['id'])
+ for song in info['songs']
+ ]
+ return self.playlist_result(entries, album_id, name, desc)
+
+
+class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:singer'
+ IE_DESC = '网易云音乐 - 歌手'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'note': 'Singer has aliases.',
+ 'url': 'http://music.163.com/#/artist?id=10559',
+ 'info_dict': {
+ 'id': '10559',
+ 'title': '张惠妹 - aMEI;阿密特',
+ },
+ 'playlist_count': 50,
+ }, {
+ 'note': 'Singer has translated name.',
+ 'url': 'http://music.163.com/#/artist?id=124098',
+ 'info_dict': {
+ 'id': '124098',
+ 'title': '李昇基 - 이승기',
+ },
+ 'playlist_count': 50,
+ }]
+
+ def _real_extract(self, url):
+ singer_id = self._match_id(url)
+
+ info = self.query_api(
+ 'artist/%s?id=%s' % (singer_id, singer_id),
+ singer_id, 'Downloading singer data')
+
+ name = info['artist']['name']
+ if info['artist']['trans']:
+ name = '%s - %s' % (name, info['artist']['trans'])
+ if info['artist']['alias']:
+ name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
+
+ entries = [
+ self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+ 'NetEaseMusic', song['id'])
+ for song in info['hotSongs']
+ ]
+ return self.playlist_result(entries, singer_id, name)
+
+
+class NetEaseMusicListIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:playlist'
+ IE_DESC = '网易云音乐 - 歌单'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://music.163.com/#/playlist?id=79177352',
+ 'info_dict': {
+ 'id': '79177352',
+ 'title': 'Billboard 2007 Top 100',
+ 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
+ },
+ 'playlist_count': 99,
+ }, {
+ 'note': 'Toplist/Charts sample',
+ 'url': 'http://music.163.com/#/discover/toplist?id=3733003',
+ 'info_dict': {
+ 'id': '3733003',
+ 'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
+ 'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
+ },
+ 'playlist_count': 50,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ info = self.query_api(
+ 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
+ list_id, 'Downloading playlist data')['result']
+
+ name = info['name']
+ desc = info.get('description')
+
+ if info.get('specialType') == 10: # is a chart/toplist
+ datestamp = datetime.fromtimestamp(
+ self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
+ name = '%s %s' % (name, datestamp)
+
+ entries = [
+ self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+ 'NetEaseMusic', song['id'])
+ for song in info['tracks']
+ ]
+ return self.playlist_result(entries, list_id, name, desc)
+
+
+class NetEaseMusicMvIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:mv'
+ IE_DESC = '网易云音乐 - MV'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://music.163.com/#/mv?id=415350',
+ 'info_dict': {
+ 'id': '415350',
+ 'ext': 'mp4',
+ 'title': '이럴거면 그러지말지',
+ 'description': '白雅言自作曲唱甜蜜爱情',
+ 'creator': '白雅言',
+ 'upload_date': '20150520',
+ },
+ }
+
+ def _real_extract(self, url):
+ mv_id = self._match_id(url)
+
+ info = self.query_api(
+ 'mv/detail?id=%s&type=mp4' % mv_id,
+ mv_id, 'Downloading mv info')['data']
+
+ formats = [
+ {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
+ for brs, mv_url in info['brs'].items()
+ ]
+ self._sort_formats(formats)
+
+ return {
+ 'id': mv_id,
+ 'title': info['name'],
+ 'description': info.get('desc') or info.get('briefDesc'),
+ 'creator': info['artistName'],
+ 'upload_date': info['publishTime'].replace('-', ''),
+ 'formats': formats,
+ 'thumbnail': info.get('cover'),
+ 'duration': self.convert_milliseconds(info.get('duration', 0)),
+ }
+
+
+class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:program'
+ IE_DESC = '网易云音乐 - 电台节目'
+ _VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://music.163.com/#/program?id=10109055',
+ 'info_dict': {
+ 'id': '10109055',
+ 'ext': 'mp3',
+ 'title': '不丹足球背后的故事',
+ 'description': '喜马拉雅人的足球梦 ...',
+ 'creator': '大话西藏',
+ 'timestamp': 1434179342,
+ 'upload_date': '20150613',
+ 'duration': 900,
+ },
+ }, {
+ 'note': 'This program has accompanying songs.',
+ 'url': 'http://music.163.com/#/program?id=10141022',
+ 'info_dict': {
+ 'id': '10141022',
+ 'title': '25岁,你是自在如风的少年<27°C>',
+ 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'note': 'This program has accompanying songs.',
+ 'url': 'http://music.163.com/#/program?id=10141022',
+ 'info_dict': {
+ 'id': '10141022',
+ 'ext': 'mp3',
+ 'title': '25岁,你是自在如风的少年<27°C>',
+ 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
+ 'timestamp': 1434450841,
+ 'upload_date': '20150616',
+ },
+ 'params': {
+ 'noplaylist': True
+ }
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url)
+
+ info = self.query_api(
+ 'dj/program/detail?id=%s' % program_id,
+ program_id, 'Downloading program info')['program']
+
+ name = info['name']
+ description = info['description']
+
+ if not info['songs'] or self._downloader.params.get('noplaylist'):
+ if info['songs']:
+ self.to_screen(
+ 'Downloading just the main audio %s because of --no-playlist'
+ % info['mainSong']['id'])
+
+ formats = self.extract_formats(info['mainSong'])
+ self._sort_formats(formats)
+
+ return {
+ 'id': program_id,
+ 'title': name,
+ 'description': description,
+ 'creator': info['dj']['brand'],
+ 'timestamp': self.convert_milliseconds(info['createTime']),
+ 'thumbnail': info['coverUrl'],
+ 'duration': self.convert_milliseconds(info.get('duration', 0)),
+ 'formats': formats,
+ }
+
+ self.to_screen(
+ 'Downloading playlist %s - add --no-playlist to just download the main audio %s'
+ % (program_id, info['mainSong']['id']))
+
+ song_ids = [info['mainSong']['id']]
+ song_ids.extend([song['id'] for song in info['songs']])
+ entries = [
+ self.url_result('http://music.163.com/#/song?id=%s' % song_id,
+ 'NetEaseMusic', song_id)
+ for song_id in song_ids
+ ]
+ return self.playlist_result(entries, program_id, name, description)
+
+
+class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:djradio'
+ IE_DESC = '网易云音乐 - 电台'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://music.163.com/#/djradio?id=42',
+ 'info_dict': {
+ 'id': '42',
+ 'title': '声音蔓延',
+ 'description': 'md5:766220985cbd16fdd552f64c578a6b15'
+ },
+ 'playlist_mincount': 40,
+ }
+ _PAGE_SIZE = 1000
+
+ def _real_extract(self, url):
+ dj_id = self._match_id(url)
+
+ name = None
+ desc = None
+ entries = []
+ for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
+ info = self.query_api(
+ 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
+ % (self._PAGE_SIZE, dj_id, offset),
+ dj_id, 'Downloading dj programs - %d' % offset)
+
+ entries.extend([
+ self.url_result(
+ 'http://music.163.com/#/program?id=%s' % program['id'],
+ 'NetEaseMusicProgram', program['id'])
+ for program in info['programs']
+ ])
+
+ if name is None:
+ radio = info['programs'][0]['radio']
+ name = radio['name']
+ desc = radio['desc']
+
+ if not info['more']:
+ break
+
+ return self.playlist_result(entries, dj_id, name, desc)
diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py
index 85fcad0..5a9e73c 100644
--- a/youtube_dl/extractor/newstube.py
+++ b/youtube_dl/extractor/newstube.py
@@ -31,7 +31,7 @@ class NewstubeIE(InfoExtractor):
page = self._download_webpage(url, video_id, 'Downloading page')
video_guid = self._html_search_regex(
- r'<meta property="og:video" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+ r'<meta property="og:video:url" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
page, 'video GUID')
player = self._download_xml(
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py
index d1b7cff..c10784f 100644
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dl/extractor/nextmedia.py
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601
class NextMediaIE(InfoExtractor):
+ IE_DESC = '蘋果日報'
_VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
@@ -66,6 +67,7 @@ class NextMediaIE(InfoExtractor):
class NextMediaActionNewsIE(NextMediaIE):
+ IE_DESC = '蘋果日報 - 動新聞'
_VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
_TESTS = [{
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
@@ -90,6 +92,7 @@ class NextMediaActionNewsIE(NextMediaIE):
class AppleDailyIE(NextMediaIE):
+ IE_DESC = '臺灣蘋果日報'
_VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py
index 2684dd2..dc54634 100644
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dl/extractor/nfl.py
@@ -19,7 +19,7 @@ class NFLIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
(?:.+?/)*
- (?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
+ (?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
_TESTS = [
{
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
@@ -58,6 +58,10 @@ class NFLIE(InfoExtractor):
'upload_date': '20150202',
},
},
+ {
+ 'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
+ 'only_matching': True,
+ }
]
@staticmethod
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 3cecebf..0f8aa5a 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -182,7 +182,6 @@ class NiconicoIE(InfoExtractor):
extension = xpath_text(video_info, './/movie_type')
if not extension:
extension = determine_ext(video_real_url)
- video_format = extension.upper()
thumbnail = (
xpath_text(video_info, './/thumbnail_url') or
@@ -241,7 +240,7 @@ class NiconicoIE(InfoExtractor):
'url': video_real_url,
'title': title,
'ext': extension,
- 'format': video_format,
+ 'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
'thumbnail': thumbnail,
'description': description,
'uploader': uploader,
diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py
index 664dc81..a53e27b 100644
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -166,6 +166,10 @@ class NocoIE(InfoExtractor):
self._sort_formats(formats)
timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
+
+ if timestamp is not None and timestamp < 0:
+ timestamp = None
+
uploader = show.get('partner_name')
uploader_id = show.get('partner_key')
duration = float_or_none(show.get('duration_ms'), 1000)
@@ -191,7 +195,7 @@ class NocoIE(InfoExtractor):
if episode_number:
title += ' #' + compat_str(episode_number)
if episode:
- title += ' - ' + episode
+ title += ' - ' + compat_str(episode)
description = show.get('show_resume') or show.get('family_resume')
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
index 173e46c..0b5ff47 100644
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -133,7 +133,7 @@ class NowTVIE(InfoExtractor):
station = mobj.group('station')
info = self._download_json(
- 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id,
+ 'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
display_id)
video_id = compat_str(info['id'])
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 5d84485..0c2d02c 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
fix_xml_ampersands,
@@ -7,7 +9,6 @@ from ..utils import (
qualities,
strip_jsonp,
unified_strdate,
- url_basename,
)
@@ -16,13 +17,42 @@ class NPOBaseIE(InfoExtractor):
token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js',
video_id, note='Downloading token')
- return self._search_regex(
+ token = self._search_regex(
r'npoplayer\.token = "(.+?)"', token_page, 'token')
+ # Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js
+ token_l = list(token)
+ first = second = None
+ for i in range(5, len(token_l) - 4):
+ if token_l[i].isdigit():
+ if first is None:
+ first = i
+ elif second is None:
+ second = i
+ if first is None or second is None:
+ first = 12
+ second = 13
+
+ token_l[first], token_l[second] = token_l[second], token_l[first]
+
+ return ''.join(token_l)
class NPOIE(NPOBaseIE):
- IE_NAME = 'npo.nl'
- _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
+ IE_NAME = 'npo'
+ IE_DESC = 'npo.nl and ntr.nl'
+ _VALID_URL = r'''(?x)
+ (?:
+ npo:|
+ https?://
+ (?:www\.)?
+ (?:
+ npo\.nl/(?!live|radio)(?:[^/]+/){2}|
+ ntr\.nl/(?:[^/]+/){2,}|
+ omroepwnl\.nl/video/fragment/[^/]+__
+ )
+ )
+ (?P<id>[^/?#]+)
+ '''
_TESTS = [
{
@@ -42,7 +72,7 @@ class NPOIE(NPOBaseIE):
'info_dict': {
'id': 'VARA_101191800',
'ext': 'm4v',
- 'title': 'De Mega Mike & Mega Thomas show',
+ 'title': 'De Mega Mike & Mega Thomas show: The best of.',
'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
'upload_date': '20090227',
'duration': 2400,
@@ -54,8 +84,8 @@ class NPOIE(NPOBaseIE):
'info_dict': {
'id': 'VPWON_1169289',
'ext': 'm4v',
- 'title': 'Tegenlicht',
- 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+ 'title': 'Tegenlicht: De toekomst komt uit Afrika',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
'upload_date': '20130225',
'duration': 3000,
},
@@ -84,6 +114,30 @@ class NPOIE(NPOBaseIE):
'title': 'Hoe gaat Europa verder na Parijs?',
},
},
+ {
+ 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
+ 'md5': '01c6a2841675995da1f0cf776f03a9c3',
+ 'info_dict': {
+ 'id': 'VPWON_1233944',
+ 'ext': 'm4v',
+ 'title': 'Aap, poot, pies',
+ 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
+ 'upload_date': '20150508',
+ 'duration': 599,
+ },
+ },
+ {
+ 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
+ 'md5': 'd30cd8417b8b9bca1fdff27428860d08',
+ 'info_dict': {
+ 'id': 'POW_00996502',
+ 'ext': 'm4v',
+ 'title': '''"Dit is wel een 'landslide'..."''',
+ 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
+ 'upload_date': '20150508',
+ 'duration': 462,
+ },
+ }
]
def _real_extract(self, url):
@@ -92,12 +146,24 @@ class NPOIE(NPOBaseIE):
def _get_info(self, video_id):
metadata = self._download_json(
- 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
+ 'http://e.omroep.nl/metadata/%s' % video_id,
video_id,
# We have to remove the javascript callback
transform_source=strip_jsonp,
)
+ # For some videos actual video id (prid) is different (e.g. for
+ # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
+ # video id is POMS_WNL_853698 but prid is POW_00996502)
+ video_id = metadata.get('prid') or video_id
+
+ # titel is too generic in some cases so utilize aflevering_titel as well
+ # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html)
+ title = metadata['titel']
+ sub_title = metadata.get('aflevering_titel')
+ if sub_title and sub_title != title:
+ title += ': %s' % sub_title
+
token = self._get_token(video_id)
formats = []
@@ -170,8 +236,8 @@ class NPOIE(NPOBaseIE):
return {
'id': video_id,
- 'title': metadata['titel'],
- 'description': metadata['info'],
+ 'title': title,
+ 'description': metadata.get('info'),
'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
'upload_date': unified_strdate(metadata.get('gidsdatum')),
'duration': parse_duration(metadata.get('tijdsduur')),
@@ -340,9 +406,8 @@ class NPORadioFragmentIE(InfoExtractor):
}
-class TegenlichtVproIE(NPOIE):
- IE_NAME = 'tegenlicht.vpro.nl'
- _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+class VPROIE(NPOIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
_TESTS = [
{
@@ -351,17 +416,72 @@ class TegenlichtVproIE(NPOIE):
'info_dict': {
'id': 'VPWON_1169289',
'ext': 'm4v',
- 'title': 'Tegenlicht',
- 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+ 'title': 'De toekomst komt uit Afrika',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
'upload_date': '20130225',
},
},
+ {
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
+ 'info_dict': {
+ 'id': 'sergio-herman',
+ 'title': 'Sergio Herman: Fucking perfect',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # playlist with youtube embed
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
+ 'info_dict': {
+ 'id': 'education-education',
+ 'title': '2Doc',
+ },
+ 'playlist_count': 2,
+ }
]
def _real_extract(self, url):
- name = url_basename(url)
- webpage = self._download_webpage(url, name)
- urn = self._html_search_meta('mediaurn', webpage)
- info_page = self._download_json(
- 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
- return self._get_info(info_page['mid'])
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
+ for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
+ ]
+
+ playlist_title = self._search_regex(
+ r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
+ webpage, 'playlist title', default=None) or self._og_search_title(webpage)
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class WNLIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+
+ _TEST = {
+ 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
+ 'info_dict': {
+ 'id': 'vandaag-de-dag-6-mei',
+ 'title': 'Vandaag de Dag 6 mei',
+ },
+ 'playlist_count': 4,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('npo:%s' % video_id, 'NPO')
+ for video_id, part in re.findall(
+ r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage)
+ ]
+
+ playlist_title = self._html_search_regex(
+ r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
+ webpage, 'playlist title')
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index cc70c29..d066a96 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -13,7 +13,7 @@ from ..utils import (
class NRKIE(InfoExtractor):
- _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
+ _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
_TESTS = [
{
@@ -76,7 +76,7 @@ class NRKIE(InfoExtractor):
class NRKPlaylistIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
@@ -116,11 +116,12 @@ class NRKPlaylistIE(InfoExtractor):
class NRKTVIE(InfoExtractor):
- _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+ IE_DESC = 'NRK TV and NRK Radio'
+ _VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [
{
- 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
+ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
'info_dict': {
'id': 'MUHH48000314',
@@ -132,7 +133,7 @@ class NRKTVIE(InfoExtractor):
},
},
{
- 'url': 'http://tv.nrk.no/program/mdfp15000514',
+ 'url': 'https://tv.nrk.no/program/mdfp15000514',
'md5': '383650ece2b25ecec996ad7b5bb2a384',
'info_dict': {
'id': 'mdfp15000514',
@@ -145,7 +146,7 @@ class NRKTVIE(InfoExtractor):
},
{
# single playlist video
- 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+ 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
'md5': 'adbd1dbd813edaf532b0a253780719c2',
'info_dict': {
'id': 'MSPO40010515-part2',
@@ -157,7 +158,7 @@ class NRKTVIE(InfoExtractor):
'skip': 'Only works from Norway',
},
{
- 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+ 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
'playlist': [
{
'md5': '9480285eff92d64f06e02a5367970a7a',
@@ -188,6 +189,10 @@ class NRKTVIE(InfoExtractor):
'duration': 6947.5199999999995,
},
'skip': 'Only works from Norway',
+ },
+ {
+ 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
+ 'only_matching': True,
}
]
@@ -206,7 +211,8 @@ class NRKTVIE(InfoExtractor):
]}
def _extract_f4m(self, manifest_url, video_id):
- return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
+ return self._extract_f4m_formats(
+ manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -268,7 +274,7 @@ class NRKTVIE(InfoExtractor):
m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
if m3u8_url:
- formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
+ formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls'))
self._sort_formats(formats)
subtitles_url = self._html_search_regex(
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
index 6c7149f..215ffe8 100644
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
unified_strdate,
int_or_none,
@@ -62,7 +62,7 @@ class OdnoklassnikiIE(InfoExtractor):
metadata = self._parse_json(metadata, video_id)
else:
metadata = self._download_json(
- compat_urllib_parse.unquote(flashvars['metadataUrl']),
+ compat_urllib_parse_unquote(flashvars['metadataUrl']),
video_id, 'Downloading metadata JSON')
movie = metadata['movie']
diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py
new file mode 100644
index 0000000..0f1f448
--- /dev/null
+++ b/youtube_dl/extractor/onionstudios.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class OnionStudiosIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+
+ _TESTS = [{
+ 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
+ 'md5': 'd4851405d31adfadf71cd7a487b765bb',
+ 'info_dict': {
+ 'id': '2937',
+ 'ext': 'mp4',
+ 'title': 'Hannibal charges forward, stops for a cocktail',
+ 'description': 'md5:545299bda6abf87e5ec666548c6a9448',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'The A.V. Club',
+ 'uploader_id': 'TheAVClub',
+ },
+ }, {
+ 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)
+
+ formats = []
+ for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
+ if determine_ext(src) != 'm3u8': # m3u8 always results in 403
+ formats.append({
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ title = self._search_regex(
+ r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
+ webpage, 'title', group='title')
+ description = self._search_regex(
+ r'share_description\s*=\s*(["\'])(?P<description>[^\1]+?)\1',
+ webpage, 'description', default=None, group='description')
+ thumbnail = self._search_regex(
+ r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
+ webpage, 'thumbnail', default=False, group='thumbnail')
+
+ uploader_id = self._search_regex(
+ r'twitter_handle\s*=\s*(["\'])(?P<uploader_id>[^\1]+?)\1',
+ webpage, 'uploader id', fatal=False, group='uploader_id')
+ uploader = self._search_regex(
+ r'window\.channelName\s*=\s*(["\'])Embedded:(?P<uploader>[^\1]+?)\1',
+ webpage, 'uploader', default=False, group='uploader')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/openfilm.py b/youtube_dl/extractor/openfilm.py
index 2249657..d2ceedd 100644
--- a/youtube_dl/extractor/openfilm.py
+++ b/youtube_dl/extractor/openfilm.py
@@ -3,9 +3,9 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
parse_iso8601,
- compat_urllib_parse,
parse_age_limit,
int_or_none,
)
@@ -37,7 +37,7 @@ class OpenFilmIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
- player = compat_urllib_parse.unquote_plus(
+ player = compat_urllib_parse_unquote_plus(
self._og_search_video_url(webpage))
video = json.loads(self._search_regex(
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 143a766..fec5d65 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -35,6 +36,9 @@ class PBSIE(InfoExtractor):
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
'duration': 3190,
},
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
},
{
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
@@ -46,6 +50,9 @@ class PBSIE(InfoExtractor):
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
'duration': 5050,
},
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ }
},
{
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
@@ -68,7 +75,10 @@ class PBSIE(InfoExtractor):
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
'duration': 6559,
'thumbnail': 're:^https?://.*\.jpg$',
- }
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
},
{
'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
@@ -82,7 +92,10 @@ class PBSIE(InfoExtractor):
'duration': 3172,
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20140122',
- }
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
},
{
'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
@@ -90,6 +103,21 @@ class PBSIE(InfoExtractor):
'id': 'united-states-of-secrets',
},
'playlist_count': 2,
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
+ 'info_dict': {
+ 'id': '2280706814',
+ 'display_id': 'player',
+ 'ext': 'mp4',
+ 'title': 'Death and the Civil War',
+ 'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
+ 'duration': 6705,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
}
]
@@ -123,7 +151,7 @@ class PBSIE(InfoExtractor):
return media_id, presumptive_id, upload_date
url = self._search_regex(
- r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
+ r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
webpage, 'player URL')
mobj = re.match(self._VALID_URL, url)
@@ -196,6 +224,14 @@ class PBSIE(InfoExtractor):
rating_str = rating_str.rpartition('-')[2]
age_limit = US_RATINGS.get(rating_str)
+ subtitles = {}
+ closed_captions_url = info.get('closed_captions_url')
+ if closed_captions_url:
+ subtitles['en'] = [{
+ 'ext': 'ttml',
+ 'url': closed_captions_url,
+ }]
+
return {
'id': video_id,
'display_id': display_id,
@@ -206,4 +242,5 @@ class PBSIE(InfoExtractor):
'age_limit': age_limit,
'upload_date': upload_date,
'formats': formats,
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py
index c66db3c..788411c 100644
--- a/youtube_dl/extractor/photobucket.py
+++ b/youtube_dl/extractor/photobucket.py
@@ -4,7 +4,7 @@ import json
import re
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
class PhotobucketIE(InfoExtractor):
@@ -34,7 +34,7 @@ class PhotobucketIE(InfoExtractor):
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
webpage, 'info json')
info = json.loads(info_json)
- url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
+ url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
return {
'id': video_id,
'url': url,
diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py
new file mode 100644
index 0000000..a52210f
--- /dev/null
+++ b/youtube_dl/extractor/pinkbike.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_end,
+ remove_start,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class PinkbikeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.pinkbike.com/video/402811/',
+ 'md5': '4814b8ca7651034cd87e3361d5c2155a',
+ 'info_dict': {
+ 'id': '402811',
+ 'ext': 'mp4',
+ 'title': 'Brandon Semenuk - RAW 100',
+ 'description': 'Official release: www.redbull.ca/rupertwalker',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 100,
+ 'upload_date': '20150406',
+ 'uploader': 'revelco',
+ 'location': 'Victoria, British Columbia, Canada',
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.pinkbike.com/video/%s' % video_id, video_id)
+
+ formats = []
+ for _, format_id, src in re.findall(
+ r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None))
+ formats.append({
+ 'url': src,
+ 'format_id': format_id,
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
+ description = self._html_search_regex(
+ r'(?s)id="media-description"[^>]*>(.+?)<',
+ webpage, 'description', default=None) or remove_start(
+ self._og_search_description(webpage), title + '. ')
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration'))
+
+ uploader = self._search_regex(
+ r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
+ upload_date = unified_strdate(self._search_regex(
+ r'class="fullTime"[^>]+title="([^"]+)"',
+ webpage, 'upload date', fatal=False))
+
+ location = self._html_search_regex(
+ r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
+ webpage, 'location', fatal=False)
+
+ def extract_count(webpage, label):
+ return str_to_int(self._search_regex(
+ r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label,
+ webpage, label, fatal=False))
+
+ view_count = extract_count(webpage, 'Views')
+ comment_count = extract_count(webpage, 'Comments')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'location': location,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats
+ }
diff --git a/youtube_dl/extractor/planetaplay.py b/youtube_dl/extractor/planetaplay.py
index 596c621..06505e9 100644
--- a/youtube_dl/extractor/planetaplay.py
+++ b/youtube_dl/extractor/planetaplay.py
@@ -18,7 +18,8 @@ class PlanetaPlayIE(InfoExtractor):
'id': '3586',
'ext': 'flv',
'title': 'md5:e829428ee28b1deed00de90de49d1da1',
- }
+ },
+ 'skip': 'Not accessible from Travis CI server',
}
_SONG_FORMATS = {
diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py
index 45716c7..8a1c296 100644
--- a/youtube_dl/extractor/played.py
+++ b/youtube_dl/extractor/played.py
@@ -38,9 +38,7 @@ class PlayedIE(InfoExtractor):
if m_error:
raise ExtractorError(m_error.group('msg'), expected=True)
- fields = re.findall(
- r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage)
- data = dict(fields)
+ data = self._hidden_inputs(orig_webpage)
self._sleep(2, video_id)
diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py
index c3e667e..2eb4fd9 100644
--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dl/extractor/playvid.py
@@ -4,7 +4,8 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
)
from ..utils import (
clean_html,
@@ -44,7 +45,7 @@ class PlayvidIE(InfoExtractor):
flashvars = self._html_search_regex(
r'flashvars="(.+?)"', webpage, 'flashvars')
- infos = compat_urllib_parse.unquote(flashvars).split(r'&')
+ infos = compat_urllib_parse_unquote(flashvars).split(r'&')
for info in infos:
videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
if videovars_match:
@@ -52,7 +53,7 @@ class PlayvidIE(InfoExtractor):
val = videovars_match.group(2)
if key == 'title':
- video_title = compat_urllib_parse.unquote_plus(val)
+ video_title = compat_urllib_parse_unquote_plus(val)
if key == 'duration':
try:
duration = int(val)
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index daa284e..0b78868 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -5,7 +5,8 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
@@ -19,8 +20,8 @@ from ..aes import (
class PornHubIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
+ _TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': '882f488fa1f0026f023f33576004a2ed',
'info_dict': {
@@ -30,7 +31,17 @@ class PornHubIE(InfoExtractor):
"title": "Seductive Indian beauty strips down and fingers her pink pussy",
"age_limit": 18
}
- }
+ }, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_url(cls, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
+ if mobj:
+ return mobj.group('url')
def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex(
@@ -39,7 +50,8 @@ class PornHubIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- req = compat_urllib_request.Request(url)
+ req = compat_urllib_request.Request(
+ 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
@@ -58,7 +70,7 @@ class PornHubIE(InfoExtractor):
webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:
- thumbnail = compat_urllib_parse.unquote(thumbnail)
+ thumbnail = compat_urllib_parse_unquote(thumbnail)
view_count = self._extract_count(
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
@@ -69,9 +81,9 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
- video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
+ video_urls = list(map(compat_urllib_parse_unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
- password = compat_urllib_parse.unquote_plus(
+ password = compat_urllib_parse_unquote_plus(
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py
index 9688ed9..eba4dfb 100644
--- a/youtube_dl/extractor/pornovoisines.py
+++ b/youtube_dl/extractor/pornovoisines.py
@@ -34,7 +34,7 @@ class PornoVoisinesIE(InfoExtractor):
'duration': 120,
'view_count': int,
'average_rating': float,
- 'categories': ['Débutante', 'Scénario', 'Sodomie'],
+ 'categories': ['Débutantes', 'Scénario', 'Sodomie'],
'age_limit': 18,
}
}
@@ -71,7 +71,7 @@ class PornoVoisinesIE(InfoExtractor):
view_count = int_or_none(self._search_regex(
r'(\d+) vues', webpage, 'view count', fatal=False))
average_rating = self._search_regex(
- r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
+ r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False)
if average_rating:
average_rating = float_or_none(average_rating.replace(',', '.'))
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py
index 01cc3d9..304359d 100644
--- a/youtube_dl/extractor/primesharetv.py
+++ b/youtube_dl/extractor/primesharetv.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
@@ -31,12 +29,7 @@ class PrimeShareTVIE(InfoExtractor):
if '>File not exist<' in webpage:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
- fields = dict(re.findall(r'''(?x)<input\s+
- type="hidden"\s+
- name="([^"]+)"\s+
- (?:id="[^"]+"\s+)?
- value="([^"]*)"
- ''', webpage))
+ fields = self._hidden_inputs(webpage)
headers = {
'Referer': url,
diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py
index f536e6e..8190ed6 100644
--- a/youtube_dl/extractor/promptfile.py
+++ b/youtube_dl/extractor/promptfile.py
@@ -35,10 +35,7 @@ class PromptFileIE(InfoExtractor):
raise ExtractorError('Video %s does not exist' % video_id,
expected=True)
- fields = dict(re.findall(r'''(?x)type="hidden"\s+
- name="(.+?)"\s+
- value="(.*?)"
- ''', webpage))
+ fields = self._hidden_inputs(webpage)
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 255d4ab..fec008c 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -9,8 +9,9 @@ from ..compat import (
compat_urllib_parse,
)
from ..utils import (
- unified_strdate,
+ determine_ext,
int_or_none,
+ unified_strdate,
)
@@ -21,6 +22,11 @@ class ProSiebenSat1IE(InfoExtractor):
_TESTS = [
{
+ # Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242
+ # in response to fixing https://github.com/rg3/youtube-dl/issues/6215:
+ # - malformed f4m manifest support
+ # - proper handling of URLs starting with `https?://` in 2.0 manifests
+ # - recursive child f4m manifests extraction
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
'info_dict': {
'id': '2104602',
@@ -177,6 +183,7 @@ class ProSiebenSat1IE(InfoExtractor):
r'<header class="clearfix">\s*<h3>(.+?)</h3>',
r'<!-- start video -->\s*<h1>(.+?)</h1>',
r'<h1 class="att-name">\s*(.+?)</h1>',
+ r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
]
_DESCRIPTION_REGEXES = [
r'<p itemprop="description">\s*(.+?)</p>',
@@ -206,8 +213,8 @@ class ProSiebenSat1IE(InfoExtractor):
def _extract_clip(self, url, webpage):
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
- access_token = 'testclient'
- client_name = 'kolibri-1.2.5'
+ access_token = 'prosieben'
+ client_name = 'kolibri-2.0.19-splec4'
client_location = url
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
@@ -274,23 +281,30 @@ class ProSiebenSat1IE(InfoExtractor):
for source in urls_sources:
protocol = source['protocol']
+ source_url = source['url']
if protocol == 'rtmp' or protocol == 'rtmpe':
- mobj = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', source['url'])
+ mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
if not mobj:
continue
+ path = mobj.group('path')
+ mp4colon_index = path.rfind('mp4:')
+ app = path[:mp4colon_index]
+ play_path = path[mp4colon_index:]
formats.append({
- 'url': mobj.group('url'),
- 'app': mobj.group('app'),
- 'play_path': mobj.group('playpath'),
+ 'url': '%s/%s' % (mobj.group('url'), app),
+ 'app': app,
+ 'play_path': play_path,
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
'page_url': 'http://www.prosieben.de',
'vbr': fix_bitrate(source['bitrate']),
'ext': 'mp4',
'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
})
+ elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(source_url, clip_id))
else:
formats.append({
- 'url': source['url'],
+ 'url': source_url,
'vbr': fix_bitrate(source['bitrate']),
})
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
index f773332..1654a64 100644
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -9,26 +9,48 @@ from .common import InfoExtractor
from ..utils import (
strip_jsonp,
unescapeHTML,
+ clean_html,
)
from ..compat import compat_urllib_request
class QQMusicIE(InfoExtractor):
IE_NAME = 'qqmusic'
+ IE_DESC = 'QQ音乐'
_VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
_TESTS = [{
'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
- 'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
+ 'md5': '9ce1c1c8445f561506d2e3cfb0255705',
'info_dict': {
'id': '004295Et37taLD',
- 'ext': 'm4a',
+ 'ext': 'mp3',
'title': '可惜没如果',
'upload_date': '20141227',
'creator': '林俊杰',
'description': 'md5:d327722d0361576fde558f1ac68a7065',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'note': 'There is no mp3-320 version of this song.',
+ 'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV',
+ 'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
+ 'info_dict': {
+ 'id': '004MsGEo3DdNxV',
+ 'ext': 'mp3',
+ 'title': '如果',
+ 'upload_date': '20050626',
+ 'creator': '李季美',
+ 'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
+ 'thumbnail': 're:^https?://.*\.jpg$',
}
}]
+ _FORMATS = {
+ 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
+ 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
+ 'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}
+ }
+
# Reference: m_r_GetRUin() in top_player.js
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
@staticmethod
@@ -62,21 +84,42 @@ class QQMusicIE(InfoExtractor):
if lrc_content:
lrc_content = lrc_content.replace('\\n', '\n')
+ thumbnail_url = None
+ albummid = self._search_regex(
+ [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
+ detail_info_page, 'album mid', default=None)
+ if albummid:
+ thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \
+ % (albummid[-2:-1], albummid[-1], albummid)
+
guid = self.m_r_get_ruin()
vkey = self._download_json(
'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
mid, note='Retrieve vkey', errnote='Unable to get vkey',
transform_source=strip_jsonp)['key']
- song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
+
+ formats = []
+ for format_id, details in self._FORMATS.items():
+ formats.append({
+ 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
+ % (details['prefix'], mid, details['ext'], vkey, guid),
+ 'format': format_id,
+ 'format_id': format_id,
+ 'preference': details['preference'],
+ 'abr': details.get('abr'),
+ })
+ self._check_formats(formats, mid)
+ self._sort_formats(formats)
return {
'id': mid,
- 'url': song_url,
+ 'formats': formats,
'title': song_name,
'upload_date': publish_time,
'creator': singer,
'description': lrc_content,
+ 'thumbnail': thumbnail_url,
}
@@ -100,6 +143,7 @@ class QQPlaylistBaseIE(InfoExtractor):
class QQMusicSingerIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:singer'
+ IE_DESC = 'QQ音乐 - 歌手'
_VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
_TEST = {
'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
@@ -144,39 +188,50 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
class QQMusicAlbumIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:album'
+ IE_DESC = 'QQ音乐 - 专辑'
_VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
- _TEST = {
- 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
+ _TESTS = [{
+ 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
'info_dict': {
'id': '000gXCTb2AhRR1',
'title': '我们都是这样长大的',
- 'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
+ 'description': 'md5:179c5dce203a5931970d306aa9607ea6',
},
'playlist_count': 4,
- }
+ }, {
+ 'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3',
+ 'info_dict': {
+ 'id': '002Y5a3b3AlCu3',
+ 'title': '그리고...',
+ 'description': 'md5:a48823755615508a95080e81b51ba729',
+ },
+ 'playlist_count': 8,
+ }]
def _real_extract(self, url):
mid = self._match_id(url)
- album_page = self._download_webpage(
- self.qq_static_url('album', mid), mid, 'Download album page')
-
- entries = self.get_entries_from_page(album_page)
+ album = self._download_json(
+ 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
+ mid, 'Download album page')['data']
- album_name = self._html_search_regex(
- r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
- default=None)
-
- album_detail = self._html_search_regex(
- r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
- album_page, 'album details', default=None)
+ entries = [
+ self.url_result(
+ 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
+ ) for song in album['list']
+ ]
+ album_name = album.get('name')
+ album_detail = album.get('desc')
+ if album_detail is not None:
+ album_detail = album_detail.strip()
return self.playlist_result(entries, mid, album_name, album_detail)
class QQMusicToplistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:toplist'
+ IE_DESC = 'QQ音乐 - 排行榜'
_VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
_TESTS = [{
@@ -226,3 +281,37 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
list_name = topinfo.get('ListName')
list_description = topinfo.get('info')
return self.playlist_result(entries, list_id, list_name, list_description)
+
+
+class QQMusicPlaylistIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:playlist'
+ IE_DESC = 'QQ音乐 - 歌单'
+ _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://y.qq.com/#type=taoge&id=3462654915',
+ 'info_dict': {
+ 'id': '3462654915',
+ 'title': '韩国5月新歌精选下旬',
+ 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
+ },
+ 'playlist_count': 40,
+ }
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ list_json = self._download_json(
+ 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
+ % list_id, list_id, 'Download list page',
+ transform_source=strip_jsonp)['cdlist'][0]
+
+ entries = [
+ self.url_result(
+ 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
+ ) for song in list_json['songlist']
+ ]
+
+ list_name = list_json.get('dissname')
+ list_description = clean_html(unescapeHTML(list_json.get('desc')))
+ return self.playlist_result(entries, list_id, list_name, list_description)
diff --git a/youtube_dl/extractor/quickvid.py b/youtube_dl/extractor/quickvid.py
index af7d76c..f414e23 100644
--- a/youtube_dl/extractor/quickvid.py
+++ b/youtube_dl/extractor/quickvid.py
@@ -24,6 +24,7 @@ class QuickVidIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.(?:png|jpg|gif)$',
'view_count': int,
},
+ 'skip': 'Not accessible from Travis CI server',
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/rds.py b/youtube_dl/extractor/rds.py
new file mode 100644
index 0000000..796adfd
--- /dev/null
+++ b/youtube_dl/extractor/rds.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class RDSIE(InfoExtractor):
+ IE_DESC = 'RDS.ca'
+ _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
+ 'info_dict': {
+ 'id': '3.1132799',
+ 'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
+ 'ext': 'mp4',
+ 'title': 'Fowler Jr. prend la direction de Jacksonville',
+ 'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
+ 'timestamp': 1430397346,
+ 'upload_date': '20150430',
+ 'duration': 154.354,
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ # TODO: extract f4m from 9c9media.com
+ video_url = self._search_regex(
+ r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
+ webpage, 'video url')
+
+ title = self._og_search_title(webpage) or self._html_search_meta(
+ 'title', webpage, 'title', fatal=True)
+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage, 'description')
+ thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
+ [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
+ r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
+ webpage, 'thumbnail', fatal=False)
+ timestamp = parse_iso8601(self._search_regex(
+ r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
+ webpage, 'upload date', fatal=False))
+ duration = parse_duration(self._search_regex(
+ r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
+ webpage, 'duration', fatal=False))
+ age_limit = self._family_friendly_search(webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ }
diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py
index 5a381d9..e4215d5 100644
--- a/youtube_dl/extractor/rtbf.py
+++ b/youtube_dl/extractor/rtbf.py
@@ -21,6 +21,13 @@ class RTBFIE(InfoExtractor):
}
}
+ _QUALITIES = [
+ ('mobile', 'mobile'),
+ ('web', 'SD'),
+ ('url', 'MD'),
+ ('high', 'HD'),
+ ]
+
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -32,14 +39,21 @@ class RTBFIE(InfoExtractor):
r'data-video="([^"]+)"', webpage, 'data video')),
video_id)
- video_url = data.get('downloadUrl') or data.get('url')
-
if data.get('provider').lower() == 'youtube':
+ video_url = data.get('downloadUrl') or data.get('url')
return self.url_result(video_url, 'Youtube')
+ formats = []
+ for key, format_id in self._QUALITIES:
+ format_url = data['sources'].get(key)
+ if format_url:
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ })
return {
'id': video_id,
- 'url': video_url,
+ 'formats': formats,
'title': data['title'],
'description': data.get('description') or data.get('subtitle'),
'thumbnail': data.get('thumbnail'),
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index cfce455..e0c530d 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -12,10 +12,10 @@ class RtlNlIE(InfoExtractor):
IE_NAME = 'rtl.nl'
IE_DESC = 'rtl.nl and rtlxl.nl'
_VALID_URL = r'''(?x)
- https?://(www\.)?
+ https?://(?:www\.)?
(?:
rtlxl\.nl/\#!/[^/]+/|
- rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
+ rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
)
(?P<id>[0-9a-f-]+)'''
@@ -43,22 +43,51 @@ class RtlNlIE(InfoExtractor):
'upload_date': '20150215',
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
}
+ }, {
+ # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
+ 'info_dict': {
+ 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws - Meer beelden van overval juwelier',
+ 'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
+ 'timestamp': 1437233400,
+ 'upload_date': '20150718',
+ 'duration': 30.474,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # encrypted m3u8 streams, georestricted
+ 'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
+ 'only_matching': True,
}]
def _real_extract(self, url):
uuid = self._match_id(url)
info = self._download_json(
- 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
+ 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
uuid)
material = info['material'][0]
- progname = info['abstracts'][0]['name']
- subtitle = material['title'] or info['episodes'][0]['name']
- description = material.get('synopsis') or info['episodes'][0]['synopsis']
+ title = info['abstracts'][0]['name']
+ subtitle = material.get('title')
+ if subtitle:
+ title += ' - %s' % subtitle
+ description = material.get('synopsis')
+
+ meta = info.get('meta', {})
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
- videopath = material['videopath'].replace('.f4m', '.m3u8')
- m3u8_url = 'http://manifest.us.rtl.nl' + videopath
+ # NB: nowadays, recent ffmpeg and avconv can handle these encrypted streams, so
+ # this adaptive -> flash workaround is not required in general, but it also
+ # allows bypassing georestriction therefore is retained for now.
+ videopath = material['videopath'].replace('/adaptive/', '/flash/')
+ m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
@@ -79,7 +108,7 @@ class RtlNlIE(InfoExtractor):
self._sort_formats(formats)
thumbnails = []
- meta = info.get('meta', {})
+
for p in ('poster_base_url', '"thumb_base_url"'):
if not meta.get(p):
continue
@@ -95,7 +124,7 @@ class RtlNlIE(InfoExtractor):
return {
'id': uuid,
- 'title': '%s - %s' % (progname, subtitle),
+ 'title': title,
'formats': formats,
'timestamp': material['original_date'],
'description': description,
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py
new file mode 100644
index 0000000..4e22628
--- /dev/null
+++ b/youtube_dl/extractor/ruutu.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ xpath_text,
+)
+
+
+class RuutuIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
+ 'md5': 'ab2093f39be1ca8581963451b3c0234f',
+ 'info_dict': {
+ 'id': '2058907',
+ 'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
+ 'ext': 'mp4',
+ 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
+ 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 114,
+ 'age_limit': 0,
+ },
+ },
+ {
+ 'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
+ 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
+ 'info_dict': {
+ 'id': '2057306',
+ 'display_id': 'superpesis-katso-koko-kausi-ruudussa',
+ 'ext': 'mp4',
+ 'title': 'Superpesis: katso koko kausi Ruudussa',
+ 'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 40,
+ 'age_limit': 0,
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'data-media-id="(\d+)"', webpage, 'media id')
+
+ video_xml_url = None
+
+ media_data = self._search_regex(
+ r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage,
+ 'media data', default=None)
+ if media_data:
+ media_json = self._parse_json(media_data, display_id, fatal=False)
+ if media_json:
+ xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl')
+ if xml_url:
+ video_xml_url = xml_url.replace('{ID}', video_id)
+
+ if not video_xml_url:
+ video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id
+
+ video_xml = self._download_xml(video_xml_url, video_id)
+
+ formats = []
+ processed_urls = []
+
+ def extract_formats(node):
+ for child in node:
+ if child.tag.endswith('Files'):
+ extract_formats(child)
+ elif child.tag.endswith('File'):
+ video_url = child.text
+ if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
+ return
+ processed_urls.append(video_url)
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id='hls'))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds'))
+ else:
+ proto = compat_urllib_parse_urlparse(video_url).scheme
+ if not child.tag.startswith('HTTP') and proto != 'rtmp':
+ continue
+ preference = -1 if proto == 'rtmp' else 1
+ label = child.get('label')
+ tbr = int_or_none(child.get('bitrate'))
+ width, height = [int_or_none(x) for x in child.get('resolution', '').split('x')]
+ formats.append({
+ 'format_id': '%s-%s' % (proto, label if label else tbr),
+ 'url': video_url,
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'preference': preference,
+ })
+
+ extract_formats(video_xml.find('./Clip'))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
+ 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 10251f2..f3c8070 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -83,7 +83,7 @@ class SafariIE(SafariBaseIE):
library/view/[^/]+|
api/v1/book
)/
- (?P<course_id>\d+)/
+ (?P<course_id>[^/]+)/
(?:chapter(?:-content)?/)?
(?P<part>part\d+)\.html
'''
@@ -100,6 +100,10 @@ class SafariIE(SafariBaseIE):
}, {
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
'only_matching': True,
+ }, {
+ # non-digits in course id
+ 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -122,7 +126,7 @@ class SafariCourseIE(SafariBaseIE):
IE_NAME = 'safari:course'
IE_DESC = 'safaribooksonline.com online courses'
- _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+ _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py
index d4bd1a0..d6ee2d9 100644
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -1,17 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
-from ..utils import (
- js_to_json,
- remove_end,
-)
class SBSIE(InfoExtractor):
IE_DESC = 'sbs.com.au'
- _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)'
_TESTS = [{
# Original URL is handled by the generic IE which finds the iframe:
@@ -21,39 +16,36 @@ class SBSIE(InfoExtractor):
'info_dict': {
'id': '320403011771',
'ext': 'mp4',
- 'title': 'Dingo Conservation',
- 'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
+ 'title': 'Dingo Conservation (The Feed)',
+ 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
'thumbnail': 're:http://.*\.jpg',
+ 'duration': 308,
},
- 'add_ies': ['generic'],
}, {
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
'only_matching': True,
+ }, {
+ 'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- player = self._search_regex(
- r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
- webpage, 'player')
- player = re.sub(r"'\s*\+\s*[\da-zA-Z_]+\s*\+\s*'", '', player)
-
- release_urls = self._parse_json(js_to_json(player), video_id)
+ webpage = self._download_webpage(
+ 'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id)
- theplatform_url = release_urls.get('progressive') or release_urls['standard']
+ player_params = self._parse_json(
+ self._search_regex(
+ r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
+ video_id)
- title = remove_end(self._og_search_title(webpage), ' (The Feed)')
- description = self._html_search_meta('description', webpage)
- thumbnail = self._og_search_thumbnail(webpage)
+ urls = player_params['releaseUrls']
+ theplatform_url = (urls.get('progressive') or urls.get('standard') or
+ urls.get('html') or player_params['relatedItemsURL'])
return {
'_type': 'url_transparent',
'id': video_id,
'url': theplatform_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
}
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
index 9f3e944..a076776 100644
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
import base64
from .common import InfoExtractor
@@ -35,8 +34,7 @@ class SharedIE(InfoExtractor):
raise ExtractorError(
'Video %s does not exist' % video_id, expected=True)
- download_form = dict(re.findall(
- r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage))
+ download_form = self._hidden_inputs(webpage)
request = compat_urllib_request.Request(
url, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index 24746a0..93a7cfe 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -53,7 +53,7 @@ class SmotriIE(InfoExtractor):
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
},
},
- # video-password
+ # video-password, not approved by moderator
{
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
'md5': 'f6331cef33cad65a0815ee482a54440b',
@@ -71,7 +71,24 @@ class SmotriIE(InfoExtractor):
},
'skip': 'Video is not approved by moderator',
},
- # age limit + video-password
+ # video-password
+ {
+ 'url': 'http://smotri.com/video/view/?id=v6984858774#',
+ 'md5': 'f11e01d13ac676370fc3b95b9bda11b0',
+ 'info_dict': {
+ 'id': 'v6984858774',
+ 'ext': 'mp4',
+ 'title': 'Дача Солженицина ПАРОЛЬ 223322',
+ 'uploader': 'psavari1',
+ 'uploader_id': 'psavari1',
+ 'upload_date': '20081103',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'videopassword': '223322',
+ },
+ },
+ # age limit + video-password, not approved by moderator
{
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
'md5': '91e909c9f0521adf5ee86fbe073aad70',
@@ -90,19 +107,22 @@ class SmotriIE(InfoExtractor):
},
'skip': 'Video is not approved by moderator',
},
- # not approved by moderator, but available
+ # age limit + video-password
{
- 'url': 'http://smotri.com/video/view/?id=v28888533b73',
- 'md5': 'f44bc7adac90af518ef1ecf04893bb34',
+ 'url': 'http://smotri.com/video/view/?id=v7780025814',
+ 'md5': 'b4599b068422559374a59300c5337d72',
'info_dict': {
- 'id': 'v28888533b73',
+ 'id': 'v7780025814',
'ext': 'mp4',
- 'title': 'Russian Spies Killed By ISIL Child Soldier',
- 'uploader': 'Mopeder',
- 'uploader_id': 'mopeder',
- 'duration': 71,
- 'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
- 'upload_date': '20150114',
+ 'title': 'Sexy Beach (пароль 123)',
+ 'uploader': 'вАся',
+ 'uploader_id': 'asya_prosto',
+ 'upload_date': '20081218',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'videopassword': '123'
},
},
# swf player
@@ -152,6 +172,10 @@ class SmotriIE(InfoExtractor):
'getvideoinfo': '1',
}
+ video_password = self._downloader.params.get('videopassword', None)
+ if video_password:
+ video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
+
request = compat_urllib_request.Request(
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -161,13 +185,18 @@ class SmotriIE(InfoExtractor):
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
if not video_url:
- if video.get('_moderate_no') or not video.get('moderated'):
+ if video.get('_moderate_no'):
raise ExtractorError(
'Video %s has not been approved by moderator' % video_id, expected=True)
if video.get('error'):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+ if video.get('_pass_protected') == 1:
+ msg = ('Invalid video password' if video_password
+ else 'This video is protected by a password, use the --video-password option')
+ raise ExtractorError(msg, expected=True)
+
title = video['title']
thumbnail = video['_imgURL']
upload_date = unified_strdate(video['added'])
diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py
new file mode 100644
index 0000000..cf495f3
--- /dev/null
+++ b/youtube_dl/extractor/snagfilms.py
@@ -0,0 +1,171 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ parse_duration,
+)
+
+
+class SnagFilmsEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})'
+ _TESTS = [{
+ 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
+ 'md5': '2924e9215c6eff7a55ed35b72276bd93',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ }
+ }, {
+ 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if '>This film is not playable in your area.<' in webpage:
+ raise ExtractorError(
+ 'Film %s is not playable in your area.' % video_id, expected=True)
+
+ formats = []
+ for source in self._parse_json(js_to_json(self._search_regex(
+ r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
+ file_ = source.get('file')
+ if not file_:
+ continue
+ type_ = source.get('type')
+ format_id = source.get('label')
+ ext = determine_ext(file_)
+ if any(_ == 'm3u8' for _ in (type_, ext)):
+ formats.extend(self._extract_m3u8_formats(
+ file_, video_id, 'mp4', m3u8_id='hls'))
+ else:
+ bitrate = int_or_none(self._search_regex(
+ r'(\d+)kbps', file_, 'bitrate', default=None))
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None))
+ formats.append({
+ 'url': file_,
+ 'format_id': format_id,
+ 'tbr': bitrate,
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ title = self._search_regex(
+ [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
+ webpage, 'title')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ }
+
+
+class SnagFilmsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
+ 'md5': '19844f897b35af219773fd63bdec2942',
+ 'info_dict': {
+ 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
+ 'display_id': 'lost_for_life',
+ 'ext': 'mp4',
+ 'title': 'Lost for Life',
+ 'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'duration': 4489,
+ 'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
+ }
+ }, {
+ 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
+ 'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
+ 'info_dict': {
+ 'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
+ 'display_id': 'the_world_cut_project/india',
+ 'ext': 'mp4',
+ 'title': 'India',
+ 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'duration': 979,
+ 'categories': ['Documentary', 'Sports', 'Politics']
+ }
+ }, {
+ # Film is not playable in your area.
+ 'url': 'http://www.snagfilms.com/films/title/inside_mecca',
+ 'only_matching': True,
+ }, {
+ # Film is not available.
+ 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ if ">Sorry, the Film you're looking for is not available.<" in webpage:
+ raise ExtractorError(
+ 'Film %s is not available.' % display_id, expected=True)
+
+ film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
+
+ snag = self._parse_json(
+ self._search_regex(
+ 'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
+ display_id)
+
+ for item in snag:
+ if item.get('data', {}).get('film', {}).get('id') == film_id:
+ data = item['data']['film']
+ title = data['title']
+ description = clean_html(data.get('synopsis'))
+ thumbnail = data.get('image')
+ duration = int_or_none(data.get('duration') or data.get('runtime'))
+ categories = [
+ category['title'] for category in data.get('categories', [])
+ if category.get('title')]
+ break
+ else:
+ title = self._search_regex(
+ r'itemprop="title">([^<]+)<', webpage, 'title')
+ description = self._html_search_regex(
+ r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
+ webpage, 'description', default=None) or self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._search_regex(
+ r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
+ webpage, 'duration', fatal=False))
+ categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id,
+ 'id': film_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'categories': categories,
+ }
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 29bd9ce..ba2d5e1 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -6,9 +6,12 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_str,
- compat_urllib_request
+ compat_urllib_request,
+ compat_urllib_parse,
+)
+from ..utils import (
+ ExtractorError,
)
-from ..utils import ExtractorError
class SohuIE(InfoExtractor):
@@ -26,7 +29,7 @@ class SohuIE(InfoExtractor):
'skip': 'On available in China',
}, {
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
- 'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
+ 'md5': '699060e75cf58858dd47fb9c03c42cfb',
'info_dict': {
'id': '409385080',
'ext': 'mp4',
@@ -34,7 +37,7 @@ class SohuIE(InfoExtractor):
}
}, {
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
- 'md5': '49308ff6dafde5ece51137d04aec311e',
+ 'md5': '9bf34be48f2f4dadcb226c74127e203c',
'info_dict': {
'id': '78693464',
'ext': 'mp4',
@@ -48,7 +51,7 @@ class SohuIE(InfoExtractor):
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
},
'playlist': [{
- 'md5': '492923eac023ba2f13ff69617c32754a',
+ 'md5': 'bdbfb8f39924725e6589c146bc1883ad',
'info_dict': {
'id': '78910339_part1',
'ext': 'mp4',
@@ -56,7 +59,7 @@ class SohuIE(InfoExtractor):
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
}
}, {
- 'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
+ 'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
'info_dict': {
'id': '78910339_part2',
'ext': 'mp4',
@@ -64,7 +67,7 @@ class SohuIE(InfoExtractor):
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
}
}, {
- 'md5': '93584716ee0657c0b205b8aa3d27aa13',
+ 'md5': '8407e634175fdac706766481b9443450',
'info_dict': {
'id': '78910339_part3',
'ext': 'mp4',
@@ -139,21 +142,42 @@ class SohuIE(InfoExtractor):
for i in range(part_count):
formats = []
for format_id, format_data in formats_json.items():
+ allot = format_data['allot']
+
data = format_data['data']
+ clips_url = data['clipsURL']
+ su = data['su']
- # URLs starts with http://newflv.sohu.ccgslb.net/ is not usable
- # so retry until got a working URL
video_url = 'newflv.sohu.ccgslb.net'
+ cdnId = None
retries = 0
- while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
- download_note = 'Download information from CDN gateway for format ' + format_id
+
+ while 'newflv.sohu.ccgslb.net' in video_url:
+ params = {
+ 'prot': 9,
+ 'file': clips_url[i],
+ 'new': su[i],
+ 'prod': 'flash',
+ }
+
+ if cdnId is not None:
+ params['idc'] = cdnId
+
+ download_note = 'Downloading %s video URL part %d of %d' % (
+ format_id, i + 1, part_count)
+
if retries > 0:
download_note += ' (retry #%d)' % retries
+ part_info = self._parse_json(self._download_webpage(
+ 'http://%s/?%s' % (allot, compat_urllib_parse.urlencode(params)),
+ video_id, download_note), video_id)
+
+ video_url = part_info['url']
+ cdnId = part_info.get('nid')
+
retries += 1
- cdn_info = self._download_json(
- 'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
- video_id, download_note)
- video_url = cdn_info['url']
+ if retries > 5:
+ raise ExtractorError('Failed to get video URL')
formats.append({
'url': video_url,
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index c23c5ee..118ca48 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
- (?!sets/|likes/?(?:$|[?#]))
+ (?!sets/|(?:likes|tracks)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@@ -307,6 +307,9 @@ class SoundcloudUserIE(SoundcloudIE):
'title': 'The Royal Concept',
},
'playlist_mincount': 1,
+ }, {
+ 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
index 06d6e66..5fa6faf 100644
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -4,7 +4,7 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
@@ -27,7 +27,7 @@ class SpankwireIE(InfoExtractor):
'description': 'Crazy Bitch X rated music video.',
'uploader': 'oreusz',
'uploader_id': '124697',
- 'upload_date': '20070508',
+ 'upload_date': '20070507',
'age_limit': 18,
}
}
@@ -44,7 +44,7 @@ class SpankwireIE(InfoExtractor):
title = self._html_search_regex(
r'<h1>([^<]+)', webpage, 'title')
description = self._html_search_regex(
- r'<div\s+id="descriptionContent">([^<]+)<',
+ r'(?s)<div\s+id="descriptionContent">(.+?)</div>',
webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
@@ -64,12 +64,12 @@ class SpankwireIE(InfoExtractor):
r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
- r'Comments<span[^>]+>\s*\(([\d,\.]+)\)</span>',
+ r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
webpage, 'comment count', fatal=False))
video_urls = list(map(
- compat_urllib_parse.unquote,
- re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage)))
+ compat_urllib_parse_unquote,
+ re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1:
password = self._search_regex(
r'flashvars\.video_title = "([^"]+)',
diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py
index 359722a..27f4033 100644
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@@ -2,7 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import float_or_none
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+ determine_ext,
+ float_or_none,
+)
class SpiegeltvIE(InfoExtractor):
@@ -17,7 +21,7 @@ class SpiegeltvIE(InfoExtractor):
'thumbnail': 're:http://.*\.jpg$',
},
'params': {
- # rtmp download
+ # m3u8 download
'skip_download': True,
}
}, {
@@ -53,7 +57,37 @@ class SpiegeltvIE(InfoExtractor):
server_json = self._download_json(
'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
video_id, note='Downloading server information')
- server = server_json['streamingserver'][0]['endpoint']
+
+ format = '16x9' if is_wide else '4x3'
+
+ formats = []
+ for streamingserver in server_json['streamingserver']:
+ endpoint = streamingserver.get('endpoint')
+ if not endpoint:
+ continue
+ play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format)
+ if endpoint.startswith('rtmp'):
+ formats.append({
+ 'url': endpoint,
+ 'format_id': 'rtmp',
+ 'app': compat_urllib_parse_urlparse(endpoint).path[1:],
+ 'play_path': play_path,
+ 'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
+ 'ext': 'flv',
+ 'rtmp_live': True,
+ })
+ elif determine_ext(endpoint) == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ endpoint.replace('[video]', play_path),
+ video_id, 'm4v',
+ preference=1, # Prefer hls since it allows to workaround georestriction
+ m3u8_id='hls', fatal=False)
+ if m3u8_formats is not False:
+ formats.extend(m3u8_formats)
+ else:
+ formats.append({
+ 'url': endpoint,
+ })
thumbnails = []
for image in media_json['images']:
@@ -65,17 +99,12 @@ class SpiegeltvIE(InfoExtractor):
description = media_json['subtitle']
duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)
- format = '16x9' if is_wide else '4x3'
-
- url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v'
return {
'id': video_id,
'title': title,
- 'url': url,
- 'ext': 'm4v',
'description': description,
'duration': duration,
'thumbnails': thumbnails,
- 'rtmp_live': True,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
index 854d01b..e527aa9 100644
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@@ -44,7 +44,7 @@ class SunPornoIE(InfoExtractor):
webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
- r'class="views">\s*(\d+)\s*<',
+ r'class="views">(?:<noscript>)?\s*(\d+)\s*<',
webpage, 'view count', fatal=False))
comment_count = int_or_none(self._html_search_regex(
r'(\d+)</b> Comments?',
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index bfe07b0..cf1b37a 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -11,13 +11,13 @@ class TagesschauIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
_TESTS = [{
- 'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
- 'md5': 'bcdeac2194fb296d599ce7929dfa4009',
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
+ 'md5': '917a228bc7df7850783bc47979673a09',
'info_dict': {
- 'id': '1399128',
+ 'id': '102143',
'ext': 'mp4',
- 'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
- 'description': 'md5:69da3c61275b426426d711bde96463ab',
+ 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
+ 'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
'thumbnail': 're:^http:.*\.jpg$',
},
}, {
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index b2a4b1f..d1b7264 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -51,6 +51,17 @@ class TeamcocoIE(InfoExtractor):
'params': {
'skip_download': True, # m3u8 downloads
}
+ }, {
+ 'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
+ 'info_dict': {
+ 'id': '89341',
+ 'ext': 'mp4',
+ 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+ 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ }
}
]
_VIDEO_ID_REGEXES = (
@@ -110,9 +121,23 @@ class TeamcocoIE(InfoExtractor):
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
for filed in data['files']:
if determine_ext(filed['url']) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- filed['url'], video_id, ext='mp4'))
+ # compat_urllib_parse.urljoin does not work here
+ if filed['url'].startswith('/'):
+ m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
+ else:
+ m3u8_url = filed['url']
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4')
+ for m3u8_format in m3u8_formats:
+ if m3u8_format not in formats:
+ formats.append(m3u8_format)
+ elif determine_ext(filed['url']) == 'f4m':
+ # TODO Correct f4m extraction
+ continue
else:
+ if filed['url'].startswith('/mp4:protected/'):
+ # TODO Correct extraction for these files
+ continue
m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
if m_format is not None:
format_id = m_format.group(1)
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 92731ad..83d833e 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -26,7 +26,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
class ThePlatformIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
- (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
+ (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
_TESTS = [{
@@ -56,6 +56,17 @@ class ThePlatformIE(InfoExtractor):
# rtmp download
'skip_download': True,
}
+ }, {
+ 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
+ 'info_dict': {
+ 'id': 'yMBg9E8KFxZD',
+ 'ext': 'mp4',
+ 'description': 'md5:644ad9188d655b742f942bf2e06b002d',
+ 'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
+ }
+ }, {
+ 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
+ 'only_matching': True,
}]
@staticmethod
@@ -85,6 +96,11 @@ class ThePlatformIE(InfoExtractor):
if not provider_id:
provider_id = 'dJ5BDC'
+ path = provider_id
+ if mobj.group('media'):
+ path += '/media'
+ path += '/' + video_id
+
if smuggled_data.get('force_smil_url', False):
smil_url = url
elif mobj.group('config'):
@@ -94,8 +110,7 @@ class ThePlatformIE(InfoExtractor):
config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
- smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
- 'format=smil&mbr=true'.format(provider_id, video_id))
+ smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
sig = smuggled_data.get('sig')
if sig:
@@ -112,7 +127,7 @@ class ThePlatformIE(InfoExtractor):
else:
raise ExtractorError(error_msg, expected=True)
- info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
+ info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)
diff --git a/youtube_dl/extractor/thesixtyone.py b/youtube_dl/extractor/thesixtyone.py
index a77c6a2..5d09eb9 100644
--- a/youtube_dl/extractor/thesixtyone.py
+++ b/youtube_dl/extractor/thesixtyone.py
@@ -1,9 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
-import re
-
from .common import InfoExtractor
from ..utils import unified_strdate
@@ -17,7 +14,7 @@ class TheSixtyOneIE(InfoExtractor):
song
)/(?P<id>[A-Za-z0-9]+)/?$'''
_SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
- _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}.thesixtyone.com/thesixtyone_production/audio/{0:}_stream'
+ _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
_THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
_TESTS = [
{
@@ -70,14 +67,19 @@ class TheSixtyOneIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- song_id = mobj.group('id')
+ song_id = self._match_id(url)
webpage = self._download_webpage(
self._SONG_URL_TEMPLATE.format(song_id), song_id)
- song_data = json.loads(self._search_regex(
- r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'))
+ song_data = self._parse_json(self._search_regex(
+ r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'), song_id)
+
+ if self._search_regex(r'(t61\.s3_audio_load\s*=\s*1\.0;)', webpage, 's3_audio_load marker', default=None):
+ song_data['audio_server'] = 's3.amazonaws.com'
+ else:
+ song_data['audio_server'] = song_data['audio_server'] + '.thesixtyone.com'
+
keys = [self._DECODE_MAP.get(s, s) for s in song_data['key']]
url = self._SONG_FILE_URL_TEMPLATE.format(
"".join(reversed(keys)), **song_data)
diff --git a/youtube_dl/extractor/thisamericanlife.py b/youtube_dl/extractor/thisamericanlife.py
new file mode 100644
index 0000000..36493a5
--- /dev/null
+++ b/youtube_dl/extractor/thisamericanlife.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisAmericanLifeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
+ 'md5': '8f7d2da8926298fdfca2ee37764c11ce',
+ 'info_dict': {
+ 'id': '487',
+ 'ext': 'm4a',
+ 'title': '487: Harper High School, Part One',
+ 'description': 'md5:ee40bdf3fb96174a9027f76dbecea655',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.thisamericanlife.org/play_full.php?play=487',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.thisamericanlife.org/radio-archives/episode/%s' % video_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'url': 'http://stream.thisamericanlife.org/{0}/stream/{0}_64k.m3u8'.format(video_id),
+ 'protocol': 'm3u8_native',
+ 'ext': 'm4a',
+ 'acodec': 'aac',
+ 'vcodec': 'none',
+ 'abr': 64,
+ 'title': self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True),
+ 'description': self._html_search_meta(r'description', webpage, 'description'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py
index 9f9e388..1326361 100644
--- a/youtube_dl/extractor/tlc.py
+++ b/youtube_dl/extractor/tlc.py
@@ -12,17 +12,22 @@ class TlcIE(DiscoveryIE):
IE_NAME = 'tlc.com'
_VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
- _TEST = {
+ # DiscoveryIE has _TESTS
+ _TESTS = [{
'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
- 'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a',
'info_dict': {
- 'id': '853232',
+ 'id': '104493',
'ext': 'mp4',
- 'title': 'Cake Boss: Too Big to Fly',
+ 'title': 'Too Big to Fly',
'description': 'Buddy has taken on a high flying task.',
'duration': 119,
+ 'timestamp': 1393365060,
+ 'upload_date': '20140225',
},
- }
+ 'params': {
+ 'skip_download': True, # requires ffmpef
+ },
+ }]
class TlcDeIE(InfoExtractor):
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py
index c282865..49516ab 100644
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -3,39 +3,70 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- parse_duration,
fix_xml_ampersands,
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ str_to_int,
+ xpath_text,
)
-class TNAFlixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
-
- _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
- _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
- _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
-
- _TESTS = [
- {
- 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
- 'md5': 'ecf3498417d09216374fc5907f9c6ec0',
- 'info_dict': {
- 'id': '553878',
- 'display_id': 'Carmella-Decesare-striptease',
- 'ext': 'mp4',
- 'title': 'Carmella Decesare - striptease',
- 'description': '',
- 'thumbnail': 're:https?://.*\.jpg$',
- 'duration': 91,
- 'age_limit': 18,
- }
- },
- {
- 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
- 'only_matching': True,
- }
+class TNAFlixNetworkBaseIE(InfoExtractor):
+ # May be overridden in descendants if necessary
+ _CONFIG_REGEX = [
+ r'flashvars\.config\s*=\s*escape\("([^"]+)"',
+ r'<input[^>]+name="config\d?" value="([^"]+)"',
]
+ _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
+ _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
+ _UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
+ _VIEW_COUNT_REGEX = None
+ _COMMENT_COUNT_REGEX = None
+ _AVERAGE_RATING_REGEX = None
+ _CATEGORIES_REGEX = r'<li[^>]*>\s*<span[^>]+class="infoTitle"[^>]*>Categories:</span>\s*<span[^>]+class="listView"[^>]*>(.+?)</span>\s*</li>'
+
+ def _extract_thumbnails(self, flix_xml):
+
+ def get_child(elem, names):
+ for name in names:
+ child = elem.find(name)
+ if child is not None:
+ return child
+
+ timeline = get_child(flix_xml, ['timeline', 'rolloverBarImage'])
+ if timeline is None:
+ return
+
+ pattern_el = get_child(timeline, ['imagePattern', 'pattern'])
+ if pattern_el is None or not pattern_el.text:
+ return
+
+ first_el = get_child(timeline, ['imageFirst', 'first'])
+ last_el = get_child(timeline, ['imageLast', 'last'])
+ if first_el is None or last_el is None:
+ return
+
+ first_text = first_el.text
+ last_text = last_el.text
+ if not first_text.isdigit() or not last_text.isdigit():
+ return
+
+ first = int(first_text)
+ last = int(last_text)
+ if first > last:
+ return
+
+ width = int_or_none(xpath_text(timeline, './imageWidth', 'thumbnail width'))
+ height = int_or_none(xpath_text(timeline, './imageHeight', 'thumbnail height'))
+
+ return [{
+ 'url': self._proto_relative_url(pattern_el.text.replace('#', compat_str(i)), 'http:'),
+ 'width': width,
+ 'height': height,
+ } for i in range(first, last + 1)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -44,47 +75,195 @@ class TNAFlixIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
- title = self._html_search_regex(
- self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
- description = self._html_search_regex(
- self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
-
- age_limit = self._rta_search(webpage)
-
- duration = parse_duration(self._html_search_meta(
- 'duration', webpage, 'duration', default=None))
-
cfg_url = self._proto_relative_url(self._html_search_regex(
self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
cfg_xml = self._download_xml(
- cfg_url, display_id, note='Downloading metadata',
+ cfg_url, display_id, 'Downloading metadata',
transform_source=fix_xml_ampersands)
- thumbnail = self._proto_relative_url(
- cfg_xml.find('./startThumb').text, 'http:')
-
formats = []
+
+ def extract_video_url(vl):
+ return re.sub('speed=\d+', 'speed=', vl.text)
+
+ video_link = cfg_xml.find('./videoLink')
+ if video_link is not None:
+ formats.append({
+ 'url': extract_video_url(video_link),
+ 'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'),
+ })
+
for item in cfg_xml.findall('./quality/item'):
- video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
- format_id = item.find('res').text
- fmt = {
- 'url': self._proto_relative_url(video_url, 'http:'),
+ video_link = item.find('./videoLink')
+ if video_link is None:
+ continue
+ res = item.find('res')
+ format_id = None if res is None else res.text
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': self._proto_relative_url(extract_video_url(video_link), 'http:'),
'format_id': format_id,
- }
- m = re.search(r'^(\d+)', format_id)
- if m:
- fmt['height'] = int(m.group(1))
- formats.append(fmt)
+ 'height': height,
+ })
+
self._sort_formats(formats)
+ thumbnail = self._proto_relative_url(
+ xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
+ thumbnails = self._extract_thumbnails(cfg_xml)
+
+ title = self._html_search_regex(
+ self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
+
+ age_limit = self._rta_search(webpage)
+
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', default=None))
+
+ def extract_field(pattern, name):
+ return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
+
+ description = extract_field(self._DESCRIPTION_REGEX, 'description')
+ uploader = extract_field(self._UPLOADER_REGEX, 'uploader')
+ view_count = str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count'))
+ comment_count = str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count'))
+ average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
+
+ categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
+ categories = categories_str.split(', ') if categories_str is not None else []
+
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
'duration': duration,
'age_limit': age_limit,
+ 'uploader': uploader,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
'formats': formats,
}
+
+
+class TNAFlixIE(TNAFlixNetworkBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
+
+ _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
+ _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
+ _UPLOADER_REGEX = r'(?s)<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)<div'
+
+ _TESTS = [{
+ # anonymous uploader, no categories
+ 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+ 'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+ 'info_dict': {
+ 'id': '553878',
+ 'display_id': 'Carmella-Decesare-striptease',
+ 'ext': 'mp4',
+ 'title': 'Carmella Decesare - striptease',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'duration': 91,
+ 'age_limit': 18,
+ 'uploader': 'Anonymous',
+ 'categories': [],
+ }
+ }, {
+ # non-anonymous uploader, categories
+ 'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
+ 'md5': '0f5d4d490dbfd117b8607054248a07c0',
+ 'info_dict': {
+ 'id': '6538',
+ 'display_id': 'Educational-xxx-video',
+ 'ext': 'mp4',
+ 'title': 'Educational xxx video',
+ 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'duration': 164,
+ 'age_limit': 18,
+ 'uploader': 'bobwhite39',
+ 'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'],
+ }
+ }, {
+ 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
+ 'only_matching': True,
+ }]
+
+
+class EMPFlixIE(TNAFlixNetworkBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
+
+ _UPLOADER_REGEX = r'<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)</li>'
+
+ _TESTS = [{
+ 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
+ 'md5': 'b1bc15b6412d33902d6e5952035fcabc',
+ 'info_dict': {
+ 'id': '33051',
+ 'display_id': 'Amateur-Finger-Fuck',
+ 'ext': 'mp4',
+ 'title': 'Amateur Finger Fuck',
+ 'description': 'Amateur solo finger fucking.',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'duration': 83,
+ 'age_limit': 18,
+ 'uploader': 'cwbike',
+ 'categories': ['Amateur', 'Anal', 'Fisting', 'Home made', 'Solo'],
+ }
+ }, {
+ 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
+ 'only_matching': True,
+ }]
+
+
+class MovieFapIE(TNAFlixNetworkBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?moviefap\.com/videos/(?P<id>[0-9a-f]+)/(?P<display_id>[^/]+)\.html'
+
+ _VIEW_COUNT_REGEX = r'<br>Views\s*<strong>([\d,.]+)</strong>'
+ _COMMENT_COUNT_REGEX = r'<span[^>]+id="comCount"[^>]*>([\d,.]+)</span>'
+ _AVERAGE_RATING_REGEX = r'Current Rating\s*<br>\s*<strong>([\d.]+)</strong>'
+ _CATEGORIES_REGEX = r'(?s)<div[^>]+id="vid_info"[^>]*>\s*<div[^>]*>.+?</div>(.*?)<br>'
+
+ _TESTS = [{
+ # normal, multi-format video
+ 'url': 'http://www.moviefap.com/videos/be9867c9416c19f54a4a/experienced-milf-amazing-handjob.html',
+ 'md5': '26624b4e2523051b550067d547615906',
+ 'info_dict': {
+ 'id': 'be9867c9416c19f54a4a',
+ 'display_id': 'experienced-milf-amazing-handjob',
+ 'ext': 'mp4',
+ 'title': 'Experienced MILF Amazing Handjob',
+ 'description': 'Experienced MILF giving an Amazing Handjob',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'uploader': 'darvinfred06',
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'],
+ }
+ }, {
+ # quirky single-format case where the extension is given as fid, but the video is really an flv
+ 'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html',
+ 'md5': 'fa56683e291fc80635907168a743c9ad',
+ 'info_dict': {
+ 'id': 'e5da0d3edce5404418f5',
+ 'display_id': 'jeune-couple-russe',
+ 'ext': 'flv',
+ 'title': 'Jeune Couple Russe',
+ 'description': 'Amateur',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'uploader': 'whiskeyjar',
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['Amateur', 'Teen'],
+ }
+ }]
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
index 6ca8840..c9cb693 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -47,7 +47,7 @@ class Tube8IE(InfoExtractor):
webpage = self._download_webpage(req, display_id)
flashvars = json.loads(self._html_search_regex(
- r'flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
+ r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'))
video_url = flashvars['video_url']
if flashvars.get('encrypted') is True:
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index e621880..3d3b635 100644
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -39,6 +39,17 @@ class TumblrIE(InfoExtractor):
'timestamp': 1430931613,
},
'add_ie': ['Vidme'],
+ }, {
+ 'url': 'http://camdamage.tumblr.com/post/98846056295/',
+ 'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
+ 'info_dict': {
+ 'id': '105463834',
+ 'ext': 'mp4',
+ 'title': 'Cam Damage-HD 720p',
+ 'uploader': 'John Moyer',
+ 'uploader_id': 'user32021558',
+ },
+ 'add_ie': ['Vimeo'],
}]
def _real_extract(self, url):
@@ -47,18 +58,16 @@ class TumblrIE(InfoExtractor):
blog = m_url.group('blog_name')
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
- webpage = self._download_webpage(url, video_id)
-
- vid_me_embed_url = self._search_regex(
- r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
- webpage, 'vid.me embed', default=None)
- if vid_me_embed_url is not None:
- return self.url_result(vid_me_embed_url, 'Vidme')
+ webpage, urlh = self._download_webpage_handle(url, video_id)
iframe_url = self._search_regex(
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
- webpage, 'iframe url')
- iframe = self._download_webpage(iframe_url, video_id)
+ webpage, 'iframe url', default=None)
+ if iframe_url is None:
+ return self.url_result(urlh.geturl(), 'Generic')
+
+ iframe = self._download_webpage(iframe_url, video_id,
+ 'Downloading iframe page')
video_url = self._search_regex(r'<source src="([^"]+)"',
iframe, 'video url')
diff --git a/youtube_dl/extractor/turbo.py b/youtube_dl/extractor/turbo.py
index 29703a8..7ae63a4 100644
--- a/youtube_dl/extractor/turbo.py
+++ b/youtube_dl/extractor/turbo.py
@@ -23,7 +23,7 @@ class TurboIE(InfoExtractor):
'ext': 'mp4',
'duration': 3715,
'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
- 'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
+ 'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
'thumbnail': 're:^https?://.*\.jpg$',
}
}
@@ -42,7 +42,7 @@ class TurboIE(InfoExtractor):
title = xpath_text(item, './title', 'title')
duration = int_or_none(xpath_text(item, './durate', 'duration'))
thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
- description = self._og_search_description(webpage)
+ description = self._html_search_meta('description', webpage)
formats = []
get_quality = qualities(['3g', 'sd', 'hq'])
diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py
new file mode 100644
index 0000000..3a4f393
--- /dev/null
+++ b/youtube_dl/extractor/tvc.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+)
+
+
+class TVCIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
+ 'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
+ 'info_dict': {
+ 'id': '74622',
+ 'ext': 'mp4',
+ 'title': 'События. "События". Эфир от 22.05.2015 14:30',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 1122,
+ },
+ }
+
+ @classmethod
+ def _extract_url(cls, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://www.tvc.ru/video/json/id/%s' % video_id, video_id)
+
+ formats = []
+ for info in video.get('path', {}).get('quality', []):
+ video_url = info.get('url')
+ if not video_url:
+ continue
+ format_id = self._search_regex(
+ r'cdnvideo/([^/]+?)(?:-[^/]+?)?/', video_url,
+ 'format id', default=None)
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'width': int_or_none(info.get('width')),
+ 'height': int_or_none(info.get('height')),
+ 'tbr': int_or_none(info.get('bitrate')),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'thumbnail': video.get('picture'),
+ 'duration': int_or_none(video.get('duration')),
+ 'formats': formats,
+ }
+
+
+class TVCArticleIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
+ 'info_dict': {
+ 'id': '74622',
+ 'ext': 'mp4',
+ 'title': 'События. "События". Эфир от 22.05.2015 14:30',
+ 'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 1122,
+ },
+ }, {
+ 'url': 'http://www.tvc.ru/news/show/id/69944',
+ 'info_dict': {
+ 'id': '75399',
+ 'ext': 'mp4',
+ 'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках',
+ 'description': 'md5:f2098f71e21f309e89f69b525fd9846e',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 278,
+ },
+ }, {
+ 'url': 'http://www.tvc.ru/channel/brand/id/47/show/episodes#',
+ 'info_dict': {
+ 'id': '2185',
+ 'ext': 'mp4',
+ 'title': 'Ещё не поздно. Эфир от 03.08.2013',
+ 'description': 'md5:51fae9f3f8cfe67abce014e428e5b027',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 3316,
+ },
+ }]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, self._match_id(url))
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'TVC',
+ 'url': self._og_search_video_url(webpage),
+ 'title': clean_html(self._og_search_title(webpage)),
+ 'description': clean_html(self._og_search_description(webpage)),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
index e83e31a..79863e7 100644
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -26,6 +26,7 @@ class TVPlayIE(InfoExtractor):
viasat4play\.no/programmer|
tv6play\.no/programmer|
tv3play\.dk/programmer|
+ play\.novatv\.bg/programi
)/[^/]+/(?P<id>\d+)
'''
_TESTS = [
@@ -173,6 +174,22 @@ class TVPlayIE(InfoExtractor):
'skip_download': True,
},
},
+ {
+ 'url': 'http://play.novatv.bg/programi/zdravei-bulgariya/624952?autostart=true',
+ 'info_dict': {
+ 'id': '624952',
+ 'ext': 'flv',
+ 'title': 'Здравей, България (12.06.2015 г.) ',
+ 'description': 'md5:99f3700451ac5bb71a260268b8daefd7',
+ 'duration': 8838,
+ 'timestamp': 1434100372,
+ 'upload_date': '20150612',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 94bd634..73ce335 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -22,8 +22,8 @@ class TwitchBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'http://usher.twitch.tv'
- _LOGIN_URL = 'https://secure.twitch.tv/user/login'
- _LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
+ _LOGIN_URL = 'https://secure.twitch.tv/login'
+ _LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
_NETRC_MACHINE = 'twitch'
def _handle_error(self, response):
@@ -59,20 +59,12 @@ class TwitchBaseIE(InfoExtractor):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
- authenticity_token = self._search_regex(
- r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
- login_page, 'authenticity token')
-
- login_form = {
- 'utf8': '✓'.encode('utf-8'),
- 'authenticity_token': authenticity_token,
- 'redirect_on_login': '',
- 'embed_form': 'false',
- 'mp_source_action': 'login-button',
- 'follow': '',
- 'login': username,
- 'password': password,
- }
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'login': username.encode('utf-8'),
+ 'password': password.encode('utf-8'),
+ })
request = compat_urllib_request.Request(
self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
@@ -80,11 +72,15 @@ class TwitchBaseIE(InfoExtractor):
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
- m = re.search(
- r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
- if m:
+ error_message = self._search_regex(
+ r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>',
+ response, 'error message', default=None)
+ if error_message:
raise ExtractorError(
- 'Unable to login: %s' % m.group('msg').strip(), expected=True)
+ 'Unable to login. Twitch said: %s' % error_message, expected=True)
+
+ if '>Reset your password<' in response:
+ self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')
def _prefer_source(self, formats):
try:
@@ -189,17 +185,17 @@ class TwitchVodIE(TwitchItemBaseIE):
_ITEM_SHORTCUT = 'v'
_TEST = {
- 'url': 'http://www.twitch.tv/ksptv/v/3622000',
+ 'url': 'http://www.twitch.tv/riotgames/v/6528877',
'info_dict': {
- 'id': 'v3622000',
+ 'id': 'v6528877',
'ext': 'mp4',
- 'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
+ 'title': 'LCK Summer Split - Week 6 Day 1',
'thumbnail': 're:^https?://.*\.jpg$',
- 'duration': 6951,
- 'timestamp': 1419028564,
- 'upload_date': '20141219',
- 'uploader': 'KSPTV',
- 'uploader_id': 'ksptv',
+ 'duration': 17208,
+ 'timestamp': 1435131709,
+ 'upload_date': '20150624',
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
'view_count': int,
},
'params': {
@@ -215,7 +211,7 @@ class TwitchVodIE(TwitchItemBaseIE):
'%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
'Downloading %s access token' % self._ITEM_TYPE)
formats = self._extract_m3u8_formats(
- '%s/vod/%s?nauth=%s&nauthsig=%s'
+ '%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true'
% (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
item_id, 'mp4')
self._prefer_source(formats)
@@ -314,9 +310,9 @@ class TwitchBookmarksIE(TwitchPlaylistBaseIE):
class TwitchStreamIE(TwitchBaseIE):
IE_NAME = 'twitch:stream'
- _VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
- _TEST = {
+ _TESTS = [{
'url': 'http://www.twitch.tv/shroomztv',
'info_dict': {
'id': '12772022048',
@@ -335,7 +331,10 @@ class TwitchStreamIE(TwitchBaseIE):
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
channel_id = self._match_id(url)
@@ -350,6 +349,12 @@ class TwitchStreamIE(TwitchBaseIE):
'http://www.twitch.tv/%s/profile' % channel_id,
'TwitchProfile', channel_id)
+ # Channel name may be typed if different case than the original channel name
+ # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
+ # an invalid m3u8 URL. Working around by use of original channel name from stream
+ # JSON and fallback to lowercase if it's not available.
+ channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
+
access_token = self._download_json(
'%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
'Downloading channel access token')
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
new file mode 100644
index 0000000..1aaa063
--- /dev/null
+++ b/youtube_dl/extractor/twitter.py
@@ -0,0 +1,72 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+ float_or_none,
+ unescapeHTML,
+)
+
+
+class TwitterCardIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
+ 'md5': 'a74f50b310c83170319ba16de6955192',
+ 'info_dict': {
+ 'id': '560070183650213889',
+ 'ext': 'mp4',
+ 'title': 'TwitterCard',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 30.033,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # Different formats served for different User-Agents
+ USER_AGENTS = [
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)', # mp4
+ 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0', # webm
+ ]
+
+ config = None
+ formats = []
+ for user_agent in USER_AGENTS:
+ request = compat_urllib_request.Request(url)
+ request.add_header('User-Agent', user_agent)
+ webpage = self._download_webpage(request, video_id)
+
+ config = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'data-player-config="([^"]+)"', webpage, 'data player config')),
+ video_id)
+
+ video_url = config['playlist'][0]['source']
+
+ f = {
+ 'url': video_url,
+ }
+
+ m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
+ if m:
+ f.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ thumbnail = config.get('posterImageUrl')
+ duration = float_or_none(config.get('duration'))
+
+ return {
+ 'id': video_id,
+ 'title': 'TwitterCard',
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py
index 4667ed8..e2bab52 100644
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -15,7 +15,8 @@ from ..utils import (
class UdemyIE(InfoExtractor):
IE_NAME = 'udemy'
_VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
- _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
+ _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
+ _ORIGIN_URL = 'https://www.udemy.com'
_NETRC_MACHINE = 'udemy'
_TESTS = [{
@@ -74,29 +75,33 @@ class UdemyIE(InfoExtractor):
expected=True)
login_popup = self._download_webpage(
- 'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
- 'Downloading login popup')
+ self._LOGIN_URL, None, 'Downloading login popup')
if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
return
- csrf = self._html_search_regex(
- r'<input type="hidden" name="csrf" value="(.+?)"',
- login_popup, 'csrf token')
+ login_form = self._form_hidden_inputs('login-form', login_popup)
+
+ login_form.update({
+ 'email': username.encode('utf-8'),
+ 'password': password.encode('utf-8'),
+ })
- login_form = {
- 'email': username,
- 'password': password,
- 'csrf': csrf,
- 'displayType': 'json',
- 'isSubmitted': '1',
- }
request = compat_urllib_request.Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
- response = self._download_json(
+ request.add_header('Referer', self._ORIGIN_URL)
+ request.add_header('Origin', self._ORIGIN_URL)
+
+ response = self._download_webpage(
request, None, 'Logging in as %s' % username)
- if 'returnUrl' not in response:
+ if all(logout_pattern not in response
+ for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']):
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index c08428a..2151f83 100644
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dl/extractor/udn.py
@@ -11,6 +11,7 @@ from ..compat import compat_urlparse
class UDNEmbedIE(InfoExtractor):
+ IE_DESC = '聯合影音'
_VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
_TESTS = [{
'url': 'http://video.udn.com/embed/news/300040',
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index dd02674..722eb52 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
+ compat_urlparse,
)
from ..utils import (
ExtractorError,
@@ -26,11 +27,21 @@ class Vbox7IE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- redirect_page, urlh = self._download_webpage_handle(url, video_id)
- new_location = self._search_regex(r'window\.location = \'(.*)\';',
- redirect_page, 'redirect location')
- redirect_url = urlh.geturl() + new_location
- webpage = self._download_webpage(redirect_url, video_id,
+ # need to get the page 3 times for the correct jsSecretToken cookie
+ # which is necessary for the correct title
+ def get_session_id():
+ redirect_page = self._download_webpage(url, video_id)
+ session_id_url = self._search_regex(
+ r'var\s*url\s*=\s*\'([^\']+)\';', redirect_page,
+ 'session id url')
+ self._download_webpage(
+ compat_urlparse.urljoin(url, session_id_url), video_id,
+ 'Getting session id')
+
+ get_session_id()
+ get_session_id()
+
+ webpage = self._download_webpage(url, video_id,
'Downloading redirect page')
title = self._html_search_regex(r'<title>(.*)</title>',
diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py
index 346edf4..0d8d832 100644
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@@ -5,6 +5,7 @@ import json
from .common import InfoExtractor
from ..compat import (
+ compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
@@ -76,7 +77,7 @@ class VeeHDIE(InfoExtractor):
if config_json:
config = json.loads(config_json)
- video_url = compat_urlparse.unquote(config['clip']['url'])
+ video_url = compat_urllib_parse_unquote(config['clip']['url'])
if not video_url:
video_url = self._html_search_regex(
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index 04e2b0b..01af7a9 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -1,5 +1,4 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
@@ -7,25 +6,29 @@ from ..utils import ExtractorError
class ViceIE(InfoExtractor):
- _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
+ _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)+(?P<id>.+)'
- _TEST = {
- 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
- 'info_dict': {
- 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
- 'ext': 'mp4',
- 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
- },
- 'params': {
- # Requires ffmpeg (m3u8 manifest)
- 'skip_download': True,
- },
- }
+ _TESTS = [
+ {
+ 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
+ 'info_dict': {
+ 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+ 'ext': 'mp4',
+ 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+ },
+ 'params': {
+ # Requires ffmpeg (m3u8 manifest)
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
+ 'only_matching': True,
+ }
+ ]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- webpage = self._download_webpage(url, name)
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
try:
embed_code = self._search_regex(
r'embedCode=([^&\'"]+)', webpage,
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
index eb309a7..78ff631 100644
--- a/youtube_dl/extractor/videomega.py
+++ b/youtube_dl/extractor/videomega.py
@@ -8,20 +8,23 @@ from ..compat import compat_urllib_request
class VideoMegaIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://
- (?:www\.)?videomega\.tv/
- (?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
- '''
- _TEST = {
- 'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
- 'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
+ _VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',
+ 'md5': 'cc1920a58add3f05c6a93285b84fb3aa',
'info_dict': {
- 'id': '4GNA688SU99US886ANG4',
+ 'id': 'AOSQBJYKIDDIKYJBQSOA',
'ext': 'mp4',
- 'title': 'BigBuckBunny_320x180',
+ 'title': '1254207',
'thumbnail': 're:^https?://.*\.jpg$',
}
- }
+ }, {
+ 'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA&width=1070&height=600',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomega.tv/view.php?ref=090051111052065112106089103052052103089106112065052111051090',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -29,12 +32,13 @@ class VideoMegaIE(InfoExtractor):
iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
req = compat_urllib_request.Request(iframe_url)
req.add_header('Referer', url)
+ req.add_header('Cookie', 'noadvtday=0')
webpage = self._download_webpage(req, video_id)
title = self._html_search_regex(
- r'<title>(.*?)</title>', webpage, 'title')
+ r'<title>(.+?)</title>', webpage, 'title')
title = re.sub(
- r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
+ r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s*|\s*-\svideomega\.tv$)', '', title)
thumbnail = self._search_regex(
r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
video_url = self._search_regex(
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 7f2fb1c..51cdc6b 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,5 +1,7 @@
+# coding: utf-8
from __future__ import unicode_literals
+import json
import time
import hmac
import hashlib
@@ -11,6 +13,7 @@ from ..utils import (
parse_age_limit,
parse_iso8601,
)
+from ..compat import compat_urllib_request
from .common import InfoExtractor
@@ -23,27 +26,35 @@ class VikiBaseIE(InfoExtractor):
_APP_VERSION = '2.2.5.1428709186'
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
- def _prepare_call(self, path, timestamp=None):
+ _NETRC_MACHINE = 'viki'
+
+ _token = None
+
+ def _prepare_call(self, path, timestamp=None, post_data=None):
path += '?' if '?' not in path else '&'
if not timestamp:
timestamp = int(time.time())
query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+ if self._token:
+ query += '&token=%s' % self._token
sig = hmac.new(
self._APP_SECRET.encode('ascii'),
query.encode('ascii'),
hashlib.sha1
).hexdigest()
- return self._API_URL_TEMPLATE % (query, sig)
+ url = self._API_URL_TEMPLATE % (query, sig)
+ return compat_urllib_request.Request(
+ url, json.dumps(post_data).encode('utf-8')) if post_data else url
- def _call_api(self, path, video_id, note, timestamp=None):
+ def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
resp = self._download_json(
- self._prepare_call(path, timestamp), video_id, note)
+ self._prepare_call(path, timestamp, post_data), video_id, note)
error = resp.get('error')
if error:
if error == 'invalid timestamp':
resp = self._download_json(
- self._prepare_call(path, int(resp['current_timestamp'])),
+ self._prepare_call(path, int(resp['current_timestamp']), post_data),
video_id, '%s (retry)' % note)
error = resp.get('error')
if error:
@@ -56,6 +67,27 @@ class VikiBaseIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error),
expected=True)
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'login_id': username,
+ 'password': password,
+ }
+
+ login = self._call_api(
+ 'sessions.json', None,
+ 'Logging in as %s' % username, post_data=login_form)
+
+ self._token = login.get('token')
+ if not self._token:
+ self.report_warning('Unable to get session token, login has probably failed')
+
class VikiIE(VikiBaseIE):
IE_NAME = 'viki'
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index f300c7c..10d6745 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -22,6 +22,7 @@ from ..utils import (
unified_strdate,
unsmuggle_url,
urlencode_postdata,
+ unescapeHTML,
)
@@ -173,6 +174,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
]
+ @staticmethod
+ def _extract_vimeo_url(url, webpage):
+ # Look for embedded (iframe) Vimeo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
+ if mobj:
+ player_url = unescapeHTML(mobj.group('url'))
+ surl = smuggle_url(player_url, {'Referer': url})
+ return surl
+ # Look for embedded (swf embed) Vimeo player
+ mobj = re.search(
+ r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+ if mobj:
+ return mobj.group(1)
+
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None)
if password is None:
@@ -436,11 +452,7 @@ class VimeoChannelIE(InfoExtractor):
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
- fields = dict(re.findall(r'''(?x)<input\s+
- type="hidden"\s+
- name="([^"]+)"\s+
- value="([^"]*)"
- ''', login_form))
+ fields = self._hidden_inputs(login_form)
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
fields['token'] = token
fields['password'] = password
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py
index aa3d6dd..92321d6 100644
--- a/youtube_dl/extractor/vimple.py
+++ b/youtube_dl/extractor/vimple.py
@@ -4,7 +4,29 @@ from .common import InfoExtractor
from ..utils import int_or_none
-class VimpleIE(InfoExtractor):
+class SprutoBaseIE(InfoExtractor):
+ def _extract_spruto(self, spruto, video_id):
+ playlist = spruto['playlist'][0]
+ title = playlist['title']
+ video_id = playlist.get('videoId') or video_id
+ thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
+ duration = int_or_none(playlist.get('duration'))
+
+ formats = [{
+ 'url': f['url'],
+ } for f in playlist['video']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class VimpleIE(SprutoBaseIE):
IE_DESC = 'Vimple - one-click video hosting'
_VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
_TESTS = [
@@ -30,25 +52,9 @@ class VimpleIE(InfoExtractor):
webpage = self._download_webpage(
'http://player.vimple.ru/iframe/%s' % video_id, video_id)
- playlist = self._parse_json(
+ spruto = self._parse_json(
self._search_regex(
r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
- video_id)['playlist'][0]
-
- title = playlist['title']
- video_id = playlist.get('videoId') or video_id
- thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
- duration = int_or_none(playlist.get('duration'))
-
- formats = [{
- 'url': f['url'],
- } for f in playlist['video']]
- self._sort_formats(formats)
+ video_id)
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
+ return self._extract_spruto(spruto, video_id)
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index cc384ad..c30c5a8 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -13,14 +13,26 @@ from ..compat import (
from ..utils import (
ExtractorError,
orderedSet,
+ str_to_int,
unescapeHTML,
unified_strdate,
)
class VKIE(InfoExtractor):
- IE_NAME = 'vk.com'
- _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
+ IE_NAME = 'vk'
+ IE_DESC = 'VK'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|
+ (?:
+ (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video|
+ (?:www\.)?biqle\.ru/watch/
+ )
+ (?P<videoid>[^s].*?)(?:\?(?:.*\blist=(?P<list_id>[\da-f]+))?|%2F|$)
+ )
+ '''
_NETRC_MACHINE = 'vk'
_TESTS = [
@@ -34,6 +46,7 @@ class VKIE(InfoExtractor):
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'duration': 195,
'upload_date': '20120212',
+ 'view_count': int,
},
},
{
@@ -45,7 +58,8 @@ class VKIE(InfoExtractor):
'uploader': 'Tom Cruise',
'title': 'No name',
'duration': 9,
- 'upload_date': '20130721'
+ 'upload_date': '20130721',
+ 'view_count': int,
}
},
{
@@ -59,6 +73,7 @@ class VKIE(InfoExtractor):
'title': 'Lin Dan',
'duration': 101,
'upload_date': '20120730',
+ 'view_count': int,
}
},
{
@@ -73,7 +88,8 @@ class VKIE(InfoExtractor):
'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'duration': 8352,
- 'upload_date': '20121218'
+ 'upload_date': '20121218',
+ 'view_count': int,
},
'skip': 'Requires vk account credentials',
},
@@ -100,14 +116,54 @@ class VKIE(InfoExtractor):
'title': 'Книга Илая',
'duration': 6771,
'upload_date': '20140626',
+ 'view_count': int,
},
'skip': 'Only works from Russia',
},
{
+ # video (removed?) only available with list id
+ 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
+ 'md5': '091287af5402239a1051c37ec7b92913',
+ 'info_dict': {
+ 'id': '171201961',
+ 'ext': 'mp4',
+ 'title': 'ТюменцевВВ_09.07.2015',
+ 'uploader': 'Anton Ivanov',
+ 'duration': 109,
+ 'upload_date': '20150709',
+ 'view_count': int,
+ },
+ },
+ {
+ # youtube embed
+ 'url': 'https://vk.com/video276849682_170681728',
+ 'info_dict': {
+ 'id': 'V3K4mi0SYkc',
+ 'ext': 'mp4',
+ 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
+ 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
+ 'duration': 179,
+ 'upload_date': '20130116',
+ 'uploader': "Children's Joy Foundation",
+ 'uploader_id': 'thecjf',
+ 'view_count': int,
+ },
+ },
+ {
# removed video, just testing that we match the pattern
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
'only_matching': True,
},
+ {
+ # age restricted video, requires vk account credentials
+ 'url': 'https://vk.com/video205387401_164765225',
+ 'only_matching': True,
+ },
+ {
+ # vk wrapper
+ 'url': 'http://www.biqle.ru/watch/847655_160197695',
+ 'only_matching': True,
+ }
]
def _login(self):
@@ -115,20 +171,25 @@ class VKIE(InfoExtractor):
if username is None:
return
- login_form = {
- 'act': 'login',
- 'role': 'al_frame',
- 'expire': '1',
- 'email': username,
- 'pass': password,
- }
+ login_page = self._download_webpage(
+ 'https://vk.com', None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'email': username.encode('cp1251'),
+ 'pass': password.encode('cp1251'),
+ })
- request = compat_urllib_request.Request('https://login.vk.com/?act=login',
- compat_urllib_parse.urlencode(login_form).encode('utf-8'))
- login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
+ request = compat_urllib_request.Request(
+ 'https://login.vk.com/?act=login',
+ compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ login_page = self._download_webpage(
+ request, None, note='Logging in as %s' % username)
if re.search(r'onLoginFailed', login_page):
- raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
+ raise ExtractorError(
+ 'Unable to login, incorrect username and/or password', expected=True)
def _real_initialize(self):
self._login()
@@ -140,9 +201,26 @@ class VKIE(InfoExtractor):
if not video_id:
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
- info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
+ info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
+
+ # Some videos (removed?) can only be downloaded with list id specified
+ list_id = mobj.group('list_id')
+ if list_id:
+ info_url += '&list=%s' % list_id
+
info_page = self._download_webpage(info_url, video_id)
+ error_message = self._html_search_regex(
+ r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
+ info_page, 'error message', default=None)
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+
+ if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
+ raise ExtractorError(
+ 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
+ expected=True)
+
ERRORS = {
r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
'Video %s has been removed from public access due to rightholder complaint.',
@@ -156,16 +234,20 @@ class VKIE(InfoExtractor):
r'<!>Видео временно недоступно':
'Video %s is temporarily unavailable.',
+
+ r'<!>Access denied':
+ 'Access denied to video %s.',
}
for error_re, error_msg in ERRORS.items():
if re.search(error_re, info_page):
raise ExtractorError(error_msg % video_id, expected=True)
- m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
- if m_yt is not None:
- self.to_screen('Youtube video detected')
- return self.url_result(m_yt.group(1), 'Youtube')
+ youtube_url = self._search_regex(
+ r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
+ info_page, 'youtube iframe', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube')
m_rutube = re.search(
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
@@ -175,25 +257,29 @@ class VKIE(InfoExtractor):
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)
- m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
+ m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts:
- m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
+ m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
if m_opts_url:
opts_url = m_opts_url.group(1)
if opts_url.startswith('//'):
opts_url = 'http:' + opts_url
return self.url_result(opts_url)
- data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
+ data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
data = json.loads(data_json)
# Extract upload date
upload_date = None
- mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
+ mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
if mobj is not None:
mobj.group(1) + ' ' + mobj.group(2)
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
+ view_count = str_to_int(self._search_regex(
+ r'"mv_views_count_number"[^>]*>([\d,.]+) views<',
+ info_page, 'view count', fatal=False))
+
formats = [{
'format_id': k,
'url': v,
@@ -210,29 +296,39 @@ class VKIE(InfoExtractor):
'uploader': data.get('md_author'),
'duration': data.get('duration'),
'upload_date': upload_date,
+ 'view_count': view_count,
}
class VKUserVideosIE(InfoExtractor):
- IE_NAME = 'vk.com:user-videos'
- IE_DESC = 'vk.com:All of a user\'s videos'
- _VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
+ IE_NAME = 'vk:uservideos'
+ IE_DESC = "VK - User's Videos"
+ _VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
_TEMPLATE_URL = 'https://vk.com/videos'
- _TEST = {
+ _TESTS = [{
'url': 'http://vk.com/videos205387401',
'info_dict': {
'id': '205387401',
+ 'title': "Tom Cruise's Videos",
},
'playlist_mincount': 4,
- }
+ }, {
+ 'url': 'http://vk.com/videos-77521',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
page_id = self._match_id(url)
- page = self._download_webpage(url, page_id)
- video_ids = orderedSet(
- m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
- url_entries = [
+
+ webpage = self._download_webpage(url, page_id)
+
+ entries = [
self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
- for video_id in video_ids]
- return self.playlist_result(url_entries, page_id)
+ for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
+
+ title = unescapeHTML(self._search_regex(
+ r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
+ webpage, 'title', default=page_id))
+
+ return self.playlist_result(entries, page_id, title)
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
index 1c0966a..ccf1928 100644
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
@@ -28,12 +26,7 @@ class VodlockerIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- fields = dict(re.findall(r'''(?x)<input\s+
- type="hidden"\s+
- name="([^"]+)"\s+
- (?:id="[^"]+"\s+)?
- value="([^"]*)"
- ''', webpage))
+ fields = self._hidden_inputs(webpage)
if fields['op'] == 'download1':
self._sleep(3, video_id) # they do detect when requests happen too fast!
diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py
index 405cb9d..149e364 100644
--- a/youtube_dl/extractor/vube.py
+++ b/youtube_dl/extractor/vube.py
@@ -36,6 +36,7 @@ class VubeIE(InfoExtractor):
'comment_count': int,
'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
},
+ 'skip': 'Not accessible from Travis CI server',
}, {
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
'md5': 'db7aba89d4603dadd627e9d1973946fe',
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
index 73077a3..2037d9b 100644
--- a/youtube_dl/extractor/webofstories.py
+++ b/youtube_dl/extractor/webofstories.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import int_or_none
@@ -98,3 +100,42 @@ class WebOfStoriesIE(InfoExtractor):
'description': description,
'duration': duration,
}
+
+
+class WebOfStoriesPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.webofstories.com/playAll/donald.knuth',
+ 'info_dict': {
+ 'id': 'donald.knuth',
+ 'title': 'Donald Knuth (Scientist)',
+ },
+ 'playlist_mincount': 97,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
+ for video_number in set(re.findall('href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
+ ]
+
+ title = self._search_regex(
+ r'<div id="speakerName">\s*<span>([^<]+)</span>',
+ webpage, 'speaker', default=None)
+ if title:
+ field = self._search_regex(
+ r'<span id="primaryField">([^<]+)</span>',
+ webpage, 'field', default=None)
+ if field:
+ title += ' (%s)' % field
+
+ if not title:
+ title = self._search_regex(
+ r'<title>Play\s+all\s+stories\s*-\s*([^<]+)\s*-\s*Web\s+of\s+Stories</title>',
+ webpage, 'title')
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py
index 80c48c3..4ff99e5 100644
--- a/youtube_dl/extractor/xbef.py
+++ b/youtube_dl/extractor/xbef.py
@@ -1,9 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
class XBefIE(InfoExtractor):
@@ -30,7 +28,7 @@ class XBefIE(InfoExtractor):
config_url_enc = self._download_webpage(
'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
note='Retrieving config URL')
- config_url = compat_urllib_parse.unquote(config_url_enc)
+ config_url = compat_urllib_parse_unquote(config_url_enc)
config = self._download_xml(
config_url, video_id, note='Retrieving config')
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 4527567..b4ad513 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -13,7 +13,6 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
- """Information Extractor for xHamster"""
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_TESTS = [
{
@@ -133,3 +132,36 @@ class XHamsterIE(InfoExtractor):
'age_limit': age_limit,
'formats': formats,
}
+
+
+class XHamsterEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://xhamster.com/xembed.php?video=3328539',
+ 'info_dict': {
+ 'id': '3328539',
+ 'ext': 'mp4',
+ 'title': 'Pen Masturbation',
+ 'upload_date': '20140728',
+ 'uploader_id': 'anonymous',
+ 'duration': 5,
+ 'age_limit': 18,
+ }
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
+ webpage, 'xhamster url')
+
+ return self.url_result(video_url, 'XHamster')
diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py
index 79ed6c7..5a41f8f 100644
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
class XNXXIE(InfoExtractor):
@@ -26,7 +24,7 @@ class XNXXIE(InfoExtractor):
video_url = self._search_regex(r'flv_url=(.*?)&amp;',
webpage, 'video URL')
- video_url = compat_urllib_parse.unquote(video_url)
+ video_url = compat_urllib_parse_unquote(video_url)
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
webpage, 'title')
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index 1644f53..779e4f4 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -5,7 +5,7 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
)
from ..utils import (
parse_duration,
@@ -59,7 +59,7 @@ class XTubeIE(InfoExtractor):
for format_id, video_url in re.findall(
r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage):
fmt = {
- 'url': compat_urllib_parse.unquote(video_url),
+ 'url': compat_urllib_parse_unquote(video_url),
'format_id': format_id,
}
m = re.search(r'^(?P<height>\d+)[pP]', format_id)
@@ -68,7 +68,7 @@ class XTubeIE(InfoExtractor):
formats.append(fmt)
if not formats:
- video_url = compat_urllib_parse.unquote(self._search_regex(
+ video_url = compat_urllib_parse_unquote(self._search_regex(
r'flashvars\.video_url\s*=\s*"([^"]+)"',
webpage, 'video URL'))
formats.append({'url': video_url})
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py
index 81d885f..5aac8ad 100644
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -13,6 +13,7 @@ from ..utils import (
class XuiteIE(InfoExtractor):
+ IE_DESC = '隨意窩Xuite影音'
_REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
_VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
_TESTS = [{
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index 2a45dc5..5dcf2fd 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -4,11 +4,13 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urllib_request,
)
from ..utils import (
clean_html,
ExtractorError,
+ determine_ext,
)
@@ -25,6 +27,8 @@ class XVideosIE(InfoExtractor):
}
}
+ _ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
+
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -33,16 +37,37 @@ class XVideosIE(InfoExtractor):
if mobj:
raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
- video_url = compat_urllib_parse.unquote(
+ video_url = compat_urllib_parse_unquote(
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
video_title = self._html_search_regex(
r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
video_thumbnail = self._search_regex(
r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
+ formats = [{
+ 'url': video_url,
+ }]
+
+ android_req = compat_urllib_request.Request(url)
+ android_req.add_header('User-Agent', self._ANDROID_USER_AGENT)
+ android_webpage = self._download_webpage(android_req, video_id, fatal=False)
+
+ if android_webpage is not None:
+ player_params_str = self._search_regex(
+ 'mobileReplacePlayerDivTwoQual\(([^)]+)\)',
+ android_webpage, 'player parameters', default='')
+ player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(',')))
+ if player_params:
+ formats.extend([{
+ 'url': param,
+ 'preference': -10,
+ } for param in player_params if determine_ext(param) == 'mp4'])
+
+ self._sort_formats(formats)
+
return {
'id': video_id,
- 'url': video_url,
+ 'formats': formats,
'title': video_title,
'ext': 'flv',
'thumbnail': video_thumbnail,
diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py
index 9d851ba..001ee17 100644
--- a/youtube_dl/extractor/yam.py
+++ b/youtube_dl/extractor/yam.py
@@ -14,6 +14,7 @@ from ..utils import (
class YamIE(InfoExtractor):
+ IE_DESC = '蕃薯藤yam天空部落'
_VALID_URL = r'http://mymedia.yam.com/m/(?P<id>\d+)'
_TESTS = [{
diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dl/extractor/yinyuetai.py
new file mode 100644
index 0000000..834d860
--- /dev/null
+++ b/youtube_dl/extractor/yinyuetai.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class YinYueTaiIE(InfoExtractor):
+ IE_NAME = 'yinyuetai:video'
+ IE_DESC = '音悦Tai'
+ _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://v.yinyuetai.com/video/2322376',
+ 'md5': '6e3abe28d38e3a54b591f9f040595ce0',
+ 'info_dict': {
+ 'id': '2322376',
+ 'ext': 'mp4',
+ 'title': '少女时代_PARTY_Music Video Teaser',
+ 'creator': '少女时代',
+ 'duration': 25,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://v.yinyuetai.com/video/h5/2322376',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id, video_id,
+ 'Downloading mv info')['videoInfo']['coreVideoInfo']
+
+ if info['error']:
+ raise ExtractorError(info['errorMsg'], expected=True)
+
+ formats = [{
+ 'url': format_info['videoUrl'],
+ 'format_id': format_info['qualityLevel'],
+ 'format': format_info.get('qualityLevelName'),
+ 'filesize': format_info.get('fileSize'),
+ # though URLs ends with .flv, the downloaded files are in fact mp4
+ 'ext': 'mp4',
+ 'tbr': format_info.get('bitrate'),
+ } for format_info in info['videoUrlModels']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info['videoName'],
+ 'thumbnail': info.get('bigHeadImage'),
+ 'creator': info.get('artistNames'),
+ 'duration': info.get('duration'),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py
index 894678a..869f3e8 100644
--- a/youtube_dl/extractor/ynet.py
+++ b/youtube_dl/extractor/ynet.py
@@ -5,7 +5,7 @@ import re
import json
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote_plus
class YnetIE(InfoExtractor):
@@ -34,7 +34,7 @@ class YnetIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage))
+ content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage))
config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
f4m_url = config['clip']['url']
title = self._og_search_title(webpage)
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 97b98bb..78caeb8 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -1,123 +1,236 @@
# coding: utf-8
-
from __future__ import unicode_literals
-import math
-import random
-import re
-import time
+import base64
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
+from ..utils import ExtractorError
+
+from ..compat import (
+ compat_urllib_parse,
+ compat_ord,
+ compat_urllib_request,
)
class YoukuIE(InfoExtractor):
+ IE_NAME = 'youku'
+ IE_DESC = '优酷'
_VALID_URL = r'''(?x)
(?:
http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
youku:)
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
'''
- _TEST = {
- 'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html',
- 'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b',
- 'params': {
- 'test': False
+
+ _TESTS = [{
+ 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
+ 'md5': '5f3af4192eabacc4501508d54a8cabd7',
+ 'info_dict': {
+ 'id': 'XMTc1ODE5Njcy_part1',
+ 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
+ 'ext': 'flv'
+ }
+ }, {
+ 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
+ 'info_dict': {
+ 'id': 'XODgxNjg1Mzk2',
+ 'title': '武媚娘传奇 85',
},
+ 'playlist_count': 11,
+ }, {
+ 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
'info_dict': {
- 'id': 'XNDgyMDQ2NTQw_part00',
- 'ext': 'flv',
- 'title': 'youtube-dl test video "\'/\\ä↭𝕐'
+ 'id': 'XMTI1OTczNDM5Mg',
+ 'title': '花千骨 04',
+ },
+ 'playlist_count': 13,
+ 'skip': 'Available in China only',
+ }]
+
+ def construct_video_urls(self, data1, data2):
+ # get sid, token
+ def yk_t(s1, s2):
+ ls = list(range(256))
+ t = 0
+ for i in range(256):
+ t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
+ ls[i], ls[t] = ls[t], ls[i]
+ s = bytearray()
+ x, y = 0, 0
+ for i in range(len(s2)):
+ y = (y + 1) % 256
+ x = (x + ls[y]) % 256
+ ls[x], ls[y] = ls[y], ls[x]
+ s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
+ return bytes(s)
+
+ sid, token = yk_t(
+ b'becaf9be', base64.b64decode(data2['ep'].encode('ascii'))
+ ).decode('ascii').split('_')
+
+ # get oip
+ oip = data2['ip']
+
+ # get fileid
+ string_ls = list(
+ 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
+ shuffled_string_ls = []
+ seed = data1['seed']
+ N = len(string_ls)
+ for ii in range(N):
+ seed = (seed * 0xd3 + 0x754f) % 0x10000
+ idx = seed * len(string_ls) // 0x10000
+ shuffled_string_ls.append(string_ls[idx])
+ del string_ls[idx]
+
+ fileid_dict = {}
+ for format in data1['streamtypes']:
+ streamfileid = [
+ int(i) for i in data1['streamfileids'][format].strip('*').split('*')]
+ fileid = ''.join(
+ [shuffled_string_ls[i] for i in streamfileid])
+ fileid_dict[format] = fileid[:8] + '%s' + fileid[10:]
+
+ def get_fileid(format, n):
+ fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2)
+ return fileid
+
+ # get ep
+ def generate_ep(format, n):
+ fileid = get_fileid(format, n)
+ ep_t = yk_t(
+ b'bf7e5f01',
+ ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
+ )
+ ep = base64.b64encode(ep_t).decode('ascii')
+ return ep
+
+ # generate video_urls
+ video_urls_dict = {}
+ for format in data1['streamtypes']:
+ video_urls = []
+ for dt in data1['segs'][format]:
+ n = str(int(dt['no']))
+ param = {
+ 'K': dt['k'],
+ 'hd': self.get_hd(format),
+ 'myp': 0,
+ 'ts': dt['seconds'],
+ 'ypp': 0,
+ 'ctype': 12,
+ 'ev': 1,
+ 'token': token,
+ 'oip': oip,
+ 'ep': generate_ep(format, n)
+ }
+ video_url = \
+ 'http://k.youku.com/player/getFlvPath/' + \
+ 'sid/' + sid + \
+ '_' + str(int(n) + 1).zfill(2) + \
+ '/st/' + self.parse_ext_l(format) + \
+ '/fileid/' + get_fileid(format, n) + '?' + \
+ compat_urllib_parse.urlencode(param)
+ video_urls.append(video_url)
+ video_urls_dict[format] = video_urls
+
+ return video_urls_dict
+
+ def get_hd(self, fm):
+ hd_id_dict = {
+ 'flv': '0',
+ 'mp4': '1',
+ 'hd2': '2',
+ 'hd3': '3',
+ '3gp': '0',
+ '3gphd': '1'
}
- }
-
- def _gen_sid(self):
- nowTime = int(time.time() * 1000)
- random1 = random.randint(1000, 1998)
- random2 = random.randint(1000, 9999)
-
- return "%d%d%d" % (nowTime, random1, random2)
-
- def _get_file_ID_mix_string(self, seed):
- mixed = []
- source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
- seed = float(seed)
- for i in range(len(source)):
- seed = (seed * 211 + 30031) % 65536
- index = math.floor(seed / 65536 * len(source))
- mixed.append(source[int(index)])
- source.remove(source[int(index)])
- # return ''.join(mixed)
- return mixed
-
- def _get_file_id(self, fileId, seed):
- mixed = self._get_file_ID_mix_string(seed)
- ids = fileId.split('*')
- realId = []
- for ch in ids:
- if ch:
- realId.append(mixed[int(ch)])
- return ''.join(realId)
+ return hd_id_dict[fm]
+
+ def parse_ext_l(self, fm):
+ ext_dict = {
+ 'flv': 'flv',
+ 'mp4': 'mp4',
+ 'hd2': 'flv',
+ 'hd3': 'flv',
+ '3gp': 'flv',
+ '3gphd': 'mp4'
+ }
+ return ext_dict[fm]
+
+ def get_format_name(self, fm):
+ _dict = {
+ '3gp': 'h6',
+ '3gphd': 'h5',
+ 'flv': 'h4',
+ 'mp4': 'h3',
+ 'hd2': 'h2',
+ 'hd3': 'h1'
+ }
+ return _dict[fm]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
+ video_id = self._match_id(url)
- config = self._download_json(info_url, video_id)
+ def retrieve_data(req_url, note):
+ req = compat_urllib_request.Request(req_url)
- error_code = config['data'][0].get('error_code')
- if error_code:
- # -8 means blocked outside China.
- error = config['data'][0].get('error') # Chinese and English, separated by newline.
- raise ExtractorError(error or 'Server reported error %i' % error_code,
- expected=True)
+ cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+ if cn_verification_proxy:
+ req.add_header('Ytdl-request-proxy', cn_verification_proxy)
- video_title = config['data'][0]['title']
- seed = config['data'][0]['seed']
+ raw_data = self._download_json(req, video_id, note=note)
+ return raw_data['data'][0]
- format = self._downloader.params.get('format', None)
- supported_format = list(config['data'][0]['streamfileids'].keys())
+ # request basic data
+ data1 = retrieve_data(
+ 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
+ 'Downloading JSON metadata 1')
+ data2 = retrieve_data(
+ 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
+ 'Downloading JSON metadata 2')
- # TODO proper format selection
- if format is None or format == 'best':
- if 'hd2' in supported_format:
- format = 'hd2'
+ error_code = data1.get('error_code')
+ if error_code:
+ error = data1.get('error')
+ if error is not None and '因版权原因无法观看此视频' in error:
+ raise ExtractorError(
+ 'Youku said: Sorry, this video is available in China only', expected=True)
else:
- format = 'flv'
- ext = 'flv'
- elif format == 'worst':
- format = 'mp4'
- ext = 'mp4'
- else:
- format = 'flv'
- ext = 'flv'
-
- fileid = config['data'][0]['streamfileids'][format]
- keys = [s['k'] for s in config['data'][0]['segs'][format]]
- # segs is usually a dictionary, but an empty *list* if an error occured.
-
- files_info = []
- sid = self._gen_sid()
- fileid = self._get_file_id(fileid, seed)
-
- # column 8,9 of fileid represent the segment number
- # fileid[7:9] should be changed
- for index, key in enumerate(keys):
- temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
- download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
-
- info = {
- 'id': '%s_part%02d' % (video_id, index),
- 'url': download_url,
- 'uploader': None,
- 'upload_date': None,
- 'title': video_title,
- 'ext': ext,
- }
- files_info.append(info)
-
- return files_info
+ msg = 'Youku server reported error %i' % error_code
+ if error is not None:
+ msg += ': ' + error
+ raise ExtractorError(msg)
+
+ title = data1['title']
+
+ # generate video_urls_dict
+ video_urls_dict = self.construct_video_urls(data1, data2)
+
+ # construct info
+ entries = [{
+ 'id': '%s_part%d' % (video_id, i + 1),
+ 'title': title,
+ 'formats': [],
+ # some formats are not available for all parts, we have to detect
+ # which one has all
+ } for i in range(max(len(v) for v in data1['segs'].values()))]
+ for fm in data1['streamtypes']:
+ video_urls = video_urls_dict[fm]
+ for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries):
+ entry['formats'].append({
+ 'url': video_url,
+ 'format_id': self.get_format_name(fm),
+ 'ext': self.parse_ext_l(fm),
+ 'filesize': int(seg['size']),
+ })
+
+ return {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'title': title,
+ 'entries': entries,
+ }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index aacb999..3d8b31f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -17,6 +17,8 @@ from ..compat import (
compat_chr,
compat_parse_qs,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
compat_urllib_request,
compat_urlparse,
compat_str,
@@ -29,9 +31,11 @@ from ..utils import (
get_element_by_id,
int_or_none,
orderedSet,
+ str_to_int,
unescapeHTML,
unified_strdate,
uppercase_escape,
+ ISO3166Utils,
)
@@ -234,6 +238,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'44': {'ext': 'webm', 'width': 854, 'height': 480},
'45': {'ext': 'webm', 'width': 1280, 'height': 720},
'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
+ '59': {'ext': 'mp4', 'width': 854, 'height': 480},
+ '78': {'ext': 'mp4', 'width': 854, 'height': 480},
# 3d videos
@@ -516,6 +522,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': 'requires avconv',
}
},
+ # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
+ {
+ 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
+ 'info_dict': {
+ 'id': 'FIl7x6_3R5Y',
+ 'ext': 'mp4',
+ 'title': 'md5:7b81415841e02ecd4313668cde88737a',
+ 'description': 'md5:116377fd2963b81ec4ce64b542173306',
+ 'upload_date': '20150625',
+ 'uploader_id': 'dorappi2000',
+ 'uploader': 'dorappi2000',
+ 'formats': 'mincount:33',
+ },
+ },
+ # DASH manifest with segment_list
+ {
+ 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+ 'md5': '8ce563a1d667b599d21064e982ab9e31',
+ 'info_dict': {
+ 'id': 'CsmdDsKjzN8',
+ 'ext': 'mp4',
+ 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
+ 'uploader': 'Airtek',
+ 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+ 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+ 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '135', # bestvideo
+ }
+ },
]
def __init__(self, *args, **kwargs):
@@ -780,16 +818,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
def _parse_dash_manifest(
- self, video_id, dash_manifest_url, player_url, age_gate):
+ self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
def decrypt_sig(mobj):
s = mobj.group(1)
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
return '/signature/%s' % dec_s
- dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
+ dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
dash_doc = self._download_xml(
dash_manifest_url, video_id,
note='Downloading DASH manifest',
- errnote='Could not download DASH manifest')
+ errnote='Could not download DASH manifest',
+ fatal=fatal)
+
+ if dash_doc is False:
+ return []
formats = []
for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
@@ -802,6 +844,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# TODO implement WebVTT downloading
pass
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+ segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
format_id = r.attrib['id']
video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@@ -815,6 +858,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')),
}
+ if segment_list is not None:
+ f.update({
+ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
+ 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
+ 'protocol': 'http_dash_segments',
+ })
try:
existing_format = next(
fo for fo in formats
@@ -822,6 +871,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except StopIteration:
full_info = self._formats.get(format_id, {}).copy()
full_info.update(f)
+ codecs = r.attrib.get('codecs')
+ if codecs:
+ if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
+ full_info['vcodec'] = codecs
+ elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
+ full_info['acodec'] = codecs
formats.append(full_info)
else:
existing_format.update(f)
@@ -837,7 +892,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
- url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+ url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
video_id = self.extract_id(url)
# Get video webpage
@@ -851,8 +906,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
player_url = None
+ dash_mpds = []
+
+ def add_dash_mpd(video_info):
+ dash_mpd = video_info.get('dashmpd')
+ if dash_mpd and dash_mpd[0] not in dash_mpds:
+ dash_mpds.append(dash_mpd[0])
+
# Get video info
embed_webpage = None
+ is_live = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -871,24 +934,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
note='Refetching age-gated info webpage',
errnote='unable to download video info webpage')
video_info = compat_parse_qs(video_info_webpage)
+ add_dash_mpd(video_info)
else:
age_gate = False
- try:
- # Try looking directly into the video webpage
- mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
- if not mobj:
- raise ValueError('Could not find ytplayer.config') # caught below
+ video_info = None
+ # Try looking directly into the video webpage
+ mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
+ if mobj:
json_code = uppercase_escape(mobj.group(1))
ytplayer_config = json.loads(json_code)
args = ytplayer_config['args']
- # Convert to the same format returned by compat_parse_qs
- video_info = dict((k, [v]) for k, v in args.items())
- if not args.get('url_encoded_fmt_stream_map'):
- raise ValueError('No stream_map present') # caught below
- except ValueError:
- # We fallback to the get_video_info pages (used by the embed page)
+ if args.get('url_encoded_fmt_stream_map'):
+ # Convert to the same format returned by compat_parse_qs
+ video_info = dict((k, [v]) for k, v in args.items())
+ add_dash_mpd(video_info)
+ if args.get('livestream') == '1' or args.get('live_playback') == 1:
+ is_live = True
+ if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
+ # We also try looking in get_video_info since it may contain different dashmpd
+ # URL that points to a DASH manifest with possibly different itag set (some itags
+ # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
+ # manifest pointed by get_video_info's dashmpd).
+ # The general idea is to take a union of itags of both DASH manifests (for example
+ # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
self.report_video_info_webpage_download(video_id)
- for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
+ for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = (
'%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (proto, video_id, el_type))
@@ -896,11 +966,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_info_url,
video_id, note=False,
errnote='unable to download video info webpage')
- video_info = compat_parse_qs(video_info_webpage)
- if 'token' in video_info:
+ get_video_info = compat_parse_qs(video_info_webpage)
+ add_dash_mpd(get_video_info)
+ if not video_info:
+ video_info = get_video_info
+ if 'token' in get_video_info:
break
if 'token' not in video_info:
if 'reason' in video_info:
+ if 'The uploader has not made this video available in your country.' in video_info['reason']:
+ regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
+ if regions_allowed is not None:
+ raise ExtractorError('YouTube said: This video is available in %s only' % (
+ ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
+ expected=True)
raise ExtractorError(
'YouTube said: %s' % video_info['reason'][0],
expected=True, video_id=video_id)
@@ -924,7 +1003,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# uploader
if 'author' not in video_info:
raise ExtractorError('Unable to extract uploader name')
- video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
+ video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
# uploader_id
video_uploader_id = None
@@ -951,18 +1030,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning('unable to extract video thumbnail')
video_thumbnail = None
else: # don't panic if we can't find it
- video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
+ video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
# upload date
- upload_date = None
- mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
- if mobj is None:
- mobj = re.search(
- r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
- video_webpage)
- if mobj is not None:
- upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
- upload_date = unified_strdate(upload_date)
+ upload_date = self._html_search_meta(
+ 'datePublished', video_webpage, 'upload date', default=None)
+ if not upload_date:
+ upload_date = self._search_regex(
+ [r'(?s)id="eow-date.*?>(.*?)</span>',
+ r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
+ video_webpage, 'upload date', default=None)
+ if upload_date:
+ upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
+ upload_date = unified_strdate(upload_date)
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
@@ -996,12 +1076,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_description = ''
def _extract_count(count_name):
- count = self._search_regex(
- r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
- video_webpage, count_name, default=None)
- if count is not None:
- return int(count.replace(',', ''))
- return None
+ return str_to_int(self._search_regex(
+ r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
+ % re.escape(count_name),
+ video_webpage, count_name, default=None))
+
like_count = _extract_count('like')
dislike_count = _extract_count('dislike')
@@ -1013,7 +1092,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning('unable to extract video duration')
video_duration = None
else:
- video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
+ video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
# annotations
video_annotations = None
@@ -1116,24 +1195,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Look for the DASH manifest
if self._downloader.params.get('youtube_include_dash_manifest', True):
- dash_mpd = video_info.get('dashmpd')
- if dash_mpd:
- dash_manifest_url = dash_mpd[0]
+ dash_mpd_fatal = True
+ for dash_manifest_url in dash_mpds:
+ dash_formats = {}
try:
- dash_formats = self._parse_dash_manifest(
- video_id, dash_manifest_url, player_url, age_gate)
+ for df in self._parse_dash_manifest(
+ video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
+ # Do not overwrite DASH format found in some previous DASH manifest
+ if df['format_id'] not in dash_formats:
+ dash_formats[df['format_id']] = df
+ # Additional DASH manifests may end up in HTTP Error 403 therefore
+ # allow them to fail without bug report message if we already have
+ # some DASH manifest succeeded. This is temporary workaround to reduce
+ # burst of bug reports until we figure out the reason and whether it
+ # can be fixed at all.
+ dash_mpd_fatal = False
except (ExtractorError, KeyError) as e:
self.report_warning(
'Skipping DASH manifest: %r' % e, video_id)
- else:
+ if dash_formats:
# Remove the formats we found through non-DASH, they
# contain less info and it can be wrong, because we use
# fixed values (for example the resolution). See
# https://github.com/rg3/youtube-dl/issues/5774 for an
# example.
- dash_keys = set(df['format_id'] for df in dash_formats)
- formats = [f for f in formats if f['format_id'] not in dash_keys]
- formats.extend(dash_formats)
+ formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
+ formats.extend(dash_formats.values())
# Check for malformed aspect ratio
stretched_m = re.search(
@@ -1167,6 +1254,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'dislike_count': dislike_count,
'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
'formats': formats,
+ 'is_live': is_live,
}
@@ -1290,7 +1378,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _extract_playlist(self, playlist_id):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
- more_widget_html = content_html = page
for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
match = match.strip()
@@ -1310,36 +1397,36 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
self.report_warning('Youtube gives an alert message: ' + match)
# Extract the video ids from the playlist pages
- ids = []
-
- for page_num in itertools.count(1):
- matches = re.finditer(self._VIDEO_RE, content_html)
- # We remove the duplicates and the link with index 0
- # (it's not the first video of the playlist)
- new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
- ids.extend(new_ids)
-
- mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
- if not mobj:
- break
+ def _entries():
+ more_widget_html = content_html = page
+ for page_num in itertools.count(1):
+ matches = re.finditer(self._VIDEO_RE, content_html)
+ # We remove the duplicates and the link with index 0
+ # (it's not the first video of the playlist)
+ new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
+ for vid_id in new_ids:
+ yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
+
+ mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+ if not mobj:
+ break
- more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
- 'Downloading page #%s' % page_num,
- transform_source=uppercase_escape)
- content_html = more['content_html']
- if not content_html.strip():
- # Some webpages show a "Load more" button but they don't
- # have more videos
- break
- more_widget_html = more['load_more_widget_html']
+ more = self._download_json(
+ 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'Downloading page #%s' % page_num,
+ transform_source=uppercase_escape)
+ content_html = more['content_html']
+ if not content_html.strip():
+ # Some webpages show a "Load more" button but they don't
+ # have more videos
+ break
+ more_widget_html = more['load_more_widget_html']
playlist_title = self._html_search_regex(
r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
page, 'title')
- url_results = self._ids_to_results(ids)
- return self.playlist_result(url_results, playlist_id, playlist_title)
+ return self.playlist_result(_entries(), playlist_id, playlist_title)
def _real_extract(self, url):
# Extract playlist id
@@ -1406,10 +1493,12 @@ class YoutubeChannelIE(InfoExtractor):
channel_page = self._download_webpage(
url + '?view=57', channel_id,
'Downloading channel page', fatal=False)
- channel_playlist_id = self._search_regex(
- [r'<meta itemprop="channelId" content="([^"]+)">',
- r'data-channel-external-id="([^"]+)"'],
- channel_page, 'channel id', default=None)
+ channel_playlist_id = self._html_search_meta(
+ 'channelId', channel_page, 'channel id', default=None)
+ if not channel_playlist_id:
+ channel_playlist_id = self._search_regex(
+ r'data-channel-external-id="([^"]+)"',
+ channel_page, 'channel id', default=None)
if channel_playlist_id and channel_playlist_id.startswith('UC'):
playlist_id = 'UU' + channel_playlist_id[2:]
return self.url_result(
@@ -1503,7 +1592,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
for pagenum in itertools.count(1):
url_query = {
- 'search_query': query,
+ 'search_query': query.encode('utf-8'),
'page': pagenum,
'spf': 'navigate',
}
@@ -1551,7 +1640,7 @@ class YoutubeSearchURLIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- query = compat_urllib_parse.unquote_plus(mobj.group('query'))
+ query = compat_urllib_parse_unquote_plus(mobj.group('query'))
webpage = self._download_webpage(url, query)
result_code = self._search_regex(
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 5a2315b..9016e34 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -145,12 +145,16 @@ def parseOpts(overrideArguments=None):
general.add_option(
'--list-extractors',
action='store_true', dest='list_extractors', default=False,
- help='List all supported extractors and the URLs they would handle')
+ help='List all supported extractors')
general.add_option(
'--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions', default=False,
help='Output descriptions of all supported extractors')
general.add_option(
+ '--force-generic-extractor',
+ action='store_true', dest='force_generic_extractor', default=False,
+ help='Force extraction to use the generic extractor')
+ general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
@@ -215,7 +219,7 @@ def parseOpts(overrideArguments=None):
selection.add_option(
'--playlist-items',
dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+ help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
@@ -342,12 +346,13 @@ def parseOpts(overrideArguments=None):
video_format.add_option(
'--youtube-skip-dash-manifest',
action='store_false', dest='youtube_include_dash_manifest',
- help='Do not download the DASH manifest on YouTube videos')
+ help='Do not download the DASH manifests and related data on YouTube videos')
video_format.add_option(
'--merge-output-format',
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
help=(
- 'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.'
+ 'If a merge is required (e.g. bestvideo+bestaudio), '
+ 'output to given container format. One of mkv, mp4, ogg, webm, flv. '
'Ignored if no merge is required'))
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
@@ -686,7 +691,11 @@ def parseOpts(overrideArguments=None):
postproc.add_option(
'--recode-video',
metavar='FORMAT', dest='recodevideo', default=None,
- help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
+ help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)')
+ postproc.add_option(
+ '--postprocessor-args',
+ dest='postprocessor_args', metavar='ARGS',
+ help='Give these arguments to the postprocessor')
postproc.add_option(
'-k', '--keep-video',
action='store_true', dest='keepvideo', default=False,
@@ -725,7 +734,7 @@ def parseOpts(overrideArguments=None):
metavar='POLICY', dest='fixup', default='detect_or_warn',
help='Automatically correct known faults of the file. '
'One of never (do nothing), warn (only emit a warning), '
- 'detect_or_warn(the default; fix file if we can, warn otherwise)')
+ 'detect_or_warn (the default; fix file if we can, warn otherwise)')
postproc.add_option(
'--prefer-avconv',
action='store_false', dest='prefer_ffmpeg',
diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py
index 3b0e8dd..4191d04 100644
--- a/youtube_dl/postprocessor/common.py
+++ b/youtube_dl/postprocessor/common.py
@@ -23,6 +23,9 @@ class PostProcessor(object):
PostProcessor objects follow a "mutual registration" process similar
to InfoExtractor objects.
+
+ Optionally PostProcessor can use a list of additional command-line arguments
+ with self._configuration_args.
"""
_downloader = None
@@ -57,6 +60,13 @@ class PostProcessor(object):
except Exception:
self._downloader.report_warning(errnote)
+ def _configuration_args(self, default=[]):
+ pp_args = self._downloader.params.get('postprocessor_args')
+ if pp_args is None:
+ return default
+ assert isinstance(pp_args, list)
+ return pp_args
+
class AudioConversionError(PostProcessingError):
pass
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
index 774494e..e19dbf7 100644
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -35,6 +35,11 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
thumbnail_filename = info['thumbnails'][-1]['filename']
+ if not os.path.exists(encodeFilename(thumbnail_filename)):
+ self._downloader.report_warning(
+ 'Skipping embedding the thumbnail because the file is missing.')
+ return [], info
+
if info['ext'] == 'mp3':
options = [
'-c', 'copy', '-map', '0', '-map', '1',
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index cc65b34..1f72390 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -21,6 +21,7 @@ from ..utils import (
shell_quote,
subtitles_filename,
dfxp2srt,
+ ISO639Utils,
)
@@ -130,6 +131,8 @@ class FFmpegPostProcessor(PostProcessor):
oldest_mtime = min(
os.stat(encodeFilename(path)).st_mtime for path in input_paths)
+ opts += self._configuration_args()
+
files_cmd = []
for path in input_paths:
files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
@@ -262,7 +265,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
if (new_path == path or
(self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
- self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
+ self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
return [], information
try:
@@ -293,13 +296,16 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
def run(self, information):
path = information['filepath']
- prefix, sep, ext = path.rpartition('.')
- outpath = prefix + sep + self._preferedformat
if information['ext'] == self._preferedformat:
self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
return [], information
+ options = []
+ if self._preferedformat == 'avi':
+ options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
+ prefix, sep, ext = path.rpartition('.')
+ outpath = prefix + sep + self._preferedformat
self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
- self.run_ffmpeg(path, outpath, [])
+ self.run_ffmpeg(path, outpath, options)
information['filepath'] = outpath
information['format'] = self._preferedformat
information['ext'] = self._preferedformat
@@ -307,199 +313,6 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
- # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
- _lang_map = {
- 'aa': 'aar',
- 'ab': 'abk',
- 'ae': 'ave',
- 'af': 'afr',
- 'ak': 'aka',
- 'am': 'amh',
- 'an': 'arg',
- 'ar': 'ara',
- 'as': 'asm',
- 'av': 'ava',
- 'ay': 'aym',
- 'az': 'aze',
- 'ba': 'bak',
- 'be': 'bel',
- 'bg': 'bul',
- 'bh': 'bih',
- 'bi': 'bis',
- 'bm': 'bam',
- 'bn': 'ben',
- 'bo': 'bod',
- 'br': 'bre',
- 'bs': 'bos',
- 'ca': 'cat',
- 'ce': 'che',
- 'ch': 'cha',
- 'co': 'cos',
- 'cr': 'cre',
- 'cs': 'ces',
- 'cu': 'chu',
- 'cv': 'chv',
- 'cy': 'cym',
- 'da': 'dan',
- 'de': 'deu',
- 'dv': 'div',
- 'dz': 'dzo',
- 'ee': 'ewe',
- 'el': 'ell',
- 'en': 'eng',
- 'eo': 'epo',
- 'es': 'spa',
- 'et': 'est',
- 'eu': 'eus',
- 'fa': 'fas',
- 'ff': 'ful',
- 'fi': 'fin',
- 'fj': 'fij',
- 'fo': 'fao',
- 'fr': 'fra',
- 'fy': 'fry',
- 'ga': 'gle',
- 'gd': 'gla',
- 'gl': 'glg',
- 'gn': 'grn',
- 'gu': 'guj',
- 'gv': 'glv',
- 'ha': 'hau',
- 'he': 'heb',
- 'hi': 'hin',
- 'ho': 'hmo',
- 'hr': 'hrv',
- 'ht': 'hat',
- 'hu': 'hun',
- 'hy': 'hye',
- 'hz': 'her',
- 'ia': 'ina',
- 'id': 'ind',
- 'ie': 'ile',
- 'ig': 'ibo',
- 'ii': 'iii',
- 'ik': 'ipk',
- 'io': 'ido',
- 'is': 'isl',
- 'it': 'ita',
- 'iu': 'iku',
- 'ja': 'jpn',
- 'jv': 'jav',
- 'ka': 'kat',
- 'kg': 'kon',
- 'ki': 'kik',
- 'kj': 'kua',
- 'kk': 'kaz',
- 'kl': 'kal',
- 'km': 'khm',
- 'kn': 'kan',
- 'ko': 'kor',
- 'kr': 'kau',
- 'ks': 'kas',
- 'ku': 'kur',
- 'kv': 'kom',
- 'kw': 'cor',
- 'ky': 'kir',
- 'la': 'lat',
- 'lb': 'ltz',
- 'lg': 'lug',
- 'li': 'lim',
- 'ln': 'lin',
- 'lo': 'lao',
- 'lt': 'lit',
- 'lu': 'lub',
- 'lv': 'lav',
- 'mg': 'mlg',
- 'mh': 'mah',
- 'mi': 'mri',
- 'mk': 'mkd',
- 'ml': 'mal',
- 'mn': 'mon',
- 'mr': 'mar',
- 'ms': 'msa',
- 'mt': 'mlt',
- 'my': 'mya',
- 'na': 'nau',
- 'nb': 'nob',
- 'nd': 'nde',
- 'ne': 'nep',
- 'ng': 'ndo',
- 'nl': 'nld',
- 'nn': 'nno',
- 'no': 'nor',
- 'nr': 'nbl',
- 'nv': 'nav',
- 'ny': 'nya',
- 'oc': 'oci',
- 'oj': 'oji',
- 'om': 'orm',
- 'or': 'ori',
- 'os': 'oss',
- 'pa': 'pan',
- 'pi': 'pli',
- 'pl': 'pol',
- 'ps': 'pus',
- 'pt': 'por',
- 'qu': 'que',
- 'rm': 'roh',
- 'rn': 'run',
- 'ro': 'ron',
- 'ru': 'rus',
- 'rw': 'kin',
- 'sa': 'san',
- 'sc': 'srd',
- 'sd': 'snd',
- 'se': 'sme',
- 'sg': 'sag',
- 'si': 'sin',
- 'sk': 'slk',
- 'sl': 'slv',
- 'sm': 'smo',
- 'sn': 'sna',
- 'so': 'som',
- 'sq': 'sqi',
- 'sr': 'srp',
- 'ss': 'ssw',
- 'st': 'sot',
- 'su': 'sun',
- 'sv': 'swe',
- 'sw': 'swa',
- 'ta': 'tam',
- 'te': 'tel',
- 'tg': 'tgk',
- 'th': 'tha',
- 'ti': 'tir',
- 'tk': 'tuk',
- 'tl': 'tgl',
- 'tn': 'tsn',
- 'to': 'ton',
- 'tr': 'tur',
- 'ts': 'tso',
- 'tt': 'tat',
- 'tw': 'twi',
- 'ty': 'tah',
- 'ug': 'uig',
- 'uk': 'ukr',
- 'ur': 'urd',
- 'uz': 'uzb',
- 've': 'ven',
- 'vi': 'vie',
- 'vo': 'vol',
- 'wa': 'wln',
- 'wo': 'wol',
- 'xh': 'xho',
- 'yi': 'yid',
- 'yo': 'yor',
- 'za': 'zha',
- 'zh': 'zho',
- 'zu': 'zul',
- }
-
- @classmethod
- def _conver_lang_code(cls, code):
- """Convert language code from ISO 639-1 to ISO 639-2/T"""
- return cls._lang_map.get(code[:2])
-
def run(self, information):
if information['ext'] not in ['mp4', 'mkv']:
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
@@ -525,7 +338,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
opts += ['-c:s', 'mov_text']
for (i, lang) in enumerate(sub_langs):
opts.extend(['-map', '%d:0' % (i + 1)])
- lang_code = self._conver_lang_code(lang)
+ lang_code = ISO639Utils.short2long(lang)
if lang_code is not None:
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index de3169e..fc7ac83 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -50,7 +50,7 @@ def rsa_verify(message, signature, key):
def update_self(to_screen, verbose):
"""Update the program file with the latest version from the repository"""
- UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
+ UPDATE_URL = "https://rg3.github.io/youtube-dl/update/"
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 52d198f..942f76d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -62,6 +62,8 @@ std_headers = {
}
+NO_DEFAULT = object()
+
ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December']
@@ -171,13 +173,15 @@ def xpath_with_ns(path, ns_map):
return '/'.join(replaced)
-def xpath_text(node, xpath, name=None, fatal=False):
+def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
if sys.version_info < (2, 7): # Crazy 2.6
xpath = xpath.encode('ascii')
n = node.find(xpath)
if n is None or n.text is None:
- if fatal:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
name = xpath if name is None else name
raise ExtractorError('Could not find XML element %s' % name)
else:
@@ -1841,7 +1845,10 @@ def srt_subtitles_timecode(seconds):
def dfxp2srt(dfxp_data):
- _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
+ _x = functools.partial(xpath_with_ns, ns_map={
+ 'ttml': 'http://www.w3.org/ns/ttml',
+ 'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
+ })
def parse_node(node):
str_or_empty = functools.partial(str_or_none, default='')
@@ -1849,9 +1856,9 @@ def dfxp2srt(dfxp_data):
out = str_or_empty(node.text)
for child in node:
- if child.tag in (_x('ttml:br'), 'br'):
+ if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
out += '\n' + str_or_empty(child.tail)
- elif child.tag in (_x('ttml:span'), 'span'):
+ elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
out += str_or_empty(parse_node(child))
else:
out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1860,7 +1867,7 @@ def dfxp2srt(dfxp_data):
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
out = []
- paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
if not paras:
raise ValueError('Invalid dfxp/TTML subtitle')
@@ -1879,6 +1886,468 @@ def dfxp2srt(dfxp_data):
return ''.join(out)
+class ISO639Utils(object):
+ # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
+ _lang_map = {
+ 'aa': 'aar',
+ 'ab': 'abk',
+ 'ae': 'ave',
+ 'af': 'afr',
+ 'ak': 'aka',
+ 'am': 'amh',
+ 'an': 'arg',
+ 'ar': 'ara',
+ 'as': 'asm',
+ 'av': 'ava',
+ 'ay': 'aym',
+ 'az': 'aze',
+ 'ba': 'bak',
+ 'be': 'bel',
+ 'bg': 'bul',
+ 'bh': 'bih',
+ 'bi': 'bis',
+ 'bm': 'bam',
+ 'bn': 'ben',
+ 'bo': 'bod',
+ 'br': 'bre',
+ 'bs': 'bos',
+ 'ca': 'cat',
+ 'ce': 'che',
+ 'ch': 'cha',
+ 'co': 'cos',
+ 'cr': 'cre',
+ 'cs': 'ces',
+ 'cu': 'chu',
+ 'cv': 'chv',
+ 'cy': 'cym',
+ 'da': 'dan',
+ 'de': 'deu',
+ 'dv': 'div',
+ 'dz': 'dzo',
+ 'ee': 'ewe',
+ 'el': 'ell',
+ 'en': 'eng',
+ 'eo': 'epo',
+ 'es': 'spa',
+ 'et': 'est',
+ 'eu': 'eus',
+ 'fa': 'fas',
+ 'ff': 'ful',
+ 'fi': 'fin',
+ 'fj': 'fij',
+ 'fo': 'fao',
+ 'fr': 'fra',
+ 'fy': 'fry',
+ 'ga': 'gle',
+ 'gd': 'gla',
+ 'gl': 'glg',
+ 'gn': 'grn',
+ 'gu': 'guj',
+ 'gv': 'glv',
+ 'ha': 'hau',
+ 'he': 'heb',
+ 'hi': 'hin',
+ 'ho': 'hmo',
+ 'hr': 'hrv',
+ 'ht': 'hat',
+ 'hu': 'hun',
+ 'hy': 'hye',
+ 'hz': 'her',
+ 'ia': 'ina',
+ 'id': 'ind',
+ 'ie': 'ile',
+ 'ig': 'ibo',
+ 'ii': 'iii',
+ 'ik': 'ipk',
+ 'io': 'ido',
+ 'is': 'isl',
+ 'it': 'ita',
+ 'iu': 'iku',
+ 'ja': 'jpn',
+ 'jv': 'jav',
+ 'ka': 'kat',
+ 'kg': 'kon',
+ 'ki': 'kik',
+ 'kj': 'kua',
+ 'kk': 'kaz',
+ 'kl': 'kal',
+ 'km': 'khm',
+ 'kn': 'kan',
+ 'ko': 'kor',
+ 'kr': 'kau',
+ 'ks': 'kas',
+ 'ku': 'kur',
+ 'kv': 'kom',
+ 'kw': 'cor',
+ 'ky': 'kir',
+ 'la': 'lat',
+ 'lb': 'ltz',
+ 'lg': 'lug',
+ 'li': 'lim',
+ 'ln': 'lin',
+ 'lo': 'lao',
+ 'lt': 'lit',
+ 'lu': 'lub',
+ 'lv': 'lav',
+ 'mg': 'mlg',
+ 'mh': 'mah',
+ 'mi': 'mri',
+ 'mk': 'mkd',
+ 'ml': 'mal',
+ 'mn': 'mon',
+ 'mr': 'mar',
+ 'ms': 'msa',
+ 'mt': 'mlt',
+ 'my': 'mya',
+ 'na': 'nau',
+ 'nb': 'nob',
+ 'nd': 'nde',
+ 'ne': 'nep',
+ 'ng': 'ndo',
+ 'nl': 'nld',
+ 'nn': 'nno',
+ 'no': 'nor',
+ 'nr': 'nbl',
+ 'nv': 'nav',
+ 'ny': 'nya',
+ 'oc': 'oci',
+ 'oj': 'oji',
+ 'om': 'orm',
+ 'or': 'ori',
+ 'os': 'oss',
+ 'pa': 'pan',
+ 'pi': 'pli',
+ 'pl': 'pol',
+ 'ps': 'pus',
+ 'pt': 'por',
+ 'qu': 'que',
+ 'rm': 'roh',
+ 'rn': 'run',
+ 'ro': 'ron',
+ 'ru': 'rus',
+ 'rw': 'kin',
+ 'sa': 'san',
+ 'sc': 'srd',
+ 'sd': 'snd',
+ 'se': 'sme',
+ 'sg': 'sag',
+ 'si': 'sin',
+ 'sk': 'slk',
+ 'sl': 'slv',
+ 'sm': 'smo',
+ 'sn': 'sna',
+ 'so': 'som',
+ 'sq': 'sqi',
+ 'sr': 'srp',
+ 'ss': 'ssw',
+ 'st': 'sot',
+ 'su': 'sun',
+ 'sv': 'swe',
+ 'sw': 'swa',
+ 'ta': 'tam',
+ 'te': 'tel',
+ 'tg': 'tgk',
+ 'th': 'tha',
+ 'ti': 'tir',
+ 'tk': 'tuk',
+ 'tl': 'tgl',
+ 'tn': 'tsn',
+ 'to': 'ton',
+ 'tr': 'tur',
+ 'ts': 'tso',
+ 'tt': 'tat',
+ 'tw': 'twi',
+ 'ty': 'tah',
+ 'ug': 'uig',
+ 'uk': 'ukr',
+ 'ur': 'urd',
+ 'uz': 'uzb',
+ 've': 'ven',
+ 'vi': 'vie',
+ 'vo': 'vol',
+ 'wa': 'wln',
+ 'wo': 'wol',
+ 'xh': 'xho',
+ 'yi': 'yid',
+ 'yo': 'yor',
+ 'za': 'zha',
+ 'zh': 'zho',
+ 'zu': 'zul',
+ }
+
+ @classmethod
+ def short2long(cls, code):
+ """Convert language code from ISO 639-1 to ISO 639-2/T"""
+ return cls._lang_map.get(code[:2])
+
+ @classmethod
+ def long2short(cls, code):
+ """Convert language code from ISO 639-2/T to ISO 639-1"""
+ for short_name, long_name in cls._lang_map.items():
+ if long_name == code:
+ return short_name
+
+
+class ISO3166Utils(object):
+ # From http://data.okfn.org/data/core/country-list
+ _country_map = {
+ 'AF': 'Afghanistan',
+ 'AX': 'Åland Islands',
+ 'AL': 'Albania',
+ 'DZ': 'Algeria',
+ 'AS': 'American Samoa',
+ 'AD': 'Andorra',
+ 'AO': 'Angola',
+ 'AI': 'Anguilla',
+ 'AQ': 'Antarctica',
+ 'AG': 'Antigua and Barbuda',
+ 'AR': 'Argentina',
+ 'AM': 'Armenia',
+ 'AW': 'Aruba',
+ 'AU': 'Australia',
+ 'AT': 'Austria',
+ 'AZ': 'Azerbaijan',
+ 'BS': 'Bahamas',
+ 'BH': 'Bahrain',
+ 'BD': 'Bangladesh',
+ 'BB': 'Barbados',
+ 'BY': 'Belarus',
+ 'BE': 'Belgium',
+ 'BZ': 'Belize',
+ 'BJ': 'Benin',
+ 'BM': 'Bermuda',
+ 'BT': 'Bhutan',
+ 'BO': 'Bolivia, Plurinational State of',
+ 'BQ': 'Bonaire, Sint Eustatius and Saba',
+ 'BA': 'Bosnia and Herzegovina',
+ 'BW': 'Botswana',
+ 'BV': 'Bouvet Island',
+ 'BR': 'Brazil',
+ 'IO': 'British Indian Ocean Territory',
+ 'BN': 'Brunei Darussalam',
+ 'BG': 'Bulgaria',
+ 'BF': 'Burkina Faso',
+ 'BI': 'Burundi',
+ 'KH': 'Cambodia',
+ 'CM': 'Cameroon',
+ 'CA': 'Canada',
+ 'CV': 'Cape Verde',
+ 'KY': 'Cayman Islands',
+ 'CF': 'Central African Republic',
+ 'TD': 'Chad',
+ 'CL': 'Chile',
+ 'CN': 'China',
+ 'CX': 'Christmas Island',
+ 'CC': 'Cocos (Keeling) Islands',
+ 'CO': 'Colombia',
+ 'KM': 'Comoros',
+ 'CG': 'Congo',
+ 'CD': 'Congo, the Democratic Republic of the',
+ 'CK': 'Cook Islands',
+ 'CR': 'Costa Rica',
+ 'CI': 'Côte d\'Ivoire',
+ 'HR': 'Croatia',
+ 'CU': 'Cuba',
+ 'CW': 'Curaçao',
+ 'CY': 'Cyprus',
+ 'CZ': 'Czech Republic',
+ 'DK': 'Denmark',
+ 'DJ': 'Djibouti',
+ 'DM': 'Dominica',
+ 'DO': 'Dominican Republic',
+ 'EC': 'Ecuador',
+ 'EG': 'Egypt',
+ 'SV': 'El Salvador',
+ 'GQ': 'Equatorial Guinea',
+ 'ER': 'Eritrea',
+ 'EE': 'Estonia',
+ 'ET': 'Ethiopia',
+ 'FK': 'Falkland Islands (Malvinas)',
+ 'FO': 'Faroe Islands',
+ 'FJ': 'Fiji',
+ 'FI': 'Finland',
+ 'FR': 'France',
+ 'GF': 'French Guiana',
+ 'PF': 'French Polynesia',
+ 'TF': 'French Southern Territories',
+ 'GA': 'Gabon',
+ 'GM': 'Gambia',
+ 'GE': 'Georgia',
+ 'DE': 'Germany',
+ 'GH': 'Ghana',
+ 'GI': 'Gibraltar',
+ 'GR': 'Greece',
+ 'GL': 'Greenland',
+ 'GD': 'Grenada',
+ 'GP': 'Guadeloupe',
+ 'GU': 'Guam',
+ 'GT': 'Guatemala',
+ 'GG': 'Guernsey',
+ 'GN': 'Guinea',
+ 'GW': 'Guinea-Bissau',
+ 'GY': 'Guyana',
+ 'HT': 'Haiti',
+ 'HM': 'Heard Island and McDonald Islands',
+ 'VA': 'Holy See (Vatican City State)',
+ 'HN': 'Honduras',
+ 'HK': 'Hong Kong',
+ 'HU': 'Hungary',
+ 'IS': 'Iceland',
+ 'IN': 'India',
+ 'ID': 'Indonesia',
+ 'IR': 'Iran, Islamic Republic of',
+ 'IQ': 'Iraq',
+ 'IE': 'Ireland',
+ 'IM': 'Isle of Man',
+ 'IL': 'Israel',
+ 'IT': 'Italy',
+ 'JM': 'Jamaica',
+ 'JP': 'Japan',
+ 'JE': 'Jersey',
+ 'JO': 'Jordan',
+ 'KZ': 'Kazakhstan',
+ 'KE': 'Kenya',
+ 'KI': 'Kiribati',
+ 'KP': 'Korea, Democratic People\'s Republic of',
+ 'KR': 'Korea, Republic of',
+ 'KW': 'Kuwait',
+ 'KG': 'Kyrgyzstan',
+ 'LA': 'Lao People\'s Democratic Republic',
+ 'LV': 'Latvia',
+ 'LB': 'Lebanon',
+ 'LS': 'Lesotho',
+ 'LR': 'Liberia',
+ 'LY': 'Libya',
+ 'LI': 'Liechtenstein',
+ 'LT': 'Lithuania',
+ 'LU': 'Luxembourg',
+ 'MO': 'Macao',
+ 'MK': 'Macedonia, the Former Yugoslav Republic of',
+ 'MG': 'Madagascar',
+ 'MW': 'Malawi',
+ 'MY': 'Malaysia',
+ 'MV': 'Maldives',
+ 'ML': 'Mali',
+ 'MT': 'Malta',
+ 'MH': 'Marshall Islands',
+ 'MQ': 'Martinique',
+ 'MR': 'Mauritania',
+ 'MU': 'Mauritius',
+ 'YT': 'Mayotte',
+ 'MX': 'Mexico',
+ 'FM': 'Micronesia, Federated States of',
+ 'MD': 'Moldova, Republic of',
+ 'MC': 'Monaco',
+ 'MN': 'Mongolia',
+ 'ME': 'Montenegro',
+ 'MS': 'Montserrat',
+ 'MA': 'Morocco',
+ 'MZ': 'Mozambique',
+ 'MM': 'Myanmar',
+ 'NA': 'Namibia',
+ 'NR': 'Nauru',
+ 'NP': 'Nepal',
+ 'NL': 'Netherlands',
+ 'NC': 'New Caledonia',
+ 'NZ': 'New Zealand',
+ 'NI': 'Nicaragua',
+ 'NE': 'Niger',
+ 'NG': 'Nigeria',
+ 'NU': 'Niue',
+ 'NF': 'Norfolk Island',
+ 'MP': 'Northern Mariana Islands',
+ 'NO': 'Norway',
+ 'OM': 'Oman',
+ 'PK': 'Pakistan',
+ 'PW': 'Palau',
+ 'PS': 'Palestine, State of',
+ 'PA': 'Panama',
+ 'PG': 'Papua New Guinea',
+ 'PY': 'Paraguay',
+ 'PE': 'Peru',
+ 'PH': 'Philippines',
+ 'PN': 'Pitcairn',
+ 'PL': 'Poland',
+ 'PT': 'Portugal',
+ 'PR': 'Puerto Rico',
+ 'QA': 'Qatar',
+ 'RE': 'Réunion',
+ 'RO': 'Romania',
+ 'RU': 'Russian Federation',
+ 'RW': 'Rwanda',
+ 'BL': 'Saint Barthélemy',
+ 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
+ 'KN': 'Saint Kitts and Nevis',
+ 'LC': 'Saint Lucia',
+ 'MF': 'Saint Martin (French part)',
+ 'PM': 'Saint Pierre and Miquelon',
+ 'VC': 'Saint Vincent and the Grenadines',
+ 'WS': 'Samoa',
+ 'SM': 'San Marino',
+ 'ST': 'Sao Tome and Principe',
+ 'SA': 'Saudi Arabia',
+ 'SN': 'Senegal',
+ 'RS': 'Serbia',
+ 'SC': 'Seychelles',
+ 'SL': 'Sierra Leone',
+ 'SG': 'Singapore',
+ 'SX': 'Sint Maarten (Dutch part)',
+ 'SK': 'Slovakia',
+ 'SI': 'Slovenia',
+ 'SB': 'Solomon Islands',
+ 'SO': 'Somalia',
+ 'ZA': 'South Africa',
+ 'GS': 'South Georgia and the South Sandwich Islands',
+ 'SS': 'South Sudan',
+ 'ES': 'Spain',
+ 'LK': 'Sri Lanka',
+ 'SD': 'Sudan',
+ 'SR': 'Suriname',
+ 'SJ': 'Svalbard and Jan Mayen',
+ 'SZ': 'Swaziland',
+ 'SE': 'Sweden',
+ 'CH': 'Switzerland',
+ 'SY': 'Syrian Arab Republic',
+ 'TW': 'Taiwan, Province of China',
+ 'TJ': 'Tajikistan',
+ 'TZ': 'Tanzania, United Republic of',
+ 'TH': 'Thailand',
+ 'TL': 'Timor-Leste',
+ 'TG': 'Togo',
+ 'TK': 'Tokelau',
+ 'TO': 'Tonga',
+ 'TT': 'Trinidad and Tobago',
+ 'TN': 'Tunisia',
+ 'TR': 'Turkey',
+ 'TM': 'Turkmenistan',
+ 'TC': 'Turks and Caicos Islands',
+ 'TV': 'Tuvalu',
+ 'UG': 'Uganda',
+ 'UA': 'Ukraine',
+ 'AE': 'United Arab Emirates',
+ 'GB': 'United Kingdom',
+ 'US': 'United States',
+ 'UM': 'United States Minor Outlying Islands',
+ 'UY': 'Uruguay',
+ 'UZ': 'Uzbekistan',
+ 'VU': 'Vanuatu',
+ 'VE': 'Venezuela, Bolivarian Republic of',
+ 'VN': 'Viet Nam',
+ 'VG': 'Virgin Islands, British',
+ 'VI': 'Virgin Islands, U.S.',
+ 'WF': 'Wallis and Futuna',
+ 'EH': 'Western Sahara',
+ 'YE': 'Yemen',
+ 'ZM': 'Zambia',
+ 'ZW': 'Zimbabwe',
+ }
+
+ @classmethod
+ def short2full(cls, code):
+ """Convert an ISO 3166-2 country code to the corresponding full name"""
+ return cls._country_map.get(code.upper())
+
+
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 9cf84ff..280afdd 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.06.04.1'
+__version__ = '2015.07.21'