diff options
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | README.txt | 4 | ||||
-rw-r--r-- | devscripts/youtube_genalgo.py | 76 | ||||
-rw-r--r-- | test/helper.py | 33 | ||||
-rw-r--r-- | test/test_youtube_lists.py | 24 | ||||
-rwxr-xr-x | test/test_youtube_sig.py | 57 | ||||
-rw-r--r-- | test/test_youtube_subtitles.py | 38 | ||||
-rw-r--r-- | test/tests.json | 19 | ||||
-rwxr-xr-x | youtube-dl | bin | 117178 -> 118827 bytes | |||
-rw-r--r-- | youtube-dl.1 | 4 | ||||
-rw-r--r-- | youtube-dl.bash-completion | 2 | ||||
-rw-r--r-- | youtube_dl/FileDownloader.py | 2 | ||||
-rw-r--r-- | youtube_dl/YoutubeDL.py | 5 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/arte.py | 122 | ||||
-rw-r--r-- | youtube_dl/extractor/comedycentral.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/cspan.py | 44 | ||||
-rw-r--r-- | youtube_dl/extractor/vevo.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/wimp.py | 28 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 48 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
22 files changed, 361 insertions, 164 deletions
@@ -116,12 +116,14 @@ which means you can modify it, redistribute it or use it however you like. -F, --list-formats list all available formats (currently youtube only) --write-sub write subtitle file (currently youtube only) + --write-auto-sub write automatic subtitle file (currently youtube + only) --only-sub [deprecated] alias of --skip-download --all-subs downloads all the available subtitles of the video (currently youtube only) --list-subs lists all available subtitles for the video (currently youtube only) - --sub-format FORMAT subtitle format [srt/sbv] (default=srt) + --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only) --sub-lang LANG language of the subtitles to download (optional) use IETF language tags like 'en' @@ -131,12 +131,14 @@ Video Format Options: -F, --list-formats list all available formats (currently youtube only) --write-sub write subtitle file (currently youtube only) + --write-auto-sub write automatic subtitle file (currently youtube + only) --only-sub [deprecated] alias of --skip-download --all-subs downloads all the available subtitles of the video (currently youtube only) --list-subs lists all available subtitles for the video (currently youtube only) - --sub-format FORMAT subtitle format [srt/sbv] (default=srt) + --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only) --sub-lang LANG language of the subtitles to download (optional) use IETF language tags like 'en' diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py new file mode 100644 index 0000000..b168cea --- /dev/null +++ b/devscripts/youtube_genalgo.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +# Generate youtube signature algorithm from test cases + +import sys + +tests = [ + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", + "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", + "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", + "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", + "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", + "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", + "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"), + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", + "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), +] + +def find_matching(wrong, right): + idxs = [wrong.index(c) for c in right] + return compress(idxs) + return ('s[%d]' % i for i in idxs) + +def compress(idxs): + def _genslice(start, end, step): + starts = '' if start == 0 else str(start) + ends = ':%d' % (end+step) + steps = '' if step == 1 else (':%d' % step) + return 's[%s%s%s]' % (starts, ends, steps) + + step = None + for i, prev in zip(idxs[1:], idxs[:-1]): + if step is not None: + if i - prev == step: + continue + yield _genslice(start, prev, step) + step = None + continue + if i - prev in [-1, 1]: + step = i - prev + start = prev + continue + else: + yield 's[%d]' % prev + if step is None: + yield 's[%d]' % i + else: + yield _genslice(start, i, step) + +def _assert_compress(inp, exp): + res = list(compress(inp)) + if res != exp: + print('Got %r, expected %r' % (res, exp)) + assert res == exp +_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]']) +_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]']) +_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]']) + +def gen(wrong, right, indent): + code = ' + '.join(find_matching(wrong, right)) + return 'if len(s) == %d:\n%s return %s\n' % (len(wrong), indent, code) + +def genall(tests): + indent = ' ' * 8 + return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests) + +def main(): + print(genall(tests)) + +if __name__ == '__main__': + main() diff --git a/test/helper.py b/test/helper.py new file mode 100644 index 0000000..842ffc2 --- /dev/null +++ b/test/helper.py @@ -0,0 +1,33 @@ +import io +import json +import os.path + +from youtube_dl import YoutubeDL, YoutubeDLHandler +from youtube_dl.utils import ( + compat_cookiejar, + compat_urllib_request, +) + +# General configuration (from __init__, not very elegant...) +jar = compat_cookiejar.CookieJar() +cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) +proxy_handler = compat_urllib_request.ProxyHandler() +opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) +compat_urllib_request.install_opener(opener) + +PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") +with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + parameters = json.load(pf) + +class FakeYDL(YoutubeDL): + def __init__(self): + self.result = [] + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. + self.params = dict(parameters) + def to_screen(self, s): + print(s) + def trouble(self, s, tb=None): + raise Exception(s) + def download(self, x): + self.result.append(x)
\ No newline at end of file diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 320b440..4486b7e 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,30 +10,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE from youtube_dl.utils import * -from youtube_dl import YoutubeDL -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) - -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) - -class FakeYDL(YoutubeDL): - def __init__(self): - self.result = [] - self.params = parameters - def to_screen(self, s): - print(s) - def trouble(self, s, tb=None): - raise Exception(s) - def extract_info(self, url): - self.result.append(url) - return url +from helper import FakeYDL class TestYoutubeLists(unittest.TestCase): def assertIsPlaylist(self,info): diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py new file mode 100755 index 0000000..e87b625 --- /dev/null +++ b/test/test_youtube_sig.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +import unittest +import sys + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.extractor.youtube import YoutubeIE +from helper import FakeYDL + +sig = YoutubeIE(FakeYDL())._decrypt_signature + +class TestYoutubeSig(unittest.TestCase): + def test_43_43(self): + wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135' + right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE' + self.assertEqual(sig(wrong), right) + + def test_88(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" + right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" + self.assertEqual(sig(wrong), right) + + def test_87(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" + right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr" + self.assertEqual(sig(wrong), right) + + def test_86(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" + right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@" + self.assertEqual(sig(wrong), right) + + def test_85(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" + right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr" + self.assertEqual(sig(wrong), right) + + def test_84(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<" + right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1" + self.assertEqual(sig(wrong), right) + + def test_83(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" + right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ" + self.assertEqual(sig(wrong), right) + + def test_82(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<" + right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9" + self.assertEqual(sig(wrong), right) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index e8f5e4a..86e09c9 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -12,31 +12,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeIE from youtube_dl.utils import * -from youtube_dl import YoutubeDL - -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) - -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) - -class FakeYDL(YoutubeDL): - def __init__(self): - self.result = [] - # Different instances of the downloader can't share the same dictionary - # some test set the "sublang" parameter, which would break the md5 checks. - self.params = dict(parameters) - def to_screen(self, s): - print(s) - def trouble(self, s, tb=None): - raise Exception(s) - def download(self, x): - self.result.append(x) +from helper import FakeYDL md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() @@ -84,7 +60,7 @@ class TestYoutubeSubtitles(unittest.TestCase): info_dict = IE.extract('QRS8MkLhQmM') subtitles = info_dict[0]['subtitles'] self.assertEqual(len(subtitles), 13) - def test_youtube_subtitles_format(self): + def test_youtube_subtitles_sbv_format(self): DL = FakeYDL() DL.params['writesubtitles'] = True DL.params['subtitlesformat'] = 'sbv' @@ -92,6 +68,14 @@ class TestYoutubeSubtitles(unittest.TestCase): info_dict = IE.extract('QRS8MkLhQmM') sub = info_dict[0]['subtitles'][0] self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') + def test_youtube_subtitles_vtt_format(self): + DL = FakeYDL() + DL.params['writesubtitles'] = True + DL.params['subtitlesformat'] = 'vtt' + IE = YoutubeIE(DL) + info_dict = IE.extract('QRS8MkLhQmM') + sub = info_dict[0]['subtitles'][0] + self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7') def test_youtube_list_subtitles(self): DL = FakeYDL() DL.params['listsubtitles'] = True @@ -100,7 +84,7 @@ class TestYoutubeSubtitles(unittest.TestCase): self.assertEqual(info_dict, None) def test_youtube_automatic_captions(self): DL = FakeYDL() - DL.params['writesubtitles'] = True + DL.params['writeautomaticsub'] = True DL.params['subtitleslang'] = 'it' IE = YoutubeIE(DL) info_dict = IE.extract('8YoUxe5ncPo') diff --git a/test/tests.json b/test/tests.json index 5f4f642..ebc7a12 100644 --- a/test/tests.json +++ b/test/tests.json @@ -695,5 +695,24 @@ "info_dict": { "title": "卡马乔国足开大脚长传冲吊集锦" } + }, + { + "name": "CSpan", + "url": "http://www.c-spanvideo.org/program/HolderonV", + "file": "315139.flv", + "md5": "74a623266956f69e4df0068ab6c80fe4", + "info_dict": { + "title": "Attorney General Eric Holder on Voting Rights Act Decision" + }, + "skip": "Requires rtmpdump" + }, + { + "name": "Wimp", + "url": "http://www.wimp.com/deerfence/", + "file": "deerfence.flv", + "md5": "8b215e2e0168c6081a1cf84b2846a2b5", + "info_dict": { + "title": "Watch Till End: Herd of deer jump over a fence." + } } ] Binary files differdiff --git a/youtube-dl.1 b/youtube-dl.1 index 9707195..0ac019f 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -136,12 +136,14 @@ redistribute it or use it however you like. \-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ list\ all\ available\ formats\ (currently\ youtube \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only) \-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file\ (currently\ youtube\ only) +\-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ write\ automatic\ subtitle\ file\ (currently\ youtube +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only) \-\-only\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-skip\-download \-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of\ the \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ (currently\ youtube\ only) \-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only) -\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv]\ (default=srt) +\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv/vtt]\ (default=srt) \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only) \-\-sub\-lang\ LANG\ \ \ \ \ \ \ \ \ \ \ \ language\ of\ the\ subtitles\ to\ download\ (optional) \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ IETF\ language\ tags\ like\ \[aq]en\[aq] diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion index 67409bc..a3e9bdf 100644 --- a/youtube-dl.bash-completion +++ b/youtube-dl.bash-completion @@ -3,7 +3,7 @@ __youtube-dl() local cur prev opts COMPREPLY=() cur="${COMP_WORDS[COMP_CWORD]}" - opts="--help --version --update --ignore-errors --rate-limit --retries --buffer-size --no-resize-buffer --dump-user-agent --user-agent --referer --list-extractors --proxy --no-check-certificate --test --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites" + opts="--help --version --update --ignore-errors --rate-limit --retries --buffer-size --no-resize-buffer --dump-user-agent --user-agent --referer --list-extractors --proxy --no-check-certificate --test --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites" if [[ ${cur} == * ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 445f3e8..155895f 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -137,7 +137,7 @@ class FileDownloader(object): self.ydl.report_warning(*args, **kargs) def report_error(self, *args, **kargs): - self.ydl.error(*args, **kargs) + self.ydl.report_error(*args, **kargs) def slow_down(self, start_time, byte_counter): """Sleep if the download speed is over the rate limit.""" diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b4a966b..9931c98 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -72,9 +72,10 @@ class YoutubeDL(object): writeinfojson: Write the video description to a .info.json file writethumbnail: Write the thumbnail image to a file writesubtitles: Write the video subtitles to a file + writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video listsubtitles: Lists all available subtitles for the video - subtitlesformat: Subtitle format [sbv/srt] (default=srt) + subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) subtitleslang: Language of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. @@ -474,7 +475,7 @@ class YoutubeDL(object): self.report_error(u'Cannot write description file ' + descfn) return - if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: + if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitle = info_dict['subtitles'][0] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2acaab6..6a8fc5e 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -191,6 +191,9 @@ def parseOpts(overrideArguments=None): video_format.add_option('--write-sub', '--write-srt', action='store_true', dest='writesubtitles', help='write subtitle file (currently youtube only)', default=False) + video_format.add_option('--write-auto-sub', '--write-automatic-sub', + action='store_true', dest='writeautomaticsub', + help='write automatic subtitle file (currently youtube only)', default=False) video_format.add_option('--only-sub', action='store_true', dest='skip_download', help='[deprecated] alias of --skip-download', default=False) @@ -202,7 +205,7 @@ def parseOpts(overrideArguments=None): help='lists all available subtitles for the video (currently youtube only)', default=False) video_format.add_option('--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', - help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt') + help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt') video_format.add_option('--sub-lang', '--srt-lang', action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') @@ -537,6 +540,7 @@ def _real_main(argv=None): 'writeinfojson': opts.writeinfojson, 'writethumbnail': opts.writethumbnail, 'writesubtitles': opts.writesubtitles, + 'writeautomaticsub': opts.writeautomaticsub, 'allsubtitles': opts.allsubtitles, 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0ea9908..2750fc8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .bliptv import BlipTVIE, BlipTVUserIE from .breakcom import BreakIE from .collegehumor import CollegeHumorIE from .comedycentral import ComedyCentralIE +from .cspan import CSpanIE from .dailymotion import DailymotionIE from .depositfiles import DepositFilesIE from .eighttracks import EightTracksIE @@ -49,6 +50,7 @@ from .vbox7 import Vbox7IE from .vevo import VevoIE from .vimeo import VimeoIE from .vine import VineIE +from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE from .xnxx import XNXXIE @@ -132,6 +134,8 @@ def gen_extractors(): VevoIE(), JukeboxIE(), TudouIE(), + CSpanIE(), + WimpIE(), GenericIE() ] diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 82e3ffe..b061b95 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,53 +1,21 @@ import re -import socket +import json from .common import InfoExtractor from ..utils import ( - compat_http_client, - compat_str, - compat_urllib_error, + # This is used by the not implemented extractLiveStream method compat_urllib_parse, - compat_urllib_request, ExtractorError, unified_strdate, ) class ArteTvIE(InfoExtractor): - """arte.tv information extractor.""" - - _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*' + _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' - def fetch_webpage(self, url): - request = compat_urllib_request.Request(url) - try: - self.report_download_webpage(url) - webpage = compat_urllib_request.urlopen(request).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err)) - except ValueError as err: - raise ExtractorError(u'Invalid URL: %s' % url) - return webpage - - def grep_webpage(self, url, regex, regexFlags, matchTuples): - page = self.fetch_webpage(url) - mobj = re.search(regex, page, regexFlags) - info = {} - - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - - for (i, key, err) in matchTuples: - if mobj.group(i) is None: - raise ExtractorError(err) - else: - info[key] = mobj.group(i) - - return info - # TODO implement Live Stream # def extractLiveStream(self, url): # video_lang = url.split('/')[-4] @@ -75,62 +43,44 @@ class ArteTvIE(InfoExtractor): # ) # video_url = u'%s/%s' % (info.get('url'), info.get('path')) - def extractPlus7Stream(self, url): - video_lang = url.split('/')[-3] - info = self.grep_webpage( - url, - r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)', - 0, - [ - (1, 'url', u'Invalid URL: %s' % url) - ] - ) - next_url = compat_urllib_parse.unquote(info.get('url')) - info = self.grep_webpage( - next_url, - r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang, - 0, - [ - (1, 'url', u'Could not find <video> tag: %s' % url) - ] - ) - next_url = compat_urllib_parse.unquote(info.get('url')) - - info = self.grep_webpage( - next_url, - r'<video id="(.*?)".*?>.*?' + - '<name>(.*?)</name>.*?' + - '<dateVideo>(.*?)</dateVideo>.*?' + - '<url quality="hd">(.*?)</url>', - re.DOTALL, - [ - (1, 'id', u'could not extract video id: %s' % url), - (2, 'title', u'could not extract video title: %s' % url), - (3, 'date', u'could not extract video date: %s' % url), - (4, 'url', u'could not extract video url: %s' % url) - ] - ) - - return { - 'id': info.get('id'), - 'url': compat_urllib_parse.unquote(info.get('url')), - 'uploader': u'arte.tv', - 'upload_date': unified_strdate(info.get('date')), - 'title': info.get('title').decode('utf-8'), - 'ext': u'mp4', - 'format': u'NA', - 'player_url': None, - } - def _real_extract(self, url): - video_id = url.split('/')[-1] - self.report_extraction(video_id) + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + # This is not a real id, it can be for example AJT for the news + # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal + video_id = mobj.group('id') if re.search(self._LIVE_URL, video_id) is not None: raise ExtractorError(u'Arte live streams are not yet supported, sorry') # self.extractLiveStream(url) # return + + webpage = self._download_webpage(url, video_id) + json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') + + json_info = self._download_webpage(json_url, video_id, 'Downloading info json') + self.report_extraction(video_id) + info = json.loads(json_info) + player_info = info['videoJsonPlayer'] + + info_dict = {'id': player_info['VID'], + 'title': player_info['VTI'], + 'description': player_info['VDE'], + 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), + 'thumbnail': player_info['programImage'], + } + + formats = player_info['VSR'].values() + # We order the formats by quality + formats = sorted(formats, key=lambda f: int(f['height'])) + # Pick the best quality + format_info = formats[-1] + if format_info['mediaType'] == u'rtmp': + info_dict['url'] = format_info['streamer'] + info_dict['play_path'] = 'mp4:' + format_info['url'] + info_dict['ext'] = 'mp4' else: - info = self.extractPlus7Stream(url) + info_dict['url'] = format_info['url'] + info_dict['ext'] = 'mp4' - return [info] + return info_dict diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 1bb3590..6985e88 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -172,7 +172,7 @@ class ComedyCentralIE(InfoExtractor): 'ext': 'mp4', 'format': format, 'thumbnail': None, - 'description': officialTitle, + 'description': compat_str(officialTitle), } results.append(info) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py new file mode 100644 index 0000000..2246515 --- /dev/null +++ b/youtube_dl/extractor/cspan.py @@ -0,0 +1,44 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, +) + +class CSpanIE(InfoExtractor): + _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + prog_name = mobj.group(1) + webpage = self._download_webpage(url, prog_name) + video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id') + data = compat_urllib_parse.urlencode({'programid': video_id, + 'dynamic':'1'}) + info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data + video_info = self._download_webpage(info_url, video_id, u'Downloading video info') + + self.report_extraction(video_id) + + title = self._html_search_regex(r'<string name="title">(.*?)</string>', + video_info, 'title') + description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"', + webpage, 'description', + flags=re.MULTILINE|re.DOTALL) + thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.*?)"', + webpage, 'thumbnail') + + url = self._search_regex(r'<string name="URL">(.*?)</string>', + video_info, 'video url') + url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443') + path = self._search_regex(r'<string name="path">(.*?)</string>', + video_info, 'rtmp play path') + + return {'id': video_id, + 'title': title, + 'ext': 'flv', + 'url': url, + 'play_path': path, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index aa88e1a..49a249a 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -3,7 +3,6 @@ import json from .common import InfoExtractor from ..utils import ( - unified_strdate, ExtractorError, ) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py new file mode 100644 index 0000000..84f065a --- /dev/null +++ b/youtube_dl/extractor/wimp.py @@ -0,0 +1,28 @@ +import re +import base64 + +from .common import InfoExtractor + + +class WimpIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'<meta name="description" content="(.+?)" />',webpage, 'video title') + thumbnail_url = self._search_regex(r'<meta property="og\:image" content="(.+?)" />', webpage,'video thumbnail') + googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url') + googleString = base64.b64decode(googleString).decode('ascii') + final_url = self._search_regex('","(.*?)"', googleString,'final video url') + ext = final_url.rpartition(u'.')[2] + + return [{ + 'id': video_id, + 'url': final_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail_url, + }] + diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index de653cb..c7922c5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -131,15 +131,24 @@ class YoutubeIE(InfoExtractor): def _decrypt_signature(self, s): """Decrypt the key the two subkeys must have a length of 43""" - (a,b) = s.split('.') - if len(a) != 43 or len(b) != 43: - raise ExtractorError(u'Unable to decrypt signature, subkeys lengths %d.%d not supported; retrying might work' % (len(a), len(b))) - if self._downloader.params.get('verbose'): - self.to_screen('encrypted signature length %d.%d' % (len(a), len(b))) - b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40] - a = a[-40:] - s_dec = '.'.join((a,b))[::-1] - return s_dec + + if len(s) == 88: + return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] + elif len(s) == 87: + return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1] + elif len(s) == 86: + return s[2:63] + s[82] + s[64:82] + s[63] + elif len(s) == 85: + return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1] + elif len(s) == 84: + return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] + elif len(s) == 83: + return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36] + elif len(s) == 82: + return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] + + else: + raise ExtractorError(u'Unable to decrypt signature, subkeys length %d not supported; retrying might work' % (len(s))) def _get_available_subtitles(self, video_id): self.report_video_subtitles_download(video_id) @@ -454,14 +463,13 @@ class YoutubeIE(InfoExtractor): if video_subtitles: (sub_error, sub_lang, sub) = video_subtitles[0] if sub_error: - # We try with the automatic captions - video_subtitles = self._request_automatic_caption(video_id, video_webpage) - (sub_error_auto, sub_lang, sub) = video_subtitles[0] - if sub is not None: - pass - else: - # We report the original error - self._downloader.report_warning(sub_error) + self._downloader.report_warning(sub_error) + + if self._downloader.params.get('writeautomaticsub', False): + video_subtitles = self._request_automatic_caption(video_id, video_webpage) + (sub_error, sub_lang, sub) = video_subtitles[0] + if sub_error: + self._downloader.report_warning(sub_error) if self._downloader.params.get('allsubtitles', False): video_subtitles = self._extract_all_subtitles(video_id) @@ -510,6 +518,12 @@ class YoutubeIE(InfoExtractor): if 'sig' in url_data: url += '&signature=' + url_data['sig'][0] elif 's' in url_data: + if self._downloader.params.get('verbose'): + s = url_data['s'][0] + player = self._search_regex(r'html5player-(.+?)\.js', video_webpage, + 'html5 player', fatal=False) + self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' % + (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player)) signature = self._decrypt_signature(url_data['s'][0]) url += '&signature=' + signature if 'ratebypass' not in url: diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3b456e9..d1e8482 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.06.33' +__version__ = '2013.06.34' |