From a3a6d01a9613566347ac39bac9cef3c4b8cd692f Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Sat, 1 Jul 2017 21:09:57 -0500 Subject: [thisoldhouse] Fix video id extraction (closes #13540) --- youtube_dl/extractor/thisoldhouse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py index 197258d..6a3efb9 100644 --- a/youtube_dl/extractor/thisoldhouse.py +++ b/youtube_dl/extractor/thisoldhouse.py @@ -31,5 +31,5 @@ class ThisOldHouseIE(InfoExtractor): drupal_settings = self._parse_json(self._search_regex( r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), display_id) - video_id = drupal_settings['jwplatform']['video_id'] + video_id = list(drupal_settings['comScore'])[0] return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id) -- cgit v1.1 From 99a7e76240f3e79d770bff0029dc3321e14b2a97 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Sat, 1 Jul 2017 21:11:58 -0500 Subject: [thisoldhouse] Update test --- youtube_dl/extractor/thisoldhouse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py index 6a3efb9..0c32135 100644 --- a/youtube_dl/extractor/thisoldhouse.py +++ b/youtube_dl/extractor/thisoldhouse.py @@ -8,7 +8,7 @@ class ThisOldHouseIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P[^/?#]+)' _TESTS = [{ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', - 'md5': '946f05bbaa12a33f9ae35580d2dfcfe3', + 'md5': '568acf9ca25a639f0c4ff905826b662f', 'info_dict': { 'id': '2REGtUDQ', 'ext': 'mp4', -- cgit v1.1 From 50ae3f646e6c7eec187078dd43f1cb3777ccfb4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Jul 2017 20:04:08 +0700 Subject: [thisoldhouse] Add more fallbacks for video id (closes #13541) --- youtube_dl/extractor/thisoldhouse.py | 17 
+++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py index 0c32135..6ab147a 100644 --- a/youtube_dl/extractor/thisoldhouse.py +++ b/youtube_dl/extractor/thisoldhouse.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str +from ..utils import try_get class ThisOldHouseIE(InfoExtractor): @@ -28,8 +30,15 @@ class ThisOldHouseIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', - webpage, 'drupal settings'), display_id) - video_id = list(drupal_settings['comScore'])[0] + video_id = self._search_regex( + (r'data-mid=(["\'])(?P(?:(?!\1).)+)\1', + r'id=(["\'])inline-video-player-(?P(?:(?!\1).)+)\1'), + webpage, 'video id', default=None, group='id') + if not video_id: + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + video_id = try_get( + drupal_settings, lambda x: x['jwplatform']['video_id'], + compat_str) or list(drupal_settings['comScore'])[0] return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id) -- cgit v1.1 From 4d9ba27bba916d4e305d41fbef0685170eb1991f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Jul 2017 20:12:40 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 7b3c6c6..c17acf4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,22 @@ version -Extractors +Core +* [extractor/common] Improve _json_ld + +Extractors ++ [thisoldhouse] Add more fallbacks for video id +* [thisoldhouse] Fix video id extraction (#13540, #13541) +* [xfileshare] Extend 
format regular expression (#13536) +* [ted] Fix extraction (#13535) ++ [tastytrade] Add support for tastytrade.com (#13521) +* [dplayit] Relax video id regular expression (#13524) ++ [generic] Extract more generic metadata (#13527) ++ [bbccouk] Capture and output error message (#13501, #13518) +* [cbsnews] Relax video info regular expression (#13284, #13503) ++ [facebook] Add support for plugin video embeds and multiple embeds (#13493) +* [soundcloud] Switch to https for API requests (#13502) +* [pandatv] Switch to https for API and download URLs ++ [pandatv] Add support for https URLs (#13491) + [niconico] Support sp subdomain (#13494) -- cgit v1.1 From b6c9fe416243373bcb59eb8aa5ef0baca8f3c97c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Jul 2017 20:17:10 +0700 Subject: release 2017.07.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 82bbbda..9746035 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.25** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.06.25 +[debug] youtube-dl version 2017.07.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c17acf4..4c20113 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.07.02 Core * [extractor/common] Improve _json_ld diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 010ff76..db2e2ba 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -768,6 +768,7 @@ - **Tagesschau** - **tagesschau:player** - **Tass** + - **TastyTrade** - **TBS** - **TDSLifeway** - **teachertube**: teachertube.com videos diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b6d3788..0db974f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.06.25' +__version__ = '2017.07.02' -- cgit v1.1 From 609ff8ca19f1c4c168a81121074b91cc0f0d4c47 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 5 Jul 2017 23:23:35 +0800 Subject: [utils] Support attributes with no values in get_elements_by_attribute() --- test/test_utils.py | 6 ++++++ youtube_dl/utils.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 2b93b36..30738e7 100644 --- 
a/test/test_utils.py +++ b/test/test_utils.py @@ -1228,6 +1228,12 @@ part 3 self.assertEqual(get_element_by_attribute('class', 'foo', html), None) self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None) + html = ''' + + ''' + + self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo') + def test_get_elements_by_class(self): html = ''' nicealso nice diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 39860e9..fdf5e29 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -365,9 +365,9 @@ def get_elements_by_attribute(attribute, value, html, escape_value=True): retlist = [] for m in re.finditer(r'''(?xs) <([a-zA-Z0-9:._-]+) - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? \s+%s=['"]?%s['"]? - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? \s*> (?P.*?) -- cgit v1.1 From babbc04d459a6e8b6a2e65c522107cce1ca2cbbb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 13 Jun 2017 16:21:26 +0800 Subject: [xuite] Move to the new HTML5 API and reduce # of requests --- youtube_dl/extractor/xuite.py | 88 +++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index e081820..0276c0d 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -1,14 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 - from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, + float_or_none, + get_element_by_attribute, parse_iso8601, - parse_duration, + remove_end, ) @@ -24,6 +23,7 @@ class XuiteIE(InfoExtractor): 'id': '3860914', 'ext': 'mp3', 'title': '孤單南半球-歐德陽', + 'description': '孤單南半球-歐德陽', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 
247.246, 'timestamp': 1314932940, @@ -44,7 +44,7 @@ class XuiteIE(InfoExtractor): 'duration': 596.458, 'timestamp': 1454242500, 'upload_date': '20160131', - 'uploader': 'yan12125', + 'uploader': '屁姥', 'uploader_id': '12158353', 'categories': ['個人短片'], 'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4', @@ -72,10 +72,10 @@ class XuiteIE(InfoExtractor): # from http://forgetfulbc.blogspot.com/2016/06/date.html 'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0', 'info_dict': { - 'id': 'cE1xbENoLTI3NDQ3MzM2LmZsdg==', + 'id': '27447336', 'ext': 'mp4', 'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)', - 'description': 'md5:f0abdcb69df300f522a5442ef3146f2a', + 'description': 'md5:1223810fa123b179083a3aed53574706', 'timestamp': 1466160960, 'upload_date': '20160617', 'uploader': 'B.C. & Lowy', @@ -86,29 +86,9 @@ class XuiteIE(InfoExtractor): 'only_matching': True, }] - @staticmethod - def base64_decode_utf8(data): - return base64.b64decode(data.encode('utf-8')).decode('utf-8') - - @staticmethod - def base64_encode_utf8(data): - return base64.b64encode(data.encode('utf-8')).decode('utf-8') - - def _extract_flv_config(self, encoded_media_id): - flv_config = self._download_xml( - 'http://vlog.xuite.net/flash/player?media=%s' % encoded_media_id, - 'flv config') - prop_dict = {} - for prop in flv_config.findall('./property'): - prop_id = self.base64_decode_utf8(prop.attrib['id']) - # CDATA may be empty in flv config - if not prop.text: - continue - encoded_content = self.base64_decode_utf8(prop.text) - prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content) - return prop_dict - def _real_extract(self, url): + # /play/ URLs provide embedded video URL and more metadata + url = url.replace('/embed/', '/play/') video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -121,51 +101,53 @@ class XuiteIE(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, error_msg), expected=True) - 
encoded_media_id = self._search_regex( - r'attributes\.name\s*=\s*"([^"]+)"', webpage, - 'encoded media id', default=None) - if encoded_media_id is None: - video_id = self._html_search_regex( - r'data-mediaid="(\d+)"', webpage, 'media id') - encoded_media_id = self.base64_encode_utf8(video_id) - flv_config = self._extract_flv_config(encoded_media_id) + media_info = self._parse_json(self._search_regex( + r'var\s+mediaInfo\s*=\s*({.*});', webpage, 'media info'), video_id) - FORMATS = { - 'audio': 'mp3', - 'video': 'mp4', - } + video_id = media_info['MEDIA_ID'] formats = [] - for format_tag in ('src', 'hq_src'): - video_url = flv_config.get(format_tag) + for key in ('html5Url', 'html5HQUrl'): + video_url = media_info.get(key) if not video_url: continue format_id = self._search_regex( - r'\bq=(.+?)\b', video_url, 'format id', default=format_tag) + r'\bq=(.+?)\b', video_url, 'format id', default=None) formats.append({ 'url': video_url, - 'ext': FORMATS.get(flv_config['type'], 'mp4'), + 'ext': 'mp4' if format_id.isnumeric() else format_id, 'format_id': format_id, 'height': int(format_id) if format_id.isnumeric() else None, }) self._sort_formats(formats) - timestamp = flv_config.get('publish_datetime') + timestamp = media_info.get('PUBLISH_DATETIME') if timestamp: timestamp = parse_iso8601(timestamp + ' +0800', ' ') - category = flv_config.get('category') + category = media_info.get('catName') categories = [category] if category else [] + uploader = media_info.get('NICKNAME') + uploader_url = None + + author_div = get_element_by_attribute('itemprop', 'author', webpage) + if author_div: + uploader = uploader or self._html_search_meta('name', author_div) + uploader_url = self._html_search_regex( + r']+itemprop="url"[^>]+href="([^"]+)"', author_div, + 'uploader URL', fatal=False) + return { 'id': video_id, - 'title': flv_config['title'], - 'description': flv_config.get('description'), - 'thumbnail': flv_config.get('thumb'), + 'title': media_info['TITLE'], + 'description': 
remove_end(media_info.get('metaDesc'), ' (Xuite 影音)'), + 'thumbnail': media_info.get('ogImageUrl'), 'timestamp': timestamp, - 'uploader': flv_config.get('author_name'), - 'uploader_id': flv_config.get('author_id'), - 'duration': parse_duration(flv_config.get('duration')), + 'uploader': uploader, + 'uploader_id': media_info.get('MEMBER_ID'), + 'uploader_url': uploader_url, + 'duration': float_or_none(media_info.get('MEDIA_DURATION'), 1000000), 'categories': categories, 'formats': formats, } -- cgit v1.1 From 0a2e1b2e30045de7834aca880d35253c5e8a3812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 5 Jul 2017 22:13:47 +0700 Subject: [vier] Adapt extraction to redesign (#13575) --- youtube_dl/extractor/vier.py | 47 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 3e67eb8..dbd5ba9 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -15,7 +15,21 @@ from ..utils import ( class VierIE(InfoExtractor): IE_NAME = 'vier' IE_DESC = 'vier.be and vijf.be' - _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/(?:[^/]+/videos/(?P[^/]+)(?:/(?P\d+))?|video/v3/embed/(?P\d+))' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?(?Pvier|vijf)\.be/ + (?: + (?: + [^/]+/videos| + video(?:/[^/]+)* + )/ + (?P[^/]+)(?:/(?P\d+))?| + (?: + video/v3/embed| + embed/video/public + )/(?P\d+) + ) + ''' _NETRC_MACHINE = 'vier' _TESTS = [{ 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', @@ -83,6 +97,15 @@ class VierIE(InfoExtractor): }, { 'url': 'http://www.vier.be/video/v3/embed/16129', 'only_matching': True, + }, { + 'url': 'https://www.vijf.be/embed/video/public/4093', + 'only_matching': True, + }, { + 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics', + 'only_matching': True, + }, { + 'url': 
'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6', + 'only_matching': True, }] def _real_initialize(self): @@ -133,14 +156,20 @@ class VierIE(InfoExtractor): video_id = self._search_regex( [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], webpage, 'video id', default=video_id or display_id) - application = self._search_regex( - [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], - webpage, 'application', default=site + '_vod') - filename = self._search_regex( - [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], - webpage, 'filename') - - playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename) + + playlist_url = self._search_regex( + r'data-file=(["\'])(?P(?:https?:)?//[^/]+/.+?\.m3u8.*?)\1', + webpage, 'm3u8 url', default=None, group='url') + + if not playlist_url: + application = self._search_regex( + [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], + webpage, 'application', default=site + '_vod') + filename = self._search_regex( + [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], + webpage, 'filename') + playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename) + formats = self._extract_wowza_formats( playlist_url, display_id, skip_protocols=['dash']) self._sort_formats(formats) -- cgit v1.1 From 655470825231eaa03b4b82cbc1314d551e72a01e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 5 Jul 2017 23:20:50 +0700 Subject: [kaltura] Fix typo in subtitles extraction (closes #13569) --- youtube_dl/extractor/kaltura.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 41c1f3d..138d484 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -324,7 +324,7 @@ class KalturaIE(InfoExtractor): if captions: for caption in captions.get('objects', []): # 
Continue if caption is not ready - if f.get('status') != 2: + if caption.get('status') != 2: continue if not caption.get('id'): continue -- cgit v1.1 From dee2ff1d818bebd74990b7cebbc698f22163a43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Jul 2017 00:25:37 +0700 Subject: [test_utils] Fix tests under Windows --- test/test_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 30738e7..7803e5b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -98,6 +98,7 @@ from youtube_dl.compat import ( compat_chr, compat_etree_fromstring, compat_getenv, + compat_os_name, compat_setenv, compat_urlparse, compat_parse_qs, @@ -448,7 +449,9 @@ class TestUtil(unittest.TestCase): def test_shell_quote(self): args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] - self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""") + self.assertEqual( + shell_quote(args), + """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) @@ -932,7 +935,7 @@ class TestUtil(unittest.TestCase): def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), - 'foo ba/r -baz \'2 be\' \'\'' + 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""' ) def test_parse_filesize(self): -- cgit v1.1 From 60d4401c5e14d94574e2418d229dcc0c067d3559 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Thu, 6 Jul 2017 10:55:59 -0500 Subject: [espn] Extend _VALID_URL (fixes #13244) --- youtube_dl/extractor/espn.py | 48 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 8795e0d..7a74360 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -10,7 +10,25 @@ from ..utils import ( class 
ESPNIE(InfoExtractor): - _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?:\w+\.)+)?espn\.go| + (?:www\.)?espn + )\.com/ + (?: + (?: + video/clip| + watch/player + ) + (?: + \?.*?\bid=| + /_/id/ + ) + ) + (?P\d+) + ''' + _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', 'info_dict': { @@ -25,21 +43,35 @@ class ESPNIE(InfoExtractor): 'skip_download': True, }, }, { - # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season - 'url': 'http://espn.go.com/video/clip?id=2743663', + 'url': 'https://broadband.espn.go.com/video/clip?id=18910086', 'info_dict': { - 'id': '2743663', + 'id': '18910086', 'ext': 'mp4', - 'title': 'Must-See Moments: Best of the MLS season', - 'description': 'md5:4c2d7232beaea572632bec41004f0aeb', - 'timestamp': 1449446454, - 'upload_date': '20151207', + 'title': 'Kyrie spins around defender for two', + 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b', + 'timestamp': 1489539155, + 'upload_date': '20170315', }, 'params': { 'skip_download': True, }, 'expected_warnings': ['Unable to download f4m manifest'], }, { + 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672', + 'only_matching': True, + }, { + 'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774', + 'only_matching': True, + }, { + 'url': 'http://www.espn.com/watch/player?id=19141491', + 'only_matching': True, + }, { + 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875', + 'only_matching': True, + }, { + 'url': 'http://www.espn.com/watch/player/_/id/19141491', + 'only_matching': True, + }, { 'url': 'http://www.espn.com/video/clip?id=10365079', 'only_matching': True, }, { -- cgit v1.1 From ddeff4be3fb491a82d775fcd4140fc49a443864c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Jul 2017 23:03:51 +0700 Subject: Credit @gfabiano for #13382, #13385, #13415 --- 
AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index e2bdebe..2f828ad 100644 --- a/AUTHORS +++ b/AUTHORS @@ -220,3 +220,4 @@ gritstub Adam Voss Mike Fährmann Jan Kundrát +Giuseppe Fabiano -- cgit v1.1 From ab328411d53eee079a66f373bc83cf09d207dae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Jul 2017 23:05:27 +0700 Subject: Credit @orng for ruv (#13396) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 2f828ad..ec58744 100644 --- a/AUTHORS +++ b/AUTHORS @@ -221,3 +221,4 @@ Adam Voss Mike Fährmann Jan Kundrát Giuseppe Fabiano +Örn Guðjónsson -- cgit v1.1 From 8a04ade86bf3a7f0a321232850a2d03a52c47819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Jul 2017 23:06:53 +0700 Subject: Credit @parmjitv for #13322, #13503, #13541, #13549 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index ec58744..053159c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -222,3 +222,4 @@ Mike Fährmann Jan Kundrát Giuseppe Fabiano Örn Guðjónsson +Parmjit Virk -- cgit v1.1 From 00e5c363159f7771dc3a2e33b1a38af520782372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Jul 2017 22:22:29 +0700 Subject: [xhamster] Add support for new URL schema (closes #13593) --- youtube_dl/extractor/xhamster.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 6987b2e..de9ec9b 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -14,7 +14,15 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _VALID_URL = r'(?Phttps?)://(?:.+?\.)?xhamster\.com/movies/(?P[0-9]+)/(?P.*?)\.html(?:\?.*)?' 
+ _VALID_URL = r'''(?x) + https?:// + (?:.+?\.)?xhamster\.com/ + (?: + movies/(?P\d+)/(?P[^/]*)\.html| + videos/(?P[^/]*)-(?P\d+) + ) + ''' + _TESTS = [{ 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', 'md5': '8281348b8d3c53d39fffb377d24eac4e', @@ -66,6 +74,10 @@ class XHamsterIE(InfoExtractor): # This video is visible for marcoalfa123456's friends only 'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html', 'only_matching': True, + }, { + # new URL schema + 'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821', + 'only_matching': True, }] def _real_extract(self, url): @@ -81,11 +93,10 @@ class XHamsterIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - seo = mobj.group('seo') - proto = mobj.group('proto') - mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo) - webpage = self._download_webpage(mrss_url, video_id) + video_id = mobj.group('id') or mobj.group('id_2') + display_id = mobj.group('display_id') or mobj.group('display_id_2') + + webpage = self._download_webpage(url, video_id) error = self._html_search_regex( r']+id=["\']videoClosed["\'][^>]*>(.+?)', -- cgit v1.1 From d852c6bc5995444096a6c07b66ea075269412cf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Jul 2017 22:49:11 +0700 Subject: [xhamster] Extract all formats and fix duration extraction (#13593) --- youtube_dl/extractor/xhamster.py | 75 +++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index de9ec9b..c42b59e 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( clean_html, dict_get, @@ -28,6 +29,7 @@ class XHamsterIE(InfoExtractor): 'md5': 
'8281348b8d3c53d39fffb377d24eac4e', 'info_dict': { 'id': '1509445', + 'display_id': 'femaleagent_shy_beauty_takes_the_bait', 'ext': 'mp4', 'title': 'FemaleAgent Shy beauty takes the bait', 'upload_date': '20121014', @@ -40,6 +42,7 @@ class XHamsterIE(InfoExtractor): 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', 'info_dict': { 'id': '2221348', + 'display_id': 'britney_spears_sexy_booty', 'ext': 'mp4', 'title': 'Britney Spears Sexy Booty', 'upload_date': '20130914', @@ -81,18 +84,7 @@ class XHamsterIE(InfoExtractor): }] def _real_extract(self, url): - def extract_video_url(webpage, name): - return self._search_regex( - [r'''file\s*:\s*(?P["'])(?P.+?)(?P=q)''', - r'''["'])(?P.+?)(?P=q)\s+class=["']mp4Thumb''', - r''']+file=(?P["'])(?P.+?)(?P=q)[^>]*>'''], - webpage, name, group='mp4') - - def is_hd(webpage): - return '
]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)'], webpage, 'title') + formats = [] + format_urls = set() + + sources = self._parse_json( + self._search_regex( + r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources', + default='{}'), + video_id, fatal=False) + for format_id, format_url in sources.items(): + if not isinstance(format_url, compat_str): + continue + if format_url in format_urls: + continue + format_urls.add(format_url) + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'height': int_or_none(self._search_regex( + r'^(\d+)[pP]', format_id, 'height', default=None)) + }) + + video_url = self._search_regex( + [r'''file\s*:\s*(?P["'])(?P.+?)(?P=q)''', + r'''["'])(?P.+?)(?P=q)\s+class=["']mp4Thumb''', + r''']+file=(?P["'])(?P.+?)(?P=q)[^>]*>'''], + webpage, 'video url', group='mp4', default=None) + if video_url and video_url not in format_urls: + formats.append({ + 'url': video_url, + }) + + self._sort_formats(formats) + # Only a few videos have an description mobj = re.search(r'Description: ([^<]+)', webpage) description = mobj.group(1) if mobj else None @@ -128,7 +153,8 @@ class XHamsterIE(InfoExtractor): webpage, 'thumbnail', fatal=False, group='thumbnail') duration = parse_duration(self._search_regex( - r'Runtime:\s*\s*([\d:]+)', webpage, + [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']', + r'Runtime:\s*\s*([\d:]+)'], webpage, 'duration', fatal=False)) view_count = int_or_none(self._search_regex( @@ -143,30 +169,6 @@ class XHamsterIE(InfoExtractor): age_limit = self._rta_search(webpage) - hd = is_hd(webpage) - - format_id = 'hd' if hd else 'sd' - - video_url = extract_video_url(webpage, format_id) - formats = [{ - 'url': video_url, - 'format_id': 'hd' if hd else 'sd', - 'preference': 1, - }] - - if not hd: - mrss_url = self._search_regex(r'Categories:.+?)', webpage, 'categories', default=None) @@ -175,6 +177,7 @@ class XHamsterIE(InfoExtractor): return { 'id': video_id, + 'display_id': 
display_id, 'title': title, 'description': description, 'upload_date': upload_date, -- cgit v1.1 From eadd313321f4fbacac2ec10f2f382197e401d7f7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 8 Jul 2017 15:48:05 +0800 Subject: [yam] Remove extractor mymedia.yam.com is dead. An wikipedia user also pointed out that Yam's blog service is no longer available. [1] [1] https://zh.wikipedia.org/zh-tw/%E5%A4%A9%E7%A9%BA%E9%83%A8%E8%90%BD --- ChangeLog | 6 ++ youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/yam.py | 123 ------------------------------------- 3 files changed, 6 insertions(+), 124 deletions(-) delete mode 100644 youtube_dl/extractor/yam.py diff --git a/ChangeLog b/ChangeLog index 4c20113..5d07c12 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +- [yam] Remove extractor + + version 2017.07.02 Core diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bbdb4a2..b83c3ab 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1280,7 +1280,6 @@ from .yahoo import ( YahooIE, YahooSearchIE, ) -from .yam import YamIE from .yandexmusic import ( YandexMusicTrackIE, YandexMusicAlbumIE, diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py deleted file mode 100644 index ef55355..0000000 --- a/youtube_dl/extractor/yam.py +++ /dev/null @@ -1,123 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - float_or_none, - month_by_abbreviation, - ExtractorError, - get_element_by_attribute, -) - - -class YamIE(InfoExtractor): - IE_DESC = '蕃薯藤yam天空部落' - _VALID_URL = r'https?://mymedia\.yam\.com/m/(?P\d+)' - - _TESTS = [{ - # An audio hosted on Yam - 'url': 'http://mymedia.yam.com/m/2283921', - 'md5': 'c011b8e262a52d5473d9c2e3c9963b9c', - 'info_dict': { - 'id': '2283921', - 'ext': 'mp3', - 'title': '發現 - 趙薇 京華煙雲主題曲', - 
'description': '發現 - 趙薇 京華煙雲主題曲', - 'uploader_id': 'princekt', - 'upload_date': '20080807', - 'duration': 313.0, - } - }, { - # An external video hosted on YouTube - 'url': 'http://mymedia.yam.com/m/3599430', - 'md5': '03127cf10d8f35d120a9e8e52e3b17c6', - 'info_dict': { - 'id': 'CNpEoQlrIgA', - 'ext': 'mp4', - 'upload_date': '20150306', - 'uploader': '新莊社大瑜伽社', - 'description': 'md5:11e2e405311633ace874f2e6226c8b17', - 'uploader_id': '2323agoy', - 'title': '20090412陽明山二子坪-1', - }, - 'skip': 'Video does not exist', - }, { - 'url': 'http://mymedia.yam.com/m/3598173', - 'info_dict': { - 'id': '3598173', - 'ext': 'mp4', - }, - 'skip': 'cause Yam system error', - }, { - 'url': 'http://mymedia.yam.com/m/3599437', - 'info_dict': { - 'id': '3599437', - 'ext': 'mp4', - }, - 'skip': 'invalid YouTube URL', - }, { - 'url': 'http://mymedia.yam.com/m/2373534', - 'md5': '7ff74b91b7a817269d83796f8c5890b1', - 'info_dict': { - 'id': '2373534', - 'ext': 'mp3', - 'title': '林俊傑&蔡卓妍-小酒窩', - 'description': 'md5:904003395a0fcce6cfb25028ff468420', - 'upload_date': '20080928', - 'uploader_id': 'onliner2', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - # Check for errors - system_msg = self._html_search_regex( - r'系統訊息(?:
|\n|\r)*([^<>]+)
', page, 'system message', - default=None) - if system_msg: - raise ExtractorError(system_msg, expected=True) - - # Is it hosted externally on YouTube? - youtube_url = self._html_search_regex( - r']+class="heading"[^>]*>\s*(.+)\s*', page, 'title') - - api_page = self._download_webpage( - 'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id, - note='Downloading API page') - api_result_obj = compat_urlparse.parse_qs(api_page) - - info_table = get_element_by_attribute('class', 'info', page) - uploader_id = self._html_search_regex( - r':[\n ]+(?P[A-Z][a-z]{2})\s+' + - r'(?P\d{1,2}), (?P\d{4})', page) - if mobj: - upload_date = '%s%02d%02d' % ( - mobj.group('year'), - month_by_abbreviation(mobj.group('mon')), - int(mobj.group('day'))) - else: - upload_date = None - duration = float_or_none(api_result_obj['totaltime'][0], scale=1000) - - return { - 'id': video_id, - 'url': api_result_obj['mp3file'][0], - 'title': title, - 'description': self._html_search_meta('description', page), - 'duration': duration, - 'uploader_id': uploader_id, - 'upload_date': upload_date, - } -- cgit v1.1 From a49804816c0246d81b9d34d9f89f99fae06da887 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Jul 2017 18:12:15 +0700 Subject: [dailymotion] Add support for new layout (close #13580) --- youtube_dl/extractor/dailymotion.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index f8db76c..74e9913 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -147,7 +147,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): view_count_str = self._search_regex( (r']+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"', r'video_views_count[^>]+>\s+([\s\d\,.]+)'), - webpage, 'view count', fatal=False) + webpage, 'view count', default=None) if view_count_str: view_count_str = re.sub(r'\s', '', view_count_str) view_count = 
str_to_int(view_count_str) @@ -159,7 +159,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', r'buildPlayer\(({.+?})\);', - r'var\s+config\s*=\s*({.+?});'], + r'var\s+config\s*=\s*({.+?});', + # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580) + r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) -- cgit v1.1 From 8b347a389eaa6d545ada901c2e236a5eb2272960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 00:26:13 +0700 Subject: [googledrive] Fix height extraction (closes #13603) --- youtube_dl/extractor/googledrive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 9705cfa..c40da85 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -92,7 +92,7 @@ class GoogleDriveIE(InfoExtractor): if resolution: f.update({ 'width': resolution[0], - 'height': resolution[0], + 'height': resolution[1], }) formats.append(f) self._sort_formats(formats) -- cgit v1.1 From 7a5773090789bec38a3f58dfb09039155919a540 Mon Sep 17 00:00:00 2001 From: rrooij Date: Sun, 9 Jul 2017 09:21:40 +0200 Subject: [npo:live] Fix live stream id extraction (closes #13568) --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 5f8b6de..516b1e9 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -341,7 +341,7 @@ class NPOLiveIE(NPOBaseIE): webpage = self._download_webpage(url, display_id) live_id = self._search_regex( - r'data-prid="([^"]+)"', webpage, 'live id') + [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id') return { '_type': 'url_transparent', 
-- cgit v1.1 From 15237fcd51dca192103f08a910660616e3b241b8 Mon Sep 17 00:00:00 2001 From: mlindner Date: Sun, 9 Jul 2017 00:54:52 -0700 Subject: [veoh] Extend _VALID_URL --- youtube_dl/extractor/veoh.py | 73 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 0f5d687..b20dddc 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -12,47 +12,46 @@ from ..utils import ( class VeohIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P(?:v|yapi-)[\da-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P(?:v|e|yapi-)[\da-zA-Z]+)' - _TESTS = [ - { - 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', - 'md5': '620e68e6a3cff80086df3348426c9ca3', - 'info_dict': { - 'id': '56314296', - 'ext': 'mp4', - 'title': 'Straight Backs Are Stronger', - 'uploader': 'LUMOback', - 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ', - }, + _TESTS = [{ + 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', + 'md5': '620e68e6a3cff80086df3348426c9ca3', + 'info_dict': { + 'id': '56314296', + 'ext': 'mp4', + 'title': 'Straight Backs Are Stronger', + 'uploader': 'LUMOback', + 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. 
', }, - { - 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', - 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', - 'info_dict': { - 'id': '27701988', - 'ext': 'mp4', - 'title': 'Chile workers cover up to avoid skin damage', - 'description': 'md5:2bd151625a60a32822873efc246ba20d', - 'uploader': 'afp-news', - 'duration': 123, - }, - 'skip': 'This video has been deleted.', + }, { + 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', + 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', + 'info_dict': { + 'id': '27701988', + 'ext': 'mp4', + 'title': 'Chile workers cover up to avoid skin damage', + 'description': 'md5:2bd151625a60a32822873efc246ba20d', + 'uploader': 'afp-news', + 'duration': 123, }, - { - 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', - 'md5': '4fde7b9e33577bab2f2f8f260e30e979', - 'note': 'Embedded ooyala video', - 'info_dict': { - 'id': '69525809', - 'ext': 'mp4', - 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery', - 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', - 'uploader': 'newsy-videos', - }, - 'skip': 'This video has been deleted.', + 'skip': 'This video has been deleted.', + }, { + 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', + 'md5': '4fde7b9e33577bab2f2f8f260e30e979', + 'note': 'Embedded ooyala video', + 'info_dict': { + 'id': '69525809', + 'ext': 'mp4', + 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery', + 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', + 'uploader': 'newsy-videos', }, - ] + 'skip': 'This video has been deleted.', + }, { + 'url': 'http://www.veoh.com/watch/e152215AJxZktGS', + 'only_matching': True, + }] def _extract_formats(self, source): formats = [] -- cgit v1.1 From 5af2fd7fa02734c2a23f917fb60f1c14da149d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 15:55:04 +0700 Subject: [eagleplatform] Add support for another embed pattern (#13557) 
--- youtube_dl/extractor/eagleplatform.py | 36 +++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 76d39ad..5e1de04 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -60,16 +60,40 @@ class EaglePlatformIE(InfoExtractor): webpage) if mobj is not None: return mobj.group('url') - # Basic usage embedding (see http://dultonmedia.github.io/eplayer/) + PLAYER_JS_RE = r''' + ]+ + src=(?P["\'])(?:https?:)?//(?P(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) + .+? + ''' + # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) mobj = re.search( r'''(?xs) - ]+ - src=(?P["\'])(?:https?:)?//(?P.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1) - .+? + %s ]+ - class=(?P["\'])eagleplayer(?P=q2)[^>]+ + class=(?P["\'])eagleplayer(?P=qclass)[^>]+ data-id=["\'](?P\d+) - ''', webpage) + ''' % PLAYER_JS_RE, webpage) + if mobj is not None: + return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() + # Generalization of "Javascript code usage", "Combined usage" and + # "Usage without attaching to DOM" embeddings (see + # http://dultonmedia.github.io/eplayer/) + mobj = re.search( + r'''(?xs) + %s + + ''' % PLAYER_JS_RE, webpage) if mobj is not None: return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() -- cgit v1.1 From 665e9452461abaff7127653265c78bd585acea6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 15:57:33 +0700 Subject: [eagleplatform] Add support for referrer protected videos (closes #13557) --- youtube_dl/extractor/eagleplatform.py | 25 ++++++++++++++++++++++--- youtube_dl/extractor/generic.py | 10 +++++----- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 5e1de04..34891a3 100644 --- 
a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + unsmuggle_url, ) @@ -50,6 +51,10 @@ class EaglePlatformIE(InfoExtractor): 'view_count': int, }, 'skip': 'Georestricted', + }, { + # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) + 'url': 'tvrainru.media.eagleplatform.com:582306', + 'only_matching': True, }] @staticmethod @@ -103,9 +108,10 @@ class EaglePlatformIE(InfoExtractor): if status != 200: raise ExtractorError(' '.join(response['errors']), expected=True) - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs): + def _download_json(self, url_or_request, video_id, *args, **kwargs): try: - response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) + response = super(EaglePlatformIE, self)._download_json( + url_or_request, video_id, *args, **kwargs) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError): response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) @@ -117,11 +123,24 @@ class EaglePlatformIE(InfoExtractor): return self._download_json(url_or_request, video_id, note)['data'][0] def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + mobj = re.match(self._VALID_URL, url) host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') + headers = {} + query = { + 'id': video_id, + } + + referrer = smuggled_data.get('referrer') + if referrer: + headers['Referer'] = referrer + query['referrer'] = referrer + player_data = self._download_json( - 'http://%s/api/player_data?id=%s' % (host, video_id), video_id) + 'http://%s/api/player_data' % host, video_id, + headers=headers, query=query) media = player_data['data']['playlist']['viewports'][0]['medialist'][0] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 
f9bff43..7232f39 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1185,7 +1185,7 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # Eagle.Platform embed (generic URL) + # EaglePlatform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used @@ -1200,7 +1200,7 @@ class GenericIE(InfoExtractor): 'age_limit': 0, }, }, - # ClipYou (Eagle.Platform) embed (custom URL) + # ClipYou (EaglePlatform) embed (custom URL) { 'url': 'http://muz-tv.ru/play/7129/', # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used @@ -2443,12 +2443,12 @@ class GenericIE(InfoExtractor): if kaltura_url: return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) - # Look for Eagle.Platform embeds + # Look for EaglePlatform embeds eagleplatform_url = EaglePlatformIE._extract_url(webpage) if eagleplatform_url: - return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()) + return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key()) - # Look for ClipYou (uses Eagle.Platform) embeds + # Look for ClipYou (uses EaglePlatform) embeds mobj = re.search( r']+src="https?://(?Pmedia\.clipyou\.ru)/index/player\?.*\brecord_id=(?P\d+).*"', webpage) if mobj is not None: -- cgit v1.1 From 250b042c7e71a6e8bbff534aa41c2b92dae1acf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 16:02:38 +0700 Subject: [generic] Add tests for #13557 --- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7232f39..95c3869 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1199,6 +1199,24 @@ class GenericIE(InfoExtractor): 'view_count': int, 'age_limit': 0, }, + 'params': { + 
'skip_download': True, + }, + }, + # referrer protected EaglePlatform embed + { + 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', + 'info_dict': { + 'id': '582306', + 'ext': 'mp4', + 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3382, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, }, # ClipYou (EaglePlatform) embed (custom URL) { @@ -1212,6 +1230,9 @@ class GenericIE(InfoExtractor): 'duration': 216, 'view_count': int, }, + 'params': { + 'skip_download': True, + }, }, # Pladform embed { -- cgit v1.1 From 4328ddf82b812420ffc120b4150251f751bff08c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 16:29:52 +0700 Subject: [extractor/common] Add support for AMP tags in _parse_html5_media_entries --- youtube_dl/extractor/common.py | 7 +++++-- youtube_dl/extractor/generic.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index afeb4c5..daa1088 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2132,15 +2132,18 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] + # amp-video and amp-audio are very similar to their HTML5 counterparts + # so we wll include them right here (see + # https://www.ampproject.org/docs/reference/components/amp-video) media_tags = [(media_tag, media_type, '') for media_tag, media_type - in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)] media_tags.extend(re.findall( # We only allow video|audio followed by a whitespace or '>'. # Allowing more characters may end up in significant slow down (see # https://github.com/rg3/youtube-dl/issues/11979, example URL: # http://www.porntrex.com/maps/videositemap.xml). 
- r'(?s)(<(?Pvideo|audio)(?:\s+[^>]*)?>)(.*?)', webpage)) + r'(?s)(<(?P(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)', webpage)) for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 95c3869..919f4f9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1770,6 +1770,16 @@ class GenericIE(InfoExtractor): }, 'add_ie': [MediasetIE.ie_key()], }, + { + # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video) + 'url': 'https://tvrain.ru/amp/418921/', + 'md5': 'cc00413936695987e8de148b67d14f1d', + 'info_dict': { + 'id': '418921', + 'ext': 'mp4', + 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject -- cgit v1.1 From d2b9f362fabad8f9490825456d8ed679d7159271 Mon Sep 17 00:00:00 2001 From: Christopher Smith Date: Thu, 29 Jun 2017 13:10:45 -0600 Subject: [cjsw] Add extractor --- youtube_dl/extractor/cjsw.py | 41 ++++++++++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 42 insertions(+) create mode 100644 youtube_dl/extractor/cjsw.py diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py new file mode 100644 index 0000000..087cac9 --- /dev/null +++ b/youtube_dl/extractor/cjsw.py @@ -0,0 +1,41 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class CJSWIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/\S+/(?P[0-9]+)' + IE_NAME = 'cjsw' + _TEST = { + 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', + 'md5': 'cee14d40f1e9433632c56e3d14977120', + 'info_dict': { + 'id': '20170620', + 'ext': 'mp3', + 'title': 'Freshly Squeezed', + 'description': 'Sled Island artists featured // Live session with Phi Pho, followed by a live session with Sinzere & The Late Nights! 
// Stay Fresh Y\'all!!', + } + } + + def _real_extract(self, url): + episode_id = self._match_id(url) + + webpage = self._download_webpage(url, episode_id) + + title = self._search_regex( + r']+data-showname=(["\'])(?P(?!\1).+?)\1[^>]*>', webpage, 'title', group='title') + description = self._html_search_regex( + r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False) + formats = [{ + 'url': self._search_regex( + r'<button[^>]+data-audio-src=(["\'])(?P<audio_url>(?!\1).+?)\1[^>]*>', webpage, 'audio_url', group='audio_url'), + 'ext': 'mp3', + 'vcodec': 'none', + }] + return { + 'id': episode_id, + 'title': title, + 'description': description, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b83c3ab..4524fa6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -185,6 +185,7 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE +from .cjsw import CJSWIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .cliprs import ClipRsIE -- cgit v1.1 From c319d1c4833f89df818fe39f4c99cdc5c9a8bf01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:00:45 +0700 Subject: [csjw] Fix issues and improve extraction (closes #13525) --- youtube_dl/extractor/cjsw.py | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py index 087cac9..aab6ea5 100644 --- a/youtube_dl/extractor/cjsw.py +++ b/youtube_dl/extractor/cjsw.py @@ -1,41 +1,66 @@ -# coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..utils import ( + determine_ext, + unescapeHTML, +) class CJSWIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/\S+/(?P<id>[0-9]+)' - IE_NAME = 'cjsw' + _VALID_URL = 
r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)' _TEST = { 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', 'md5': 'cee14d40f1e9433632c56e3d14977120', 'info_dict': { - 'id': '20170620', + 'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41', 'ext': 'mp3', - 'title': 'Freshly Squeezed', - 'description': 'Sled Island artists featured // Live session with Phi Pho, followed by a live session with Sinzere & The Late Nights! // Stay Fresh Y\'all!!', - } + 'title': 'Freshly Squeezed – Episode June 20, 2017', + 'description': 'md5:c967d63366c3898a80d0c7b0ff337202', + 'series': 'Freshly Squeezed', + 'episode_id': '20170620', + }, } def _real_extract(self, url): - episode_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + program, episode_id = mobj.group('program', 'id') + audio_id = '%s/%s' % (program, episode_id) webpage = self._download_webpage(url, episode_id) - title = self._search_regex( - r'<button[^>]+data-showname=(["\'])(?P<title>(?!\1).+?)\1[^>]*>', webpage, 'title', group='title') - description = self._html_search_regex( - r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False) + title = unescapeHTML(self._search_regex( + (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)', + r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'), + webpage, 'title', group='title')) + + audio_url = self._search_regex( + r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'audio url', group='url') + + audio_id = self._search_regex( + r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3', + audio_url, 'audio id', default=audio_id) + formats = [{ - 'url': self._search_regex( - r'<button[^>]+data-audio-src=(["\'])(?P<audio_url>(?!\1).+?)\1[^>]*>', webpage, 'audio_url', group='audio_url'), - 'ext': 'mp3', + 'url': audio_url, + 'ext': determine_ext(audio_url, 'mp3'), 'vcodec': 'none', }] + + description = self._html_search_regex( + r'<p>(?P<description>.+?)</p>', 
webpage, 'description', fatal=False) + series = self._search_regex( + r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage, + 'series', default=program, group='name') + return { - 'id': episode_id, + 'id': audio_id, 'title': title, 'description': description, 'formats': formats, + 'series': series, + 'episode_id': episode_id, } -- cgit v1.1 From 0d2f0b0357325823782884327a158aeccf4f9b49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:05:11 +0700 Subject: [csjw] Make description optional --- youtube_dl/extractor/cjsw.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py index aab6ea5..dd27158 100644 --- a/youtube_dl/extractor/cjsw.py +++ b/youtube_dl/extractor/cjsw.py @@ -11,7 +11,7 @@ from ..utils import ( class CJSWIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', 'md5': 'cee14d40f1e9433632c56e3d14977120', 'info_dict': { @@ -22,7 +22,11 @@ class CJSWIE(InfoExtractor): 'series': 'Freshly Squeezed', 'episode_id': '20170620', }, - } + }, { + # no description + 'url': 'http://cjsw.com/program/road-pops/episode/20170707/', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -51,7 +55,8 @@ class CJSWIE(InfoExtractor): }] description = self._html_search_regex( - r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False) + r'<p>(?P<description>.+?)</p>', webpage, 'description', + default=None) series = self._search_regex( r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage, 'series', default=program, group='name') -- cgit v1.1 From a02682fd13ce5ba88d2508c90559eaa7f43b65d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:09:44 +0700 Subject: Keep in 
sync with ffmpeg's current malformed AAC bitstream wording (closes #13587) --- youtube_dl/YoutubeDL.py | 4 ++-- youtube_dl/postprocessor/ffmpeg.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b3a6d4d..60ee4b7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1890,7 +1890,7 @@ class YoutubeDL(object): info_dict.get('protocol') == 'm3u8' and self.params.get('hls_prefer_native')): if fixup_policy == 'warn': - self.report_warning('%s: malformated aac bitstream.' % ( + self.report_warning('%s: malformed AAC bitstream detected.' % ( info_dict['id'])) elif fixup_policy == 'detect_or_warn': fixup_pp = FFmpegFixupM3u8PP(self) @@ -1899,7 +1899,7 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(fixup_pp) else: self.report_warning( - '%s: malformated aac bitstream. %s' + '%s: malformed AAC bitstream detected. %s' % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index f021ea8..51256a3 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -542,7 +542,7 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor): temp_filename = prepend_extension(filename, 'temp') options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] - self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) + self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename) self.run_ffmpeg(filename, temp_filename, options) os.remove(encodeFilename(filename)) -- cgit v1.1 From ed84454d358f3cbfdc43dab31328b165f9c72c68 Mon Sep 17 00:00:00 2001 From: Santiago Calcagno <santicalcagno@gmail.com> Date: Tue, 13 Jun 2017 12:32:04 -0300 Subject: [egghead:course] Fix extraction --- youtube_dl/extractor/egghead.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff 
--git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index db92146..01fcdb6 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -22,18 +20,18 @@ class EggheadCourseIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - title = self._html_search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'title') - ul = self._search_regex(r'(?s)<ul class="series-lessons-list">(.*?)</ul>', webpage, 'session list') + api_url = 'https://egghead.io/api/v1/series/' + playlist_id + course = self._download_json(api_url, playlist_id) + title = course.get('title') + description = course.get('description') - found = re.findall(r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', ul) - entries = [self.url_result(m) for m in found] + lessons = course.get('lessons') + entries = [{'_type': 'url', 'ie_key': 'Wistia', 'url': 'wistia:' + l.get('wistia_id')} for l in lessons] return { '_type': 'playlist', 'id': playlist_id, 'title': title, - 'description': self._og_search_description(webpage), + 'description': description, 'entries': entries, } -- cgit v1.1 From 485cb375766df8f2ef79b7fe2915ead4ef61a01e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:28:42 +0700 Subject: [egghead:course] Improve (closes #13370) --- youtube_dl/extractor/egghead.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index 01fcdb6..c86f523 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class EggheadCourseIE(InfoExtractor): IE_DESC = 'egghead.io course' IE_NAME = 'egghead:course' - _VALID_URL = 
r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)' + _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)' _TEST = { 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'playlist_count': 29, @@ -20,18 +20,16 @@ class EggheadCourseIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - api_url = 'https://egghead.io/api/v1/series/' + playlist_id - course = self._download_json(api_url, playlist_id) - title = course.get('title') - description = course.get('description') - lessons = course.get('lessons') - entries = [{'_type': 'url', 'ie_key': 'Wistia', 'url': 'wistia:' + l.get('wistia_id')} for l in lessons] + course = self._download_json( + 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id) - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': title, - 'description': description, - 'entries': entries, - } + entries = [ + self.url_result( + 'wistia:%s' % lesson['wistia_id'], ie='Wistia', + video_id=lesson['wistia_id'], video_title=lesson.get('title')) + for lesson in course['lessons'] if lesson.get('wistia_id')] + + return self.playlist_result( + entries, playlist_id, course.get('title'), + course.get('description')) -- cgit v1.1 From 58179eb7d96ebef26a0083e80a2022fab4ca1558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:55:40 +0700 Subject: [abc.net.au:iview] Extract more formats (closes #13492, closes #13489) --- youtube_dl/extractor/abc.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 0247cab..60f753b 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -3,11 +3,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, js_to_json, int_or_none, parse_iso8601, + try_get, ) 
@@ -124,7 +126,20 @@ class ABCIViewIE(InfoExtractor): title = video_params.get('title') or video_params['seriesTitle'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') - formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) + format_urls = [ + try_get(stream, lambda x: x['hds-unmetered'], compat_str)] + + # May have higher quality video + sd_url = try_get( + stream, lambda x: x['streams']['hds']['sd'], compat_str) + if sd_url: + format_urls.append(sd_url.replace('metered', 'um')) + + formats = [] + for format_url in format_urls: + if format_url: + formats.extend( + self._extract_akamai_formats(format_url, video_id)) self._sort_formats(formats) subtitles = {} -- cgit v1.1 From 256a746d21634eccad07a1e6dcafedcdf8b6181b Mon Sep 17 00:00:00 2001 From: luboss <lubos.katrinec@gmail.com> Date: Fri, 2 Jun 2017 22:44:39 +0200 Subject: [joj] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/joj.py | 56 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100755 youtube_dl/extractor/joj.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4524fa6..9ee0808 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -470,6 +470,7 @@ from .jamendo import ( ) from .jeuxvideo import JeuxVideoIE from .jove import JoveIE +from .joj import JojIE from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE from .kaltura import KalturaIE diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py new file mode 100755 index 0000000..2ebfec9 --- /dev/null +++ b/youtube_dl/extractor/joj.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +import re + + +class JojIE(InfoExtractor): + _VALID_URL = r'https?://[a-z0-9]+\.joj\.sk/([^/]+/)*(?P<title_query>(?P<release_date>[0-9]{4}(-[0-9]{2}){2}).*)' # noqa + _TESTS = [{ + 'url': 
'https://www.joj.sk/nove-byvanie/archiv/2017-05-28-nove-byvanie', # noqa + 'info_dict': { + 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', + 'ext': 'mp4', + 'title': 'Nové Bývanie', + 'release_date': '20170528' + } + }, { + 'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia', + 'info_dict': { + 'id': 'f18b2c5f-9ea8-4941-a164-a814c53306ad', + 'ext': 'mp4', + 'title': 'Starí Rodičia', + 'release_date': '20160906' + } + }] + + media_src_url = 'http://n16.joj.sk/storage/' + xml_source_url = 'https://media.joj.sk/services/Video.php?clip=' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + release_date = mobj.group('release_date').replace('-', '') + webpage = self._download_webpage(url, 'id') + video_id = self._html_search_regex( + r'https?://([a-z0-9]+\.)joj\.sk/embed/(?P<video_id>[a-f0-9\-]+)', + webpage, 'id', group='video_id') + xml_playlist_url = self.xml_source_url + video_id + xml_playlist_et = self._download_xml(xml_playlist_url, 'XML playlist') + formats = [] + for file_el in xml_playlist_et.findall('files/file'): + try: + height = int(file_el.attrib['id'].replace('p', '')) + except ValueError: + height = 0 + formats.append({'height': height, + 'url': self.media_src_url + file_el.attrib['path'].replace( # noqa + 'dat/', '', 1)}) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage).title(), + 'formats': formats, + 'release_date': release_date + } -- cgit v1.1 From 73cf76a93fe48240bf82b1685b1403f05b793ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 19:05:18 +0700 Subject: [joj] Rewrite and add support for generic embeds (closes #13268) --- youtube_dl/extractor/generic.py | 17 +++++++ youtube_dl/extractor/joj.py | 108 ++++++++++++++++++++++++++++------------ 2 files changed, 93 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 919f4f9..f2c577f 100644 --- 
a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -91,6 +91,7 @@ from .anvato import AnvatoIE from .washingtonpost import WashingtonPostIE from .wistia import WistiaIE from .mediaset import MediasetIE +from .joj import JojIE class GenericIE(InfoExtractor): @@ -1771,6 +1772,16 @@ class GenericIE(InfoExtractor): 'add_ie': [MediasetIE.ie_key()], }, { + # JOJ.sk embeds + 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok', + 'info_dict': { + 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok', + 'title': 'Slovenskom sa prehnala vlna silných búrok', + }, + 'playlist_mincount': 5, + 'add_ie': [JojIE.ie_key()], + }, + { # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video) 'url': 'https://tvrain.ru/amp/418921/', 'md5': 'cc00413936695987e8de148b67d14f1d', @@ -2722,6 +2733,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) + # Look for JOJ.sk embeds + joj_urls = JojIE._extract_urls(webpage) + if joj_urls: + return self.playlist_from_matches( + joj_urls, video_id, video_title, ie=JojIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py index 2ebfec9..a764023 100755 --- a/youtube_dl/extractor/joj.py +++ b/youtube_dl/extractor/joj.py @@ -1,56 +1,100 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor import re +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + js_to_json, + try_get, +) + class JojIE(InfoExtractor): - _VALID_URL = r'https?://[a-z0-9]+\.joj\.sk/([^/]+/)*(?P<title_query>(?P<release_date>[0-9]{4}(-[0-9]{2}){2}).*)' # noqa + _VALID_URL = r'''(?x) + (?: + joj:| + https?://media\.joj\.sk/embed/ + ) + (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}) + ''' _TESTS = [{ - 'url': 
'https://www.joj.sk/nove-byvanie/archiv/2017-05-28-nove-byvanie', # noqa + 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932', 'info_dict': { 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', 'ext': 'mp4', - 'title': 'Nové Bývanie', - 'release_date': '20170528' + 'title': 'NOVÉ BÝVANIE', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3118, } }, { - 'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia', - 'info_dict': { - 'id': 'f18b2c5f-9ea8-4941-a164-a814c53306ad', - 'ext': 'mp4', - 'title': 'Starí Rodičia', - 'release_date': '20160906' - } + 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932', + 'only_matching': True, }] - media_src_url = 'http://n16.joj.sk/storage/' - xml_source_url = 'https://media.joj.sk/services/Video.php?clip=' + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', + webpage) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - release_date = mobj.group('release_date').replace('-', '') - webpage = self._download_webpage(url, 'id') - video_id = self._html_search_regex( - r'https?://([a-z0-9]+\.)joj\.sk/embed/(?P<video_id>[a-f0-9\-]+)', - webpage, 'id', group='video_id') - xml_playlist_url = self.xml_source_url + video_id - xml_playlist_et = self._download_xml(xml_playlist_url, 'XML playlist') + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://media.joj.sk/embed/%s' % video_id, video_id) + + title = self._search_regex( + (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<title>(?P<title>[^<]+)'), webpage, 'title', + default=None, group='title') or self._og_search_title(webpage) + + bitrates = self._parse_json( + self._search_regex( + r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) + formats = [] - for file_el in 
xml_playlist_et.findall('files/file'): - try: - height = int(file_el.attrib['id'].replace('p', '')) - except ValueError: - height = 0 - formats.append({'height': height, - 'url': self.media_src_url + file_el.attrib['path'].replace( # noqa - 'dat/', '', 1)}) + for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []: + if isinstance(format_url, compat_str): + height = self._search_regex( + r'(\d+)[pP]\.', format_url, 'height', default=None) + formats.append({ + 'url': format_url, + 'format_id': '%sp' % height if height else None, + 'height': int(height), + }) + if not formats: + playlist = self._download_xml( + 'https://media.joj.sk/services/Video.php?clip=%s' % video_id, + video_id) + for file_el in playlist.findall('./files/file'): + path = file_el.get('path') + if not path: + continue + format_id = file_el.get('id') or file_el.get('label') + formats.append({ + 'url': 'http://n16.joj.sk/storage/%s' % path.replace( + 'dat/', '', 1), + 'format_id': format_id, + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', format_id or path, 'height', + default=None)), + }) self._sort_formats(formats) + thumbnail = self._og_search_thumbnail(webpage) + + duration = int_or_none(self._search_regex( + r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) + return { 'id': video_id, - 'title': self._og_search_title(webpage).title(), + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, 'formats': formats, - 'release_date': release_date } -- cgit v1.1 From 6e925598d68f5d5216aa3e9abed5c7706a68c891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 19:15:48 +0700 Subject: [csjw] Add coding cookie --- youtube_dl/extractor/cjsw.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py index dd27158..505bdbe 100644 --- a/youtube_dl/extractor/cjsw.py +++ b/youtube_dl/extractor/cjsw.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import 
unicode_literals import re -- cgit v1.1 From 71a1db89198100a0e9bc5099aeed622264690203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 20:06:24 +0700 Subject: [dailymail] Add support for embeds --- youtube_dl/extractor/dailymail.py | 17 ++++++++++++++--- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py index 538565c..af39780 100644 --- a/youtube_dl/extractor/dailymail.py +++ b/youtube_dl/extractor/dailymail.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -12,8 +14,8 @@ from ..utils import ( class DailyMailIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html', 'md5': 'f6129624562251f628296c3a9ffde124', 'info_dict': { @@ -22,7 +24,16 @@ class DailyMailIE(InfoExtractor): 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', } - } + }, { + 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)', + webpage) def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f2c577f..5e8890d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -57,6 +57,7 
@@ from .dailymotion import ( DailymotionIE, DailymotionCloudIE, ) +from .dailymail import DailyMailIE from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE from .mtv import MTVServicesEmbeddedIE @@ -760,6 +761,20 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Dailymotion'], }, + # DailyMail embed + { + 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot', + 'info_dict': { + 'id': '1495629', + 'ext': 'mp4', + 'title': 'Care worker punches elderly dementia patient in head 11 times', + 'description': 'md5:3a743dee84e57e48ec68bf67113199a5', + }, + 'add_ie': ['DailyMail'], + 'params': { + 'skip_download': True, + }, + }, # YouTube embed { 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', @@ -2190,6 +2205,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) + # Look for DailyMail embeds + dailymail_urls = DailyMailIE._extract_urls(webpage) + if dailymail_urls: + return self.playlist_from_matches( + dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key()) + # Look for embedded Wistia player wistia_url = WistiaIE._extract_url(webpage) if wistia_url: -- cgit v1.1 From 207acd8465b51d9d00d2bdda22f10858eb7f1bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 20:15:15 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 5d07c12..edfde8b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,27 @@ version <unreleased> -Extractors +Core ++ [extractor/common] Add support for AMP tags in _parse_html5_media_entries ++ [utils] Support attributes with no values in get_elements_by_attribute + +Extractors ++ [dailymail] Add support for embeds ++ [joj] Add support for joj.sk 
(#13268) +* [abc.net.au:iview] Extract more formats (#13492, #13489) +* [egghead:course] Fix extraction (#6635, #13370) ++ [cjsw] Add support for cjsw.com (#13525) ++ [eagleplatform] Add support for referrer protected videos (#13557) ++ [eagleplatform] Add support for another embed pattern (#13557) +* [veoh] Extend URL regular expression (#13601) +* [npo:live] Fix live stream id extraction (#13568, #13605) +* [googledrive] Fix height extraction (#13603) ++ [dailymotion] Add support for new layout (#13580) - [yam] Remove extractor +* [xhamster] Extract all formats and fix duration extraction (#13593) ++ [xhamster] Add support for new URL schema (#13593) +* [espn] Extend URL regular expression (#13244, #13549) +* [kaltura] Fix typo in subtitles extraction (#13569) +* [vier] Adapt extraction to redesign (#13575) version 2017.07.02 -- cgit v1.1 From 65c416dda896f8a0023f01547e6b707dd57ed30a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 20:16:38 +0700 Subject: release 2017.07.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 9746035..c431485 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.09*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.02 +[debug] youtube-dl version 2017.07.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index edfde8b..c379cae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.07.09 Core + [extractor/common] Add support for AMP tags in _parse_html5_media_entries diff --git a/docs/supportedsites.md b/docs/supportedsites.md index db2e2ba..b6a147f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -154,6 +154,7 @@ - **chirbit** - **chirbit:profile** - **Cinchcast** + - **CJSW** - **Clipfish** - **cliphunter** - **ClipRs** @@ -369,6 +370,7 @@ - **Jamendo** - **JamendoAlbum** - **JeuxVideo** + - **Joj** - **Jove** - **jpopsuki.tv** - **JWPlatform** @@ -996,7 +998,6 @@ - **XVideos** - **XXXYMovies** - **Yahoo**: Yahoo screen and movies - - **Yam**: 蕃薯藤yam天空部落 - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0db974f..14358a7 
100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.02' +__version__ = '2017.07.09' -- cgit v1.1 From 7bf539edcc3dc44481d5196fd01637698653ffc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 10 Jul 2017 00:14:41 +0700 Subject: [eagleplatform] Fix test --- youtube_dl/extractor/eagleplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 34891a3..4278927 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -53,7 +53,7 @@ class EaglePlatformIE(InfoExtractor): 'skip': 'Georestricted', }, { # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'tvrainru.media.eagleplatform.com:582306', + 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', 'only_matching': True, }] -- cgit v1.1 From b71c18b4343d54ce8373e9a11df882aca1ae82a0 Mon Sep 17 00:00:00 2001 From: coreynicholson <coreynicholson@users.noreply.github.com> Date: Sun, 9 Jul 2017 22:24:04 +0100 Subject: [vlive:playlist] Add extractor --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/vlive.py | 56 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9ee0808..eb15417 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1206,7 +1206,8 @@ from .vk import ( ) from .vlive import ( VLiveIE, - VLiveChannelIE + VLiveChannelIE, + VLivePlaylistIE ) from .vodlocker import VodlockerIE from .vodpl import VODPlIE diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index e589406..f3825db 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -49,6 +49,10 @@ class 
VLiveIE(InfoExtractor): }, }] + @classmethod + def suitable(cls, url): + return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url) + def _real_extract(self, url): video_id = self._match_id(url) @@ -261,3 +265,55 @@ class VLiveChannelIE(InfoExtractor): return self.playlist_result( entries, channel_code, channel_name) + + +class VLivePlaylistIE(InfoExtractor): + IE_NAME = 'vlive:playlist' + _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.vlive.tv/video/22867/playlist/22912', + 'info_dict': { + 'id': '22912', + 'title': 'Valentine Day Message from TWICE' + }, + 'playlist_mincount': 9 + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + video_id_match = re.match(self._VALID_URL, url) + assert video_id_match + video_id = compat_str(video_id_match.group('video_id')) + + VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s' + if self._downloader.params.get('noplaylist'): + self.to_screen( + 'Downloading just video %s because of --no-playlist' % video_id) + return self.url_result( + VIDEO_URL_TEMPLATE % video_id, + ie=VLiveIE.ie_key(), video_id=video_id) + + self.to_screen( + 'Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + + webpage = self._download_webpage( + 'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id) + + playlist_name = self._html_search_regex( + r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)', + webpage, 'playlist name', fatal=False) + + item_ids = self._search_regex( + r'\bvar\s+playlistVideoSeqs\s*=\s*(\[[^]]+\])', + webpage, 'playlist item ids') + + entries = [] + for item_id in self._parse_json(item_ids, playlist_id): + item_id = compat_str(item_id) + entries.append( + self.url_result( + VIDEO_URL_TEMPLATE % item_id, + ie=VLiveIE.ie_key(), video_id=item_id)) + + return self.playlist_result( + entries, playlist_id, playlist_name) -- cgit v1.1 
From e3cd1fcdd177613acae4198cafbff51fbbb912c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 10 Jul 2017 04:32:24 +0700 Subject: [vlive:playlist] Relax and simplify --- youtube_dl/extractor/vlive.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index f3825db..77c120a 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -280,10 +280,8 @@ class VLivePlaylistIE(InfoExtractor): } def _real_extract(self, url): - playlist_id = self._match_id(url) - video_id_match = re.match(self._VALID_URL, url) - assert video_id_match - video_id = compat_str(video_id_match.group('video_id')) + mobj = re.match(self._VALID_URL, url) + video_id, playlist_id = mobj.group('video_id', 'id') VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s' if self._downloader.params.get('noplaylist'): @@ -294,26 +292,27 @@ class VLivePlaylistIE(InfoExtractor): ie=VLiveIE.ie_key(), video_id=video_id) self.to_screen( - 'Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + 'Downloading playlist %s - add --no-playlist to just download video' + % playlist_id) webpage = self._download_webpage( - 'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id) + 'http://www.vlive.tv/video/%s/playlist/%s' + % (video_id, playlist_id), playlist_id) - playlist_name = self._html_search_regex( - r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)', - webpage, 'playlist name', fatal=False) + item_ids = self._parse_json( + self._search_regex( + r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage, + 'playlist video seqs'), + playlist_id) - item_ids = self._search_regex( - r'\bvar\s+playlistVideoSeqs\s*=\s*(\[[^]]+\])', - webpage, 'playlist item ids') + entries = [ + self.url_result( + VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(), + video_id=compat_str(item_id)) + 
for item_id in item_ids] - entries = [] - for item_id in self._parse_json(item_ids, playlist_id): - item_id = compat_str(item_id) - entries.append( - self.url_result( - VIDEO_URL_TEMPLATE % item_id, - ie=VLiveIE.ie_key(), video_id=item_id)) + playlist_name = self._html_search_regex( + r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)', + webpage, 'playlist title', fatal=False) - return self.playlist_result( - entries, playlist_id, playlist_name) + return self.playlist_result(entries, playlist_id, playlist_name) -- cgit v1.1 From c3c94ca4a40504147fce387ffb7eb9cb43233550 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 10 Jul 2017 21:34:27 +0800 Subject: [giantbomb] Extract m3u8 formats (closes #13626) --- ChangeLog | 6 ++++++ youtube_dl/extractor/giantbomb.py | 14 ++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index c379cae..a37d621 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [giantbomb] Extract m3u8 formats (#13626) + + version 2017.07.09 Core diff --git a/youtube_dl/extractor/giantbomb.py b/youtube_dl/extractor/giantbomb.py index 29b684d..6a1b1e9 100644 --- a/youtube_dl/extractor/giantbomb.py +++ b/youtube_dl/extractor/giantbomb.py @@ -5,9 +5,10 @@ import json from .common import InfoExtractor from ..utils import ( - unescapeHTML, - qualities, + determine_ext, int_or_none, + qualities, + unescapeHTML, ) @@ -15,7 +16,7 @@ class GiantBombIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)' _TEST = { 'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/', - 'md5': '57badeface303ecf6b98b812de1b9018', + 'md5': 'c8ea694254a59246a42831155dec57ac', 'info_dict': { 'id': '2300-9782', 'display_id': 'quick-look-destiny-the-dark-below', @@ -51,11 +52,16 @@ class GiantBombIE(InfoExtractor): for format_id, video_url in 
video['videoStreams'].items(): if format_id == 'f4m_stream': continue - if video_url.endswith('.f4m'): + ext = determine_ext(video_url) + if ext == 'f4m': f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id) if f4m_formats: f4m_formats[0]['quality'] = quality(format_id) formats.extend(f4m_formats) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, display_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) else: formats.append({ 'url': video_url, -- cgit v1.1 From bb13949197458fc6bd888bbe9255c391927a997b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 7 Jun 2017 14:47:25 +0800 Subject: [niconico] Check login errors (#12486) --- youtube_dl/extractor/niconico.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index f268a72..695e32e 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -1,23 +1,22 @@ # coding: utf-8 from __future__ import unicode_literals -import re import json import datetime from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, int_or_none, parse_duration, parse_iso8601, - sanitized_Request, - xpath_text, - determine_ext, urlencode_postdata, + xpath_text, ) @@ -101,19 +100,24 @@ class NiconicoIE(InfoExtractor): return True # Log in + login_ok = True login_form_strs = { - 'mail': username, + 'mail_tel': username, 'password': password, } - login_data = urlencode_postdata(login_form_strs) - request = sanitized_Request( - 'https://secure.nicovideo.jp/secure/login', login_data) - login_results = self._download_webpage( - request, None, note='Logging in', errnote='Unable to log in') - if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None: + urlh = self._request_webpage( + 
'https://account.nicovideo.jp/api/v1/login', None, + note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(login_form_strs)) + if urlh is False: + login_ok = False + else: + parts = compat_urlparse.urlparse(urlh.geturl()) + if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login': + login_ok = False + if not login_ok: self._downloader.report_warning('unable to log in: bad username or password') - return False - return True + return login_ok def _real_extract(self, url): video_id = self._match_id(url) -- cgit v1.1 From 708f6f511e1d03363c9dbac7ed42cbdebfbc7718 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 11 Jul 2017 15:04:45 +0800 Subject: [niconico] Fix authentication error handling (closes #12486) --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index a37d621..db1776d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [niconico] Fix authentication error handling (#12486) * [giantbomb] Extract m3u8 formats (#13626) -- cgit v1.1 From 2edfd745df4b7a764d7456e7536e47ef140de24d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 4 Jun 2017 00:41:55 +0800 Subject: [twitter] Extract mp4 urls via mobile API (closes #12726) --- youtube_dl/extractor/twitter.py | 121 +++++++++++++++++++++++++++++----------- 1 file changed, 88 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 37e3bc4..2041b41 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -7,11 +7,13 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( determine_ext, + dict_get, + ExtractorError, float_or_none, - xpath_text, - remove_end, int_or_none, - ExtractorError, + remove_end, + try_get, + xpath_text, ) from .periscope import PeriscopeIE @@ -22,6 +24,15 @@ class TwitterBaseIE(InfoExtractor): vmap_data = 
self._download_xml(vmap_url, video_id) return xpath_text(vmap_data, './/MediaFile').strip() + @staticmethod + def _search_dimensions_in_video_url(a_format, video_url): + m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) + if m: + a_format.update({ + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + class TwitterCardIE(TwitterBaseIE): IE_NAME = 'twitter:card' @@ -90,6 +101,59 @@ class TwitterCardIE(TwitterBaseIE): }, ] + def _parse_media_info(self, media_info, video_id): + formats = [] + for media_variant in media_info.get('variants', []): + media_url = media_variant['url'] + if media_url.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls')) + elif media_url.endswith('.mpd'): + formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash')) + else: + vbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000) + a_format = { + 'url': media_url, + 'format_id': 'http-%d' % vbr if vbr else 'http', + 'vbr': vbr, + } + # Reported bitRate may be zero + if not a_format['vbr']: + del a_format['vbr'] + + self._search_dimensions_in_video_url(a_format, media_url) + + formats.append(a_format) + return formats + + def _extract_mobile_formats(self, username, video_id): + webpage = self._download_webpage( + 'https://mobile.twitter.com/%s/status/%s' % (username, video_id), + video_id, 'Downloading mobile webpage', + headers={ + # A recent mobile UA is necessary for `gt` cookie + 'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0', + }) + main_script_url = self._html_search_regex( + r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL') + main_script = self._download_webpage( + main_script_url, video_id, 'Downloading main script') + bearer_token = self._search_regex( + r'BEARER_TOKEN\s*:\s*"([^"]+)"', + main_script, 'bearer token') + guest_token = self._search_regex( + 
r'document\.cookie\s*=\s*decodeURIComponent\("gt=(\d+)', + webpage, 'guest token') + api_data = self._download_json( + 'https://api.twitter.com/2/timeline/conversation/%s.json' % video_id, + video_id, 'Downloading mobile API data', + headers={ + 'Authorization': 'Bearer ' + bearer_token, + 'x-guest-token': guest_token, + }) + media_info = try_get(api_data, lambda o: o['globalObjects']['tweets'][video_id] + ['extended_entities']['media'][0]['video_info']) or {} + return self._parse_media_info(media_info, video_id) + def _real_extract(self, url): video_id = self._match_id(url) @@ -117,14 +181,6 @@ class TwitterCardIE(TwitterBaseIE): if periscope_url: return self.url_result(periscope_url, PeriscopeIE.ie_key()) - def _search_dimensions_in_video_url(a_format, video_url): - m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) - if m: - a_format.update({ - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') if video_url: @@ -135,7 +191,7 @@ class TwitterCardIE(TwitterBaseIE): 'url': video_url, } - _search_dimensions_in_video_url(f, video_url) + self._search_dimensions_in_video_url(f, video_url) formats.append(f) @@ -152,29 +208,14 @@ class TwitterCardIE(TwitterBaseIE): media_info = entity['mediaInfo'] if media_info: - for media_variant in media_info['variants']: - media_url = media_variant['url'] - if media_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls')) - elif media_url.endswith('.mpd'): - formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash')) - else: - vbr = int_or_none(media_variant.get('bitRate'), scale=1000) - a_format = { - 'url': media_url, - 'format_id': 'http-%d' % vbr if vbr else 'http', - 'vbr': vbr, - } - # Reported bitRate may be zero - if not a_format['vbr']: - del a_format['vbr'] - - _search_dimensions_in_video_url(a_format, media_url) - - 
formats.append(a_format) - + formats.extend(self._parse_media_info(media_info, video_id)) duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) + username = config.get('user', {}).get('screen_name') + if username: + formats.extend(self._extract_mobile_formats(username, video_id)) + + self._remove_duplicate_formats(formats) self._sort_formats(formats) title = self._search_regex(r'<title>([^<]+)', webpage, 'title') @@ -301,6 +342,20 @@ class TwitterIE(InfoExtractor): 'timestamp': 1474613214, }, 'add_ie': ['Periscope'], + }, { + # has mp4 formats via mobile API + 'url': 'https://twitter.com/news_al3alm/status/852138619213144067', + 'info_dict': { + 'id': '852138619213144067', + 'ext': 'mp4', + 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', + 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. 
الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"', + 'uploader': 'عالم الأخبار', + 'uploader_id': 'news_al3alm', + }, + 'params': { + 'format': 'best[format_id^=http-]', + }, }] def _real_extract(self, url): -- cgit v1.1 From 7f176ac4775aed5e5a72d2f0c4b579d1b886d419 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Jul 2017 15:35:19 +0800 Subject: [periscope] Support pscp.tv URLs in embedded frames And fix a relevant twitter test --- ChangeLog | 1 + youtube_dl/extractor/periscope.py | 2 +- youtube_dl/extractor/twitter.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index db1776d..ffb4b69 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [periscope] Support pscp.tv URLs in embedded frames * [niconico] Fix authentication error handling (#12486) * [giantbomb] Extract m3u8 formats (#13626) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 1add6b8..bfa12ed 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -49,7 +49,7 @@ class PeriscopeIE(PeriscopeBaseIE): @staticmethod def _extract_url(webpage): mobj = re.search( - r']+src=([\'"])(?P(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage) + r']+src=([\'"])(?P(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage) if mobj: return mobj.group('url') diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 2041b41..e4bc7e0 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -335,10 +335,11 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '1zqKVVlkqLaKB', 'ext': 'mp4', - 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence', + 'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence', + 'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on 
#Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"', 'upload_date': '20160923', 'uploader_id': 'OPP_HSD', - 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police', + 'uploader': 'Sgt Kerry Schmidt', 'timestamp': 1474613214, }, 'add_ie': ['Periscope'], -- cgit v1.1 From 9be31e771cd9481ea690c01eed398645deadc1de Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Jul 2017 15:48:34 +0800 Subject: [twitter] Support HLS streams in vmap URLs --- ChangeLog | 1 + youtube_dl/extractor/twitter.py | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index ffb4b69..8e64511 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [twitter] Support HLS streams in vmap URLs + [periscope] Support pscp.tv URLs in embedded frames * [niconico] Fix authentication error handling (#12486) * [giantbomb] Extract m3u8 formats (#13626) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index e4bc7e0..89eabe7 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -20,9 +20,16 @@ from .periscope import PeriscopeIE class TwitterBaseIE(InfoExtractor): - def _get_vmap_video_url(self, vmap_url, video_id): + def _extract_formats_from_vmap_url(self, vmap_url, video_id): vmap_data = self._download_xml(vmap_url, video_id) - return xpath_text(vmap_data, './/MediaFile').strip() + video_url = xpath_text(vmap_data, './/MediaFile').strip() + if determine_ext(video_url) == 'm3u8': + return self._extract_m3u8_formats( + video_url, video_id, ext='mp4', m3u8_id='hls', + entry_protocol='m3u8_native') + return [{ + 'url': video_url, + }] @staticmethod def _search_dimensions_in_video_url(a_format, video_url): @@ -197,9 +204,8 @@ class TwitterCardIE(TwitterBaseIE): vmap_url = config.get('vmapUrl') or config.get('vmap_url') if vmap_url: - formats.append({ - 'url': self._get_vmap_video_url(vmap_url, video_id), - }) 
+ formats.extend( + self._extract_formats_from_vmap_url(vmap_url, video_id)) media_info = None @@ -449,7 +455,7 @@ class TwitterAmplifyIE(TwitterBaseIE): vmap_url = self._html_search_meta( 'twitter:amplify:vmap', webpage, 'vmap url') - video_url = self._get_vmap_video_url(vmap_url, video_id) + formats = self._extract_formats_from_vmap_url(vmap_url, video_id) thumbnails = [] thumbnail = self._html_search_meta( @@ -471,11 +477,10 @@ class TwitterAmplifyIE(TwitterBaseIE): }) video_w, video_h = _find_dimension('player') - formats = [{ - 'url': video_url, + formats[0].update({ 'width': video_w, 'height': video_h, - }] + }) return { 'id': video_id, -- cgit v1.1 From e8f20ffa032a791548a66bb7b694c424673537e6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Jul 2017 16:05:15 +0800 Subject: [vine] Make sure the title won't be empty And fix a relevant TwitterCard test case --- ChangeLog | 1 + youtube_dl/extractor/twitter.py | 1 + youtube_dl/extractor/vine.py | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8e64511..a5de3c2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [vine] Make sure the title won't be empty + [twitter] Support HLS streams in vmap URLs + [periscope] Support pscp.tv URLs in embedded frames * [niconico] Fix authentication error handling (#12486) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 89eabe7..2ff5541 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -91,6 +91,7 @@ class TwitterCardIE(TwitterBaseIE): 'uploader_id': '1189339351084113920', 'uploader': 'ArsenalTerje', 'title': 'Vine by ArsenalTerje', + 'timestamp': 1447451307, }, 'add_ie': ['Vine'], }, { diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 4957a07..46950d3 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -92,10 +92,12 @@ class VineIE(InfoExtractor): username = 
data.get('username') + alt_title = 'Vine by %s' % username if username else None + return { 'id': video_id, - 'title': data.get('description'), - 'alt_title': 'Vine by %s' % username if username else None, + 'title': data.get('description') or alt_title or 'Vine video', + 'alt_title': alt_title, 'thumbnail': data.get('thumbnailUrl'), 'timestamp': unified_timestamp(data.get('created')), 'uploader': username, -- cgit v1.1 From 3615bfe1b4b97cb35ddd63c455160a50c2a10961 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Jul 2017 16:46:37 +0800 Subject: [twitter] Fix remaining tests --- youtube_dl/extractor/twitter.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 2ff5541..6eaf360 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -54,7 +54,8 @@ class TwitterCardIE(TwitterBaseIE): 'title': 'Twitter Card', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 30.033, - } + }, + 'skip': 'Video gone', }, { 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768', @@ -66,6 +67,7 @@ class TwitterCardIE(TwitterBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'duration': 80.155, }, + 'skip': 'Video gone', }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', @@ -83,7 +85,7 @@ class TwitterCardIE(TwitterBaseIE): }, { 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568', - 'md5': 'ab2745d0b0ce53319a534fccaa986439', + 'md5': '6dabeaca9e68cbb71c99c322a4b42a11', 'info_dict': { 'id': 'iBb2x00UVlv', 'ext': 'mp4', @@ -96,12 +98,12 @@ class TwitterCardIE(TwitterBaseIE): 'add_ie': ['Vine'], }, { 'url': 'https://twitter.com/i/videos/tweet/705235433198714880', - 'md5': '3846d0a07109b5ab622425449b59049d', + 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88', 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', 'title': 'Twitter web player', - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': 
r're:^https?://.*', }, }, { 'url': 'https://twitter.com/i/videos/752274308186120192', @@ -303,10 +305,10 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'Donte - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'Donte on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'JG', + 'uploader': 'Donte', 'uploader_id': 'jaydingeer', }, 'params': { @@ -318,9 +320,11 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': 'MIOxnrUteUd', 'ext': 'mp4', - 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン', - 'uploader': 'TAKUMA', - 'uploader_id': '1004126642786242560', + 'title': 'FilmDrunk - Vine of the day', + 'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', + 'uploader': 'FilmDrunk', + 'uploader_id': 'Filmdrunk', + 'timestamp': 1402826626, 'upload_date': '20140615', }, 'add_ie': ['Vine'], -- cgit v1.1 From f2bb33a9868e499f4582fee24a7b67d559d33575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Jul 2017 21:36:45 +0700 Subject: [ted] Fix subtitles extraction (closes #13628, closes #13629) --- youtube_dl/extractor/ted.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index f27d0e3..06a27fd 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -271,20 +271,22 @@ class TEDIE(InfoExtractor): } def _get_subtitles(self, video_id, talk_info): - languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] - if languages: - sub_lang_list = {} - for l in languages: - sub_lang_list[l] = [ - { - 'url': 
'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext), - 'ext': ext, - } - for ext in ['ted', 'srt'] - ] - return sub_lang_list - else: - return {} + sub_lang_list = {} + for language in try_get( + talk_info, + (lambda x: x['downloads']['languages'], + lambda x: x['languages']), list): + lang_code = language.get('languageCode') or language.get('ianaCode') + if not lang_code: + continue + sub_lang_list[lang_code] = [ + { + 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, lang_code, ext), + 'ext': ext, + } + for ext in ['ted', 'srt'] + ] + return sub_lang_list def _watch_info(self, url, name): webpage = self._download_webpage(url, name) -- cgit v1.1 From 9a0942ad55bba714d6eaeb9ee4f66a138ec85e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Jul 2017 22:59:36 +0700 Subject: [drtv] Make HLS and HDS extraction non fatal --- youtube_dl/extractor/drtv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index c84624f..69effba 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -118,7 +118,7 @@ class DRTVIE(InfoExtractor): if target == 'HDS': f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', - video_id, preference, f4m_id=format_id) + video_id, preference, f4m_id=format_id, fatal=False) if kind == 'AudioResource': for f in f4m_formats: f['vcodec'] = 'none' @@ -126,7 +126,8 @@ class DRTVIE(InfoExtractor): elif target == 'HLS': formats.extend(self._extract_m3u8_formats( uri, video_id, 'mp4', entry_protocol='m3u8_native', - preference=preference, m3u8_id=format_id)) + preference=preference, m3u8_id=format_id, + fatal=False)) else: bitrate = link.get('Bitrate') if bitrate: -- cgit v1.1 From 15da37c7dc8cf14ba5ce880aa1805fceaa71fc44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jul 2017 00:40:54 +0700 Subject: 
[YoutubeDL] Don't expand env variables in meta fields (closes #13637) --- test/test_YoutubeDL.py | 6 ++++++ youtube_dl/YoutubeDL.py | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 75945e3..70989e2 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -527,6 +527,8 @@ class TestYoutubeDL(unittest.TestCase): 'ext': 'mp4', 'width': None, 'height': 1080, + 'title1': '$PATH', + 'title2': '%PATH%', } def fname(templ): @@ -545,10 +547,14 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%%'), '%') + self.assertEqual(fname('%%%%'), '%%') self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') + self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH') + self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%') def test_format_note(self): ydl = YoutubeDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 60ee4b7..8730d32 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -20,6 +20,7 @@ import re import shutil import subprocess import socket +import string import sys import time import tokenize @@ -674,7 +675,19 @@ class YoutubeDL(object): FORMAT_RE.format(numeric_field), r'%({0})s'.format(numeric_field), outtmpl) - filename = expand_path(outtmpl % template_dict) + # expand_path translates '%%' into '%' and '$$' into '$' + # correspondingly that is not what we want since we need to keep + # '%%' intact for template dict substitution step. Working around + # with boundary-alike separator hack. 
+ sep = ''.join([random.choice(string.ascii_letters) for _ in range(32)]) + outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) + + # outtmpl should be expand_path'ed before template dict substitution + # because meta fields may contain env variables we don't want to + # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and + # title "Hello $PATH", we don't want `$PATH` to be expanded. + filename = expand_path(outtmpl).replace(sep, '') % template_dict + # Temporary fix for #4787 # 'Treat' all problem characters by passing filename through preferredencoding # to workaround encoding issues with subprocess on python2 @ Windows -- cgit v1.1 From f354d8480700c5e6f288bfce497a363b4c6f0859 Mon Sep 17 00:00:00 2001 From: rrooij Date: Fri, 14 Jul 2017 17:10:17 +0200 Subject: [5tv] Add another video URL pattern (closes #13354) --- youtube_dl/extractor/fivetv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py index 15736c9..9f98637 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -43,7 +43,7 @@ class FiveTVIE(InfoExtractor): 'info_dict': { 'id': 'glavnoe', 'ext': 'mp4', - 'title': 'Итоги недели с 8 по 14 июня 2015 года', + 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$', 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -70,7 +70,8 @@ class FiveTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r']+?href="([^"]+)"[^>]+?class="videoplayer"', + [r']+?class="flowplayer[^>]+?data-href="([^"]+)"', + r']+?href="([^"]+)"[^>]+?class="videoplayer"'], webpage, 'video url') title = self._og_search_title(webpage, default=None) or self._search_regex( -- cgit v1.1 From 00dbdfc1f741b919a0add36394065ce1aeccfda8 Mon Sep 17 00:00:00 2001 From: satunnainen Date: Fri, 14 Jul 2017 18:11:07 +0300 Subject: [slideshare] Fix extraction --- youtube_dl/extractor/slideshare.py | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 74a1dc6..e89ebeb 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -31,7 +31,7 @@ class SlideshareIE(InfoExtractor): page_title = mobj.group('title') webpage = self._download_webpage(url, page_title) slideshare_obj = self._search_regex( - r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);', + r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);', webpage, 'slideshare object') info = json.loads(slideshare_obj) if info['slideshow']['type'] != 'video': -- cgit v1.1 From 7d02dcfaa2589453ee3cc6c88ee27f04c252f8a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jul 2017 22:37:04 +0700 Subject: [youtube] Don't capture YouTube Red ad for creator meta field (closes #13621) --- youtube_dl/extractor/youtube.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 77cd271..4597ccb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -673,6 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) + # YouTube Red ad is not captured for creator { 'url': '__2ABJjxzNo', 'info_dict': { @@ -1649,7 +1650,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_webpage, 'license', default=None) m_music = re.search( - r']+class="title"[^>]*>\s*Music\s*\s*]*>\s*
  • (?P.+?) by (?P<creator>.+?)(?:\(.+?\))?</li', + r'''(?x) + <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s* + <ul[^>]*>\s* + <li>(?P<title>.+?) + by (?P<creator>.+?) + (?: + \(.+?\)| + <a[^>]* + (?: + \bhref=["\']/red[^>]*>| # drop possible + >\s*Listen ad-free with YouTube Red # YouTube Red ad + ) + .*? + )?</li + ''', video_webpage) if m_music: video_alt_title = remove_quotes(unescapeHTML(m_music.group('title'))) -- cgit v1.1 From 2583c0b54e56f6dbce85a079d91a05e9b13c2dce Mon Sep 17 00:00:00 2001 From: Robin Neatherway <robin.neatherway@gmail.com> Date: Fri, 14 Jul 2017 17:08:32 +0100 Subject: Fix bugs caused by typos --- youtube_dl/downloader/ism.py | 3 +-- youtube_dl/extractor/audioboom.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/karrierevideos.py | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 5f6f9fa..9b001ec 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -98,7 +98,7 @@ def write_piff_header(stream, params): if is_audio: smhd_payload = s88.pack(0) # balance - smhd_payload = u16.pack(0) # reserved + smhd_payload += u16.pack(0) # reserved media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header else: vmhd_payload = u16.pack(0) # graphics mode @@ -126,7 +126,6 @@ def write_piff_header(stream, params): if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) else: - sample_entry_payload = sample_entry_payload sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u32.pack(0) * 3 # pre defined diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index e48bb89..393f381 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -43,7 +43,7 @@ class AudioBoomIE(InfoExtractor): def from_clip(field): if clip: - clip.get(field) + return clip.get(field) audio_url 
= from_clip('clipURLPriorToLoading') or self._og_search_property( 'audio', webpage, 'audio url') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5e8890d..8c2ff39 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2740,7 +2740,7 @@ class GenericIE(InfoExtractor): rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: return self.playlist_from_matches( - rutube_urls, ie=RutubeIE.ie_key()) + rutube_urls, video_id, video_title, ie=RutubeIE.ie_key()) # Look for WashingtonPost embeds wapo_urls = WashingtonPostIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index 4e9eb67..f236a2f 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -48,7 +48,7 @@ class KarriereVideosIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = (self._html_search_meta('title', webpage, default=None) or - self._search_regex(r'<h1 class="title">([^<]+)</h1>')) + self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title')) video_id = self._search_regex( r'/config/video/(.+?)\.xml', webpage, 'video id') -- cgit v1.1 From 4e826cd9aec383768a7b25aa3161efd4672f9310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 06:48:12 +0700 Subject: [nexx] Add extractor (closes #10807, closes #13465) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 22 ++++ youtube_dl/extractor/nexx.py | 221 +++++++++++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 youtube_dl/extractor/nexx.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index eb15417..9d34447 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -653,6 +653,7 @@ from .nextmedia import ( AppleDailyIE, NextTVIE, ) +from .nexx import NexxIE 
from .nfb import NFBIE from .nfl import NFLIE from .nhk import NhkVodIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8c2ff39..123a212 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -36,6 +36,7 @@ from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) +from .nexx import NexxIE from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE @@ -1549,6 +1550,22 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['BrightcoveLegacy'], }, + # Nexx embed + { + 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503', + 'info_dict': { + 'id': '247746', + 'ext': 'mp4', + 'title': "Yesterday's Jam (OV)", + 'description': 'md5:09bc0984723fed34e2581624a84e05f0', + 'timestamp': 1492594816, + 'upload_date': '20170419', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, # Facebook <iframe> embed { 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', @@ -2133,6 +2150,11 @@ class GenericIE(InfoExtractor): if bc_urls: return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew') + # Look for Nexx embeds + nexx_urls = NexxIE._extract_urls(webpage) + if nexx_urls: + return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key()) + # Look for ThePlatform embeds tp_urls = ThePlatformIE._extract_urls(webpage) if tp_urls: diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py new file mode 100644 index 0000000..60b42cb --- /dev/null +++ b/youtube_dl/extractor/nexx.py @@ -0,0 +1,221 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import hashlib +import random +import re +import time + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + parse_duration, + try_get, + urlencode_postdata, +) + + +class NexxIE(InfoExtractor): + _VALID_URL 
= r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)' + _TESTS = [{ + # movie + 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907', + 'md5': '16746bfc28c42049492385c989b26c4a', + 'info_dict': { + 'id': '128907', + 'ext': 'mp4', + 'title': 'Stiftung Warentest', + 'alt_title': 'Wie ein Test abläuft', + 'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2', + 'release_year': 2013, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2509, + 'timestamp': 1384264416, + 'upload_date': '20131112', + }, + 'params': { + 'format': 'bestvideo', + }, + }, { + # episode + 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858', + 'info_dict': { + 'id': '247858', + 'ext': 'mp4', + 'title': 'Return of the Golden Child (OV)', + 'description': 'md5:5d969537509a92b733de21bae249dc63', + 'release_year': 2017, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1397, + 'timestamp': 1495033267, + 'upload_date': '20170517', + 'episode_number': 2, + 'season_number': 2, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + # Reference: + # 1. https://nx-s.akamaized.net/files/201510/44.pdf + + entries = [] + + # JavaScript Integration + for domain_id, video_id in re.findall( + r'''(?isx) + <script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(\d+).+? + onPLAYReady.+? 
+ _play\.init\s*\(.+?\s*,\s*(\d+)\s*,\s*.+?\) + ''', webpage): + entries.append('https://api.nexx.cloud/v3/%s/videos/byid/%s' % (domain_id, video_id)) + + # TODO: support more embed formats + + return entries + + def _handle_error(self, response): + status = int_or_none(try_get( + response, lambda x: x['metadata']['status']) or 200) + if 200 <= status < 300: + return + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']), + expected=True) + + def _call_api(self, domain_id, path, video_id, data=None, headers={}): + headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8' + result = self._download_json( + 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id, + 'Downloading %s JSON' % path, data=urlencode_postdata(data), + headers=headers) + self._handle_error(result) + return result['result'] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + domain_id, video_id = mobj.group('domain_id', 'id') + + # Reverse engineered from JS code (see getDeviceID function) + device_id = '%d:%d:%d%d' % ( + random.randint(1, 4), int(time.time()), + random.randint(1e4, 99999), random.randint(1, 9)) + + result = self._call_api(domain_id, 'session/init', video_id, data={ + 'nxp_devh': device_id, + 'nxp_userh': '', + 'precid': '0', + 'playlicense': '0', + 'screenx': '1920', + 'screeny': '1080', + 'playerversion': '6.0.00', + 'gateway': 'html5', + 'adGateway': '', + 'explicitlanguage': 'en-US', + 'addTextTemplates': '1', + 'addDomainData': '1', + 'addAdModel': '1', + }, headers={ + 'X-Request-Enable-Auth-Fallback': '1', + }) + + cid = result['general']['cid'] + + # As described in [1] X-Request-Token generation algorithm is + # as follows: + # md5( operation + domain_id + domain_secret ) + # where domain_secret is a static value that will be given by nexx.tv + # as per [1]. Here is how this "secret" is generated (reversed + # from _play.api.init function, search for clienttoken). 
So it's + # actually not static and not that much of a secret. + # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf + secret = result['device']['clienttoken'][int(device_id[0]):] + secret = secret[0:len(secret) - int(device_id[-1])] + + op = 'byid' + + # Reversed from JS code for _play.api.call function (search for + # X-Request-Token) + request_token = hashlib.md5( + ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest() + + video = self._call_api( + domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={ + 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description', + 'addInteractionOptions': '1', + 'addStatusDetails': '1', + 'addStreamDetails': '1', + 'addCaptions': '1', + 'addScenes': '1', + 'addHotSpots': '1', + 'addBumpers': '1', + 'captionFormat': 'data', + }, headers={ + 'X-Request-CID': cid, + 'X-Request-Token': request_token, + }) + + general = video['general'] + title = general['title'] + + stream_data = video['streamdata'] + language = general.get('language_raw') or '' + + # TODO: reverse more cdns and formats + + cdn = stream_data['cdnType'] + assert cdn == 'azure' + + azure_locator = stream_data['azureLocator'] + + AZURE_URL = 'http://nx-p%02d.akamaized.net/' + + for secure in ('s', ''): + cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper()) + if cdn_shield: + azure_base = 'http%s://%s' % (secure, cdn_shield) + break + else: + azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', '')) + + is_ml = ',' in language + azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % ( + azure_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + + protection_token = try_get( + video, lambda x: x['protectiondata']['token'], compat_str) + if protection_token: + azure_m3u8_url += '?hdnts=%s' % protection_token + + formats = self._extract_m3u8_formats( + azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='%s-hls' % cdn) + 
self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'alt_title': general.get('subtitle'), + 'description': general.get('description'), + 'release_year': int_or_none(general.get('year')), + 'creator': general.get('studio') or general.get('studio_adref'), + 'thumbnail': try_get( + video, lambda x: x['imagedata']['thumb'], compat_str), + 'duration': parse_duration(general.get('runtime')), + 'timestamp': int_or_none(general.get('uploaded')), + 'episode_number': int_or_none(try_get( + video, lambda x: x['episodedata']['episode'])), + 'season_number': int_or_none(try_get( + video, lambda x: x['episodedata']['season'])), + 'formats': formats, + } -- cgit v1.1 From c7604d79e9993e76845141a61fdf3c308af917e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 06:52:23 +0700 Subject: [spiegeltv] Delegate extraction to nexx (closes #13159) --- youtube_dl/extractor/spiegeltv.py | 113 +++----------------------------------- 1 file changed, 8 insertions(+), 105 deletions(-) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index e1cfb86..6ccf4c3 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -1,114 +1,17 @@ -# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse -from ..utils import ( - determine_ext, - float_or_none, -) +from .nexx import NexxIE class SpiegeltvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)' - _TESTS = [{ - 'url': 'http://www.spiegel.tv/filme/flug-mh370/', - 'info_dict': { - 'id': 'flug-mh370', - 'ext': 'm4v', - 'title': 'Flug MH370', - 'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines', - 'thumbnail': r're:http://.*\.jpg$', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 
'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/', + _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)' + _TEST = { + 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/', 'only_matching': True, - }] + } def _real_extract(self, url): - if '/#/' in url: - url = url.replace('/#/', '/') - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title') - - apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com' - version_json = self._download_json( - '%s/version.json' % apihost, video_id, - note='Downloading version information') - version_name = version_json['version_name'] - - slug_json = self._download_json( - '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id), - video_id, - note='Downloading object information') - oid = slug_json['object_id'] - - media_json = self._download_json( - '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid), - video_id, note='Downloading media information') - uuid = media_json['uuid'] - is_wide = media_json['is_wide'] - - server_json = self._download_json( - 'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json', - video_id, note='Downloading server information') - - format = '16x9' if is_wide else '4x3' - - formats = [] - for streamingserver in server_json['streamingserver']: - endpoint = streamingserver.get('endpoint') - if not endpoint: - continue - play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format) - if endpoint.startswith('rtmp'): - formats.append({ - 'url': endpoint, - 'format_id': 'rtmp', - 'app': compat_urllib_parse_urlparse(endpoint).path[1:], - 'play_path': play_path, - 'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf', - 'ext': 'flv', - 'rtmp_live': True, - }) - elif determine_ext(endpoint) == 'm3u8': - formats.append({ - 'url': endpoint.replace('[video]', play_path), - 'ext': 'm4v', - 'format_id': 'hls', # Prefer hls since 
it allows to workaround georestriction - 'protocol': 'm3u8', - 'preference': 1, - 'http_headers': { - 'Accept-Encoding': 'deflate', # gzip causes trouble on the server side - }, - }) - else: - formats.append({ - 'url': endpoint, - }) - self._check_formats(formats, video_id) - - thumbnails = [] - for image in media_json['images']: - thumbnails.append({ - 'url': image['url'], - 'width': image['width'], - 'height': image['height'], - }) - - description = media_json['subtitle'] - duration = float_or_none(media_json.get('duration_in_ms'), scale=1000) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'thumbnails': thumbnails, - 'formats': formats, - } + return self.url_result( + 'https://api.nexx.cloud/v3/748/videos/byid/%s' + % self._match_id(url), ie=NexxIE.ie_key()) -- cgit v1.1 From ea3f20494f64c18123c61f722a7864e3dbdde566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 07:02:05 +0700 Subject: [youtube] PEP 8 --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4597ccb..2e71795 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1660,7 +1660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): <a[^>]* (?: \bhref=["\']/red[^>]*>| # drop possible - >\s*Listen ad-free with YouTube Red # YouTube Red ad + >\s*Listen ad-free with YouTube Red # YouTube Red ad ) .*? 
)?</li -- cgit v1.1 From 961ea474b6c6965d49a58d0400d0368fa0300b19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 07:02:14 +0700 Subject: [YoutubeDL] PEP 8 --- youtube_dl/YoutubeDL.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8730d32..89c07be 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -20,13 +20,14 @@ import re import shutil import subprocess import socket -import string import sys import time import tokenize import traceback import random +from string import ascii_letters + from .compat import ( compat_basestring, compat_cookiejar, @@ -679,7 +680,7 @@ class YoutubeDL(object): # correspondingly that is not what we want since we need to keep # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. - sep = ''.join([random.choice(string.ascii_letters) for _ in range(32)]) + sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) # outtmpl should be expand_path'ed before template dict substitution -- cgit v1.1 From ef78563e9c148d8a24382c282d74a960e244569e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 07:33:26 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index a5de3c2..5f3ea6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,25 @@ version <unreleased> +Core +* [YoutubeDL] Don't expand environment variables in meta fields (#13637) + Extractors +* [spiegeltv] Delegate extraction to nexx extractor (#13159) ++ [nexx] Add support for nexx.cloud (#10807, #13465) +* [generic] Fix rutube embeds extraction (#13641) +* [karrierevideos] Fix title extraction (#13641) +* [youtube] Don't capture YouTube Red ad for creator 
meta field (#13621) +* [slideshare] Fix extraction (#13617) ++ [5tv] Add another video URL pattern (#13354, #13606) +* [drtv] Make HLS and HDS extraction non fatal +* [ted] Fix subtitles extraction (#13628, #13629) * [vine] Make sure the title won't be empty + [twitter] Support HLS streams in vmap URLs + [periscope] Support pscp.tv URLs in embedded frames +* [twitter] Extract mp4 urls via mobile API (#12726) * [niconico] Fix authentication error handling (#12486) * [giantbomb] Extract m3u8 formats (#13626) ++ [vlive:playlist] Add support for playlists (#13613) version 2017.07.09 -- cgit v1.1 From cea931a9e5cf682eafe5b7cdacbbe22630b7a9e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 07:36:05 +0700 Subject: release 2017.07.15 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c431485..0f20d04 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.09 +[debug] youtube-dl version 2017.07.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5f3ea6b..7d71fc5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.07.15 Core * [YoutubeDL] Don't expand environment variables in meta fields (#13637) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b6a147f..d7304ba 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -521,6 +521,7 @@ - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextTV**: 壹電視 + - **Nexx** - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** @@ -942,6 +943,7 @@ - **vk:wallpost** - **vlive** - **vlive:channel** + - **vlive:playlist** - **Vodlocker** - **VODPl** - **VODPlatform** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 14358a7..82e166f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.09' +__version__ = '2017.07.15' -- cgit v1.1 From 94b817edebb63c3d8485e1ae27cc394dd9e21f9d Mon Sep 17 00:00:00 2001 From: troywith77 <ruitang307@gmail.com> Date: Tue, 9 May 2017 13:10:18 +0800 Subject: [pearvideo] Add extractor --- 
youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pear.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 youtube_dl/extractor/pear.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d34447..75c1a3d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -762,6 +762,7 @@ from .pandoratv import PandoraTVIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE +from .pear import PearIE from .people import PeopleIE from .periscope import ( PeriscopeIE, diff --git a/youtube_dl/extractor/pear.py b/youtube_dl/extractor/pear.py new file mode 100644 index 0000000..77fd468 --- /dev/null +++ b/youtube_dl/extractor/pear.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class PearIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.pearvideo.com/video_1076290', + 'info_dict': { + 'id': '1076290', + 'ext': 'mp4', + 'title': '小浣熊在主人家玻璃上滚石头:没砸', + 'description': '小浣熊找到一个小石头,仿佛发现了一个宝贝。它不停地用石头按在玻璃上,滚来滚去,吸引主人注意。', + 'url': 'http://video.pearvideo.com/mp4/short/20170508/cont-1076290-10438018-hd.mp4' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<h1[^>]+class="video-tt">(.+)</h1>', webpage, 'title', fatal=False) + description = self._html_search_regex(r'<div[^>]+class="summary"[^>]*>([^<]+)<', webpage, 'description', fatal=False) + url = self._html_search_regex(r'hdUrl="(.*?)"', webpage, 'url', fatal=False) + + return { + 'id': video_id, + 'ext': 'mp4', + 'title': title, + 'description': description, + 'url': url + } -- cgit v1.1 From decf86044d17a8ec04e43a4805a0092622d976ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Sun, 16 Jul 2017 03:06:04 +0700 Subject: [pearvideo] Improve (closes #13031) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/pear.py | 34 -------------------- youtube_dl/extractor/pearvideo.py | 63 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 35 deletions(-) delete mode 100644 youtube_dl/extractor/pear.py create mode 100644 youtube_dl/extractor/pearvideo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 75c1a3d..28f0d3f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -762,7 +762,7 @@ from .pandoratv import PandoraTVIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE -from .pear import PearIE +from .pearvideo import PearVideoIE from .people import PeopleIE from .periscope import ( PeriscopeIE, diff --git a/youtube_dl/extractor/pear.py b/youtube_dl/extractor/pear.py deleted file mode 100644 index 77fd468..0000000 --- a/youtube_dl/extractor/pear.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class PearIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.pearvideo.com/video_1076290', - 'info_dict': { - 'id': '1076290', - 'ext': 'mp4', - 'title': '小浣熊在主人家玻璃上滚石头:没砸', - 'description': '小浣熊找到一个小石头,仿佛发现了一个宝贝。它不停地用石头按在玻璃上,滚来滚去,吸引主人注意。', - 'url': 'http://video.pearvideo.com/mp4/short/20170508/cont-1076290-10438018-hd.mp4' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex(r'<h1[^>]+class="video-tt">(.+)</h1>', webpage, 'title', fatal=False) - description = self._html_search_regex(r'<div[^>]+class="summary"[^>]*>([^<]+)<', webpage, 'description', fatal=False) - url = 
self._html_search_regex(r'hdUrl="(.*?)"', webpage, 'url', fatal=False) - - return { - 'id': video_id, - 'ext': 'mp4', - 'title': title, - 'description': description, - 'url': url - } diff --git a/youtube_dl/extractor/pearvideo.py b/youtube_dl/extractor/pearvideo.py new file mode 100644 index 0000000..1d77722 --- /dev/null +++ b/youtube_dl/extractor/pearvideo.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + qualities, + unified_timestamp, +) + + +class PearVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)' + _TEST = { + 'url': 'http://www.pearvideo.com/video_1076290', + 'info_dict': { + 'id': '1076290', + 'ext': 'mp4', + 'title': '小浣熊在主人家玻璃上滚石头:没砸', + 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d', + 'timestamp': 1494275280, + 'upload_date': '20170508', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + quality = qualities( + ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src')) + + formats = [{ + 'url': mobj.group('url'), + 'format_id': mobj.group('id'), + 'quality': quality(mobj.group('id')), + } for mobj in re.finditer( + r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2', + webpage)] + self._sort_formats(formats) + + title = self._search_regex( + (r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)', + r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1'), + webpage, 'title', group='value') + description = self._search_regex( + (r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)', + r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1'), + webpage, 'description', default=None, + group='value') or self._html_search_meta('Description', webpage) + timestamp = unified_timestamp(self._search_regex( + r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)', + webpage, 'timestamp', fatal=False)) + + return { + 
'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + } -- cgit v1.1 From 089b97cfee8553886d33cd52b7ede178cebd7034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 04:30:48 +0700 Subject: [nexx] Improve JS embed extraction --- youtube_dl/extractor/nexx.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 60b42cb..12450d4 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -72,13 +72,17 @@ class NexxIE(InfoExtractor): entries = [] # JavaScript Integration - for domain_id, video_id in re.findall( - r'''(?isx) - <script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(\d+).+? - onPLAYReady.+? - _play\.init\s*\(.+?\s*,\s*(\d+)\s*,\s*.+?\) - ''', webpage): - entries.append('https://api.nexx.cloud/v3/%s/videos/byid/%s' % (domain_id, video_id)) + mobj = re.search( + r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)', + webpage) + if mobj: + domain_id = mobj.group('id') + for video_id in re.findall( + r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)', + webpage): + entries.append( + 'https://api.nexx.cloud/v3/%s/videos/byid/%s' + % (domain_id, video_id)) # TODO: support more embed formats -- cgit v1.1 From 3f59b0154a8b6dc85425edfbb3dfdc64f41a6ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 04:32:37 +0700 Subject: [nexx:embed] Add extractor for iframe embeds --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/generic.py | 31 ++++++++++++++++++++++++- youtube_dl/extractor/nexx.py | 46 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 28f0d3f..e8a066b 100644 --- 
a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -653,7 +653,10 @@ from .nextmedia import ( AppleDailyIE, NextTVIE, ) -from .nexx import NexxIE +from .nexx import ( + NexxIE, + NexxEmbedIE, +) from .nfb import NFBIE from .nfl import NFLIE from .nhk import NhkVodIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 123a212..0ab2ef2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -36,7 +36,10 @@ from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) -from .nexx import NexxIE +from .nexx import ( + NexxIE, + NexxEmbedIE, +) from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE @@ -1566,6 +1569,27 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # Nexx iFrame embed + { + 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', + 'info_dict': { + 'id': '161464', + 'ext': 'mp4', + 'title': 'Nervenkitzel Achterbahn', + 'alt_title': 'Karussellbauer in Deutschland', + 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', + 'release_year': 2005, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2761, + 'timestamp': 1394021479, + 'upload_date': '20140305', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, # Facebook <iframe> embed { 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', @@ -2155,6 +2179,11 @@ class GenericIE(InfoExtractor): if nexx_urls: return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key()) + # Look for Nexx iFrame embeds + nexx_embed_urls = NexxEmbedIE._extract_urls(webpage) + if nexx_embed_urls: + return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key()) + # Look for ThePlatform embeds tp_urls = ThePlatformIE._extract_urls(webpage) if tp_urls: diff --git 
a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 12450d4..e296027 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -88,6 +88,10 @@ class NexxIE(InfoExtractor): return entries + @staticmethod + def _extract_url(webpage): + return NexxIE._extract_urls(webpage)[0] + def _handle_error(self, response): status = int_or_none(try_get( response, lambda x: x['metadata']['status']) or 200) @@ -223,3 +227,45 @@ class NexxIE(InfoExtractor): video, lambda x: x['episodedata']['season'])), 'formats': formats, } + + +class NexxEmbedIE(InfoExtractor): + _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1', + 'md5': '16746bfc28c42049492385c989b26c4a', + 'info_dict': { + 'id': '161464', + 'ext': 'mp4', + 'title': 'Nervenkitzel Achterbahn', + 'alt_title': 'Karussellbauer in Deutschland', + 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', + 'release_year': 2005, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2761, + 'timestamp': 1394021479, + 'upload_date': '20140305', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + } + + @staticmethod + def _extract_urls(webpage): + # Reference: + # 1. 
https://nx-s.akamaized.net/files/201510/44.pdf + + # iFrame Embed Integration + return [mobj.group('url') for mobj in re.finditer( + r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1', + webpage)] + + def _real_extract(self, url): + embed_id = self._match_id(url) + + webpage = self._download_webpage(url, embed_id) + + return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key()) -- cgit v1.1 From 749ca5eced0b9a8ed1ef79f4ee80908e0ac242c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 04:33:14 +0700 Subject: [extractor/common] Fix playlist_from_matches --- youtube_dl/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index daa1088..748b4d5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -730,12 +730,12 @@ class InfoExtractor(object): video_info['title'] = video_title return video_info - def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None): - urlrs = orderedSet( + def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None): + urls = orderedSet( self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) for m in matches) return self.playlist_result( - urlrs, playlist_id=video_id, playlist_title=video_title) + urls, playlist_id=playlist_id, playlist_title=playlist_title) @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None): -- cgit v1.1 From 00d06e3cfcb7bfffa3b585694525f05a6ce36af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 04:38:20 +0700 Subject: [spiegel:article] Add support for nexx iframe embeds (closes #13029) --- youtube_dl/extractor/spiegel.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 
deletions(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index ec1b603..8598377 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .nexx import NexxEmbedIE from .spiegeltv import SpiegeltvIE from ..compat import compat_urlparse from ..utils import ( @@ -143,6 +144,9 @@ class SpiegelArticleIE(InfoExtractor): entries = [ self.url_result(compat_urlparse.urljoin( self.http_scheme() + '//spiegel.de/', embed_path)) - for embed_path in embeds - ] - return self.playlist_result(entries) + for embed_path in embeds] + if embeds: + return self.playlist_result(entries) + + return self.playlist_from_matches( + NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key()) -- cgit v1.1 From 13eb526f111dc9b98421f3e05287980b88766409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 05:23:19 +0700 Subject: [nexx:embed] PEP 8 --- youtube_dl/extractor/nexx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index e296027..d0235fd 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -260,7 +260,7 @@ class NexxEmbedIE(InfoExtractor): # iFrame Embed Integration return [mobj.group('url') for mobj in re.finditer( - r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1', + r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1', webpage)] def _real_extract(self, url): -- cgit v1.1 From 7abed4e06c8935026873a9c01821d5a5b2d80d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 12:40:45 +0700 Subject: [crunchyroll] Relax series and season regex (closes #13659) --- youtube_dl/extractor/crunchyroll.py | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 2ffa4a7..8bdaf0c 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -510,7 +510,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text # webpage provide more accurate data than series_title from XML series = self._html_search_regex( - r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)', + r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d', webpage, 'series', fatal=False) season = xpath_text(metadata, 'series_title') @@ -518,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text episode_number = int_or_none(xpath_text(metadata, 'episode_number')) season_number = int_or_none(self._search_regex( - r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)', + r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', webpage, 'season number', default=None)) return { -- cgit v1.1 From 83d00044c1c9aef487f1c30bb50246e9ad039636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 20:48:09 +0700 Subject: [adn] Improve error reporting (#13663) --- youtube_dl/extractor/adn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 39f80b2..cffdab6 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -107,11 +107,13 @@ class ADNIE(InfoExtractor): metas = options.get('metas') or {} title = metas.get('title') or video_info['title'] links = player_config.get('links') or {} + error = None if not links: links_url = player_config['linksurl'] links_data = self._download_json(urljoin( self._BASE_URL, links_url), video_id) links = links_data.get('links') or {} + error = links_data.get('error') formats = [] for 
format_id, qualities in links.items(): @@ -130,7 +132,8 @@ class ADNIE(InfoExtractor): for f in m3u8_formats: f['language'] = 'fr' formats.extend(m3u8_formats) - error = options.get('error') + if not error: + error = options.get('error') if not formats and error: raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) self._sort_formats(formats) -- cgit v1.1 From bb176df3bbef62dcf958dcc542f778686e44db63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 17 Jul 2017 22:19:40 +0700 Subject: [spiegel:article] Move test --- youtube_dl/extractor/generic.py | 21 --------------------- youtube_dl/extractor/spiegel.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0ab2ef2..36c81ed 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1569,27 +1569,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, - # Nexx iFrame embed - { - 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', - 'info_dict': { - 'id': '161464', - 'ext': 'mp4', - 'title': 'Nervenkitzel Achterbahn', - 'alt_title': 'Karussellbauer in Deutschland', - 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', - 'release_year': 2005, - 'creator': 'SPIEGEL TV', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 2761, - 'timestamp': 1394021479, - 'upload_date': '20140305', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, # Facebook <iframe> embed { 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 8598377..84298fe 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -122,6 +122,26 @@ class SpiegelArticleIE(InfoExtractor): }, 'playlist_count': 6, 
+ }, { + # Nexx iFrame embed + 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', + 'info_dict': { + 'id': '161464', + 'ext': 'mp4', + 'title': 'Nervenkitzel Achterbahn', + 'alt_title': 'Karussellbauer in Deutschland', + 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', + 'release_year': 2005, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2761, + 'timestamp': 1394021479, + 'upload_date': '20140305', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, }] def _real_extract(self, url): -- cgit v1.1 From d20b1c6725fce956b44413cced449b7d09b12de2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Jul 2017 18:14:14 +0800 Subject: [dispeak] Recognize sevt subdomain (closes #13276) --- ChangeLog | 6 ++++++ youtube_dl/extractor/dispeak.py | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 7d71fc5..8e442d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [dispeak] Recognize sevt subdomain (#13276) + + version 2017.07.15 Core diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index a78cb8a..c05f601 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -13,7 +13,7 @@ from ..utils import ( class DigitallySpeakingIE(InfoExtractor): - _VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml' + _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml' _TESTS = [{ # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface @@ -28,6 +28,10 @@ class DigitallySpeakingIE(InfoExtractor): # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC 'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml', 'only_matching': 
True, + }, { + # From http://www.gdcvault.com/play/1013700/Advanced-Material + 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', + 'only_matching': True, }] def _parse_mp4(self, metadata): -- cgit v1.1 From 85f5a74b6cf44b0c8b612c264c36eaabd958f501 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Jul 2017 21:19:09 +0800 Subject: [tbs] Mark as broken and skip invalid tests --- youtube_dl/extractor/tbs.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index bf93eb8..e947453 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -8,6 +8,9 @@ from ..utils import extract_attributes class TBSIE(TurnerBaseIE): + # https://github.com/rg3/youtube-dl/issues/13658 + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html' _TESTS = [{ 'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html', @@ -17,7 +20,8 @@ class TBSIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'Theatrical Trailer', 'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.', - } + }, + 'skip': 'TBS videos are deleted after a while', }, { 'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html', 'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56', @@ -26,7 +30,8 @@ class TBSIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'You Better Run', 'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. 
Good Behavior premieres November 15 at 9/8c.', - } + }, + 'skip': 'TBS videos are deleted after a while', }] def _real_extract(self, url): -- cgit v1.1 From fa63cf6c2301972b7d0ae76fb7a11c7d1a2786a9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Jul 2017 22:57:51 +0800 Subject: [youku:show] Fix playlist extraction (closes #13248) --- ChangeLog | 1 + youtube_dl/extractor/youku.py | 66 +++++++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8e442d1..a83523c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [youku:show] Fix playlist extraction (#13248) + [dispeak] Recognize sevt subdomain (#13276) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index dcce15d..4ae9adb 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import itertools import random import re import string @@ -222,50 +221,61 @@ class YoukuShowIE(InfoExtractor): _VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html' IE_NAME = 'youku:show' - _TEST = { + _TESTS = [{ 'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html', 'info_dict': { 'id': 'zc7c670be07ff11e48b3f', - 'title': '花千骨 未删减版', + 'title': '花千骨 DVD版', 'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558', }, 'playlist_count': 50, - } + }, { + # Episode number not starting from 1 + 'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html', + 'info_dict': { + 'id': 'zefbfbd70efbfbd780bef', + 'title': '超级飞侠3', + 'description': 'md5:275715156abebe5ccc2a1992e9d56b98', + }, + 'playlist_count': 24, + }] - _PAGE_SIZE = 40 + def _extract_entries(self, playlist_data_url, show_id, idx, query, url): + query['callback'] = 'cb' + playlist_data = self._download_json( + playlist_data_url, show_id, query=query, + note='Downloading playlist data page 
%d' % (idx + 1), + transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] + drama_list = (get_element_by_class('p-drama-grid', playlist_data) or + get_element_by_class('p-drama-half-row', playlist_data)) + if drama_list is None: + raise ExtractorError('No episodes found') + video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list) + return playlist_data, [ + self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) + for video_url in video_urls] def _real_extract(self, url): show_id = self._match_id(url) webpage = self._download_webpage(url, show_id) - entries = [] page_config = self._parse_json(self._search_regex( r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'), show_id, transform_source=js_to_json) - for idx in itertools.count(0): - if idx == 0: - playlist_data_url = 'http://list.youku.com/show/module' - query = {'id': page_config['showid'], 'tab': 'point'} - else: - playlist_data_url = 'http://list.youku.com/show/point' - query = { + first_page, entries = self._extract_entries( + 'http://list.youku.com/show/module', show_id, 0, { + 'id': page_config['showid'], + 'tab': 'showInfo', + }, url) + # The first reload_id has the same items as first_page + reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)[1:] + for idx, reload_id in enumerate(reload_ids): + _, new_entries = self._extract_entries( + 'http://list.youku.com/show/episode', show_id, idx + 1, { 'id': page_config['showid'], - 'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1), - } - query['callback'] = 'cb' - playlist_data = self._download_json( - playlist_data_url, show_id, query=query, - note='Downloading playlist data page %d' % (idx + 1), - transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] - video_urls = re.findall( - r'<div[^>]+class="p-thumb"[^<]+<a[^>]+href="([^"]+)"', - playlist_data) - new_entries = [ - self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) - for video_url in video_urls] + 'stage': reload_id, + }, url) 
entries.extend(new_entries) - if len(new_entries) < self._PAGE_SIZE: - break desc = self._html_search_meta('description', webpage, fatal=False) playlist_title = desc.split(',')[0] if desc else None -- cgit v1.1 From 3fcf346ac16e6fe1963a3eab861d6bd9c32ce6db Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Jul 2017 23:20:46 +0800 Subject: [youku:show] Refine playlist extraction Handle playlists whose initial page is not the first page --- youtube_dl/extractor/youku.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 4ae9adb..0c4bc2e 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -13,7 +13,6 @@ from ..utils import ( js_to_json, str_or_none, strip_jsonp, - urljoin, ) @@ -238,13 +237,16 @@ class YoukuShowIE(InfoExtractor): 'description': 'md5:275715156abebe5ccc2a1992e9d56b98', }, 'playlist_count': 24, + }, { + # Ongoing playlist. 
The initial page is the last one + 'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html', + 'only_matching': True, }] - def _extract_entries(self, playlist_data_url, show_id, idx, query, url): + def _extract_entries(self, playlist_data_url, show_id, note, query): query['callback'] = 'cb' playlist_data = self._download_json( - playlist_data_url, show_id, query=query, - note='Downloading playlist data page %d' % (idx + 1), + playlist_data_url, show_id, query=query, note=note, transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] drama_list = (get_element_by_class('p-drama-grid', playlist_data) or get_element_by_class('p-drama-half-row', playlist_data)) @@ -252,29 +254,39 @@ class YoukuShowIE(InfoExtractor): raise ExtractorError('No episodes found') video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list) return playlist_data, [ - self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) + self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key()) for video_url in video_urls] def _real_extract(self, url): show_id = self._match_id(url) webpage = self._download_webpage(url, show_id) + entries = [] page_config = self._parse_json(self._search_regex( r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'), show_id, transform_source=js_to_json) - first_page, entries = self._extract_entries( - 'http://list.youku.com/show/module', show_id, 0, { + first_page, initial_entries = self._extract_entries( + 'http://list.youku.com/show/module', show_id, + note='Downloading initial playlist data page', + query={ 'id': page_config['showid'], 'tab': 'showInfo', - }, url) + }) + first_page_reload_id = self._html_search_regex( + r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id') # The first reload_id has the same items as first_page - reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)[1:] + reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page) for idx, reload_id in enumerate(reload_ids): + if 
reload_id == first_page_reload_id: + entries.extend(initial_entries) + continue _, new_entries = self._extract_entries( - 'http://list.youku.com/show/episode', show_id, idx + 1, { + 'http://list.youku.com/show/episode', show_id, + note='Downloading playlist data page %d' % (idx + 1), + query={ 'id': page_config['showid'], 'stage': reload_id, - }, url) + }) entries.extend(new_entries) desc = self._html_search_meta('description', webpage, fatal=False) -- cgit v1.1 From c653326a1425f4c271f387fde7a706bf4b52a7a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 20 Jul 2017 22:49:52 +0700 Subject: [funnyordie] Extract more metadata (closes #13677) --- youtube_dl/extractor/funnyordie.py | 64 +++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 4940936..f85e7de 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -1,10 +1,14 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + unified_timestamp, +) class FunnyOrDieIE(InfoExtractor): @@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor): 'title': 'Heart-Shaped Box: Literal Video Version', 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', 'thumbnail': r're:^http:.*\.jpg$', + 'uploader': 'DASjr', + 'timestamp': 1317904928, + 'upload_date': '20111006', + 'duration': 318.3, }, }, { 'url': 'http://www.funnyordie.com/embed/e402820827', @@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor): 'title': 'Please Use This Song (Jon Lajoie)', 'description': 'Please use this to sell something. 
www.jonlajoie.com', 'thumbnail': r're:^http:.*\.jpg$', + 'timestamp': 1398988800, + 'upload_date': '20140502', }, 'params': { 'skip_download': True, @@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor): 'url': 'http://www.funnyordie.com%s' % src, }] - post_json = self._search_regex( - r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') - post = json.loads(post_json) + timestamp = unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp', default=None)) + + uploader = self._html_search_regex( + r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h', + webpage, 'uploader', default=None) + + title, description, thumbnail, duration = [None] * 4 + + medium = self._parse_json( + self._search_regex( + r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium', + default='{}'), + video_id, fatal=False) + if medium: + title = medium.get('title') + duration = float_or_none(medium.get('duration')) + if not timestamp: + timestamp = unified_timestamp(medium.get('publishDate')) + + post = self._parse_json( + self._search_regex( + r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details', + default='{}'), + video_id, fatal=False) + if post: + if not title: + title = post.get('name') + description = post.get('description') + thumbnail = post.get('picture') + + if not title: + title = self._og_search_title(webpage) + if not description: + description = self._og_search_description(webpage) + if not duration: + duration = int_or_none(self._html_search_meta( + ('video:duration', 'duration'), webpage, 'duration', default=False)) return { 'id': video_id, - 'title': post['name'], - 'description': post.get('description'), - 'thumbnail': post.get('picture'), + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'timestamp': timestamp, + 'duration': duration, 'formats': formats, 'subtitles': subtitles, } -- cgit v1.1 From dc6520aa3d1fe7afc52613e392f15dde90af4844 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Thu, 20 Jul 2017 23:22:36 +0700 Subject: [egghead:lesson] Add extractor (#6635) --- youtube_dl/extractor/egghead.py | 49 ++++++++++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 +++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index c86f523..e4a3046 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -2,6 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) class EggheadCourseIE(InfoExtractor): @@ -33,3 +38,47 @@ class EggheadCourseIE(InfoExtractor): return self.playlist_result( entries, playlist_id, course.get('title'), course.get('description')) + + +class EggheadLessonIE(InfoExtractor): + IE_DESC = 'egghead.io lesson' + IE_NAME = 'egghead:lesson' + _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', + 'info_dict': { + 'id': 'fv5yotjxcg', + 'ext': 'mp4', + 'title': 'Create linear data flow with container style types (Box)', + 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e', + 'thumbnail': r're:^https?:.*\.jpg$', + 'timestamp': 1481296768, + 'upload_date': '20161209', + 'duration': 304, + 'view_count': 0, + 'tags': ['javascript', 'free'], + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + lesson_id = self._match_id(url) + + lesson = self._download_json( + 'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id) + + return { + '_type': 'url_transparent', + 'ie_key': 'Wistia', + 'url': 'wistia:%s' % lesson['wistia_id'], + 'id': lesson['wistia_id'], + 'title': lesson.get('title'), + 'description': lesson.get('summary'), + 'thumbnail': lesson.get('thumb_nail'), + 'timestamp': unified_timestamp(lesson.get('published_at')), + 'duration': 
int_or_none(lesson.get('duration')), + 'view_count': int_or_none(lesson.get('plays_count')), + 'tags': try_get(lesson, lambda x: x['tag_list'], list), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e8a066b..db7616c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -298,7 +298,10 @@ from .dw import ( from .eagleplatform import EaglePlatformIE from .ebaumsworld import EbaumsWorldIE from .echomsk import EchoMskIE -from .egghead import EggheadCourseIE +from .egghead import ( + EggheadCourseIE, + EggheadLessonIE, +) from .ehow import EHowIE from .eighttracks import EightTracksIE from .einthusan import EinthusanIE -- cgit v1.1 From 0396806f671e5828c2abdeb8048acf8b654507b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 21 Jul 2017 00:13:32 +0700 Subject: [YoutubeDL] Do not override id, extractor and extractor_key in url_transparent All these meta fields must be borrowed from final extractor that actually performs extraction. This commit fixes extractor id in download archives for url_transparent downloads. Previously, 'transparent' extractor was erroneously used for extractor archive id, e.g. 'eggheadlesson 4n8ugwwj5t' instead of 'wistia 4n8ugwwj5t'. 
--- test/test_YoutubeDL.py | 7 ++++++- youtube_dl/YoutubeDL.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 70989e2..e0decb8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -41,6 +41,7 @@ def _make_result(formats, **kwargs): 'id': 'testid', 'title': 'testttitle', 'extractor': 'testex', + 'extractor_key': 'TestEx', } res.update(**kwargs) return res @@ -761,7 +762,8 @@ class TestYoutubeDL(unittest.TestCase): '_type': 'url_transparent', 'url': 'foo2:', 'ie_key': 'Foo2', - 'title': 'foo1 title' + 'title': 'foo1 title', + 'id': 'foo1_id', } class Foo2IE(InfoExtractor): @@ -787,6 +789,9 @@ class TestYoutubeDL(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['url'], TEST_URL) self.assertEqual(downloaded['title'], 'foo1 title') + self.assertEqual(downloaded['id'], 'testid') + self.assertEqual(downloaded['extractor'], 'testex') + self.assertEqual(downloaded['extractor_key'], 'TestEx') if __name__ == '__main__': diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 89c07be..f94836d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -860,7 +860,7 @@ class YoutubeDL(object): force_properties = dict( (k, v) for k, v in ie_result.items() if v is not None) - for f in ('_type', 'url', 'ie_key'): + for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'): if f in force_properties: del force_properties[f] new_result = info.copy() -- cgit v1.1 From 7d9a1db1110b13e8e6b65613ebb3daf7f0ff3c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 22 Jul 2017 11:40:46 +0700 Subject: [dramafever] Remove video id from title (closes #13699) --- youtube_dl/extractor/dramafever.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index e7abc88..03fa3aa 100644 --- 
a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, clean_html, int_or_none, + remove_end, sanitized_Request, urlencode_postdata ) @@ -73,7 +74,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'info_dict': { 'id': '4512.1', 'ext': 'mp4', - 'title': 'Cooking with Shin 4512.1', + 'title': 'Cooking with Shin', 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', 'episode': 'Episode 1', 'episode_number': 1, @@ -91,7 +92,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'info_dict': { 'id': '4826.4', 'ext': 'mp4', - 'title': 'Mnet Asian Music Awards 2015 4826.4', + 'title': 'Mnet Asian Music Awards 2015', 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', 'episode': 'Mnet Asian Music Awards 2015 - Part 3', 'episode_number': 4, @@ -122,6 +123,10 @@ class DramaFeverIE(DramaFeverBaseIE): countries=self._GEO_COUNTRIES) raise + # title is postfixed with video id for some reason, removing + if info.get('title'): + info['title'] = remove_end(info['title'], video_id).strip() + series_id, episode_number = video_id.split('.') episode_info = self._download_json( # We only need a single episode info, so restricting page size to one episode -- cgit v1.1 From f76c02c87b479310b0e090216895879257b1062a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 22 Jul 2017 11:41:40 +0700 Subject: [dramafever] Fix tests --- youtube_dl/extractor/dramafever.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 03fa3aa..9a498d7 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -73,7 +73,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { 'id': '4512.1', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Cooking with Shin', 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', 
'episode': 'Episode 1', @@ -81,7 +81,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1404336058, 'upload_date': '20140702', - 'duration': 343, + 'duration': 344, }, 'params': { # m3u8 download @@ -91,7 +91,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1', 'info_dict': { 'id': '4826.4', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Mnet Asian Music Awards 2015', 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', 'episode': 'Mnet Asian Music Awards 2015 - Part 3', @@ -99,7 +99,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1450213200, 'upload_date': '20151215', - 'duration': 5602, + 'duration': 5359, }, 'params': { # m3u8 download -- cgit v1.1 From 359aa2fdd145d11a29a04f620fed95acbf142f66 Mon Sep 17 00:00:00 2001 From: dubber0 <rexa.mose@gmail.com> Date: Sat, 22 Jul 2017 14:15:55 +0200 Subject: [npo] Add support for npo3.nl URLs --- youtube_dl/extractor/npo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 516b1e9..fa4ef20 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -28,7 +28,7 @@ class NPOBaseIE(InfoExtractor): class NPOIE(NPOBaseIE): IE_NAME = 'npo' - IE_DESC = 'npo.nl and ntr.nl' + IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl' _VALID_URL = r'''(?x) (?: npo:| @@ -38,7 +38,7 @@ class NPOIE(NPOBaseIE): npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| - zapp\.nl/[^/]+/[^/]+/ + (?:zapp|npo3)\.nl/(?:[^/]+/){2} ) ) (?P<id>[^/?#]+) @@ -147,6 +147,9 @@ class NPOIE(NPOBaseIE): 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', 'only_matching': True, }, { + 'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870', + 'only_matching': True, + }, { # live stream 'url': 
'npo:LI_NL1_4188102', 'only_matching': True, -- cgit v1.1 From 327c8364f11f23dd919e8009c6adb021c34054fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 22 Jul 2017 21:35:14 +0700 Subject: [sportbox:embed] Fix extraction --- youtube_dl/extractor/sportbox.py | 61 ++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index e7bd5bf..54497c8 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + determine_ext, + int_or_none, + js_to_json, +) class SportBoxEmbedIE(InfoExtractor): @@ -14,8 +18,10 @@ class SportBoxEmbedIE(InfoExtractor): 'info_dict': { 'id': '211355', 'ext': 'mp4', - 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'title': '211355', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 292, + 'view_count': int, }, 'params': { # m3u8 download @@ -24,6 +30,9 @@ class SportBoxEmbedIE(InfoExtractor): }, { 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', 'only_matching': True, + }, { + 'url': 'https://news.sportbox.ru/vdl/player/media/193095', + 'only_matching': True, }] @staticmethod @@ -37,36 +46,34 @@ class SportBoxEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - formats = [] - - def cleanup_js(code): - # desktop_advert_config contains complex Javascripts and we don't need it - return js_to_json(re.sub(r'desktop_advert_config.*', '', code)) - - jwplayer_data = self._parse_json(self._search_regex( - r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id, - transform_source=cleanup_js) - - hls_url = jwplayer_data.get('hls_url') - if hls_url: - 
formats.extend(self._extract_m3u8_formats( - hls_url, video_id, ext='mp4', m3u8_id='hls')) - - rtsp_url = jwplayer_data.get('rtsp_url') - if rtsp_url: - formats.append({ - 'url': rtsp_url, - 'format_id': 'rtsp', - }) + wjplayer_data = self._parse_json( + self._search_regex( + r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'), + video_id, transform_source=js_to_json) + formats = [] + for source in wjplayer_data['sources']: + src = source.get('src') + if not src: + continue + if determine_ext(src) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) self._sort_formats(formats) - title = jwplayer_data['node_title'] - thumbnail = jwplayer_data.get('image_url') + view_count = int_or_none(self._search_regex( + r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) return { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, + 'title': video_id, + 'thumbnail': wjplayer_data.get('poster'), + 'duration': int_or_none(wjplayer_data.get('duration')), + 'view_count': view_count, 'formats': formats, } -- cgit v1.1 From 0017d9ad6de831384e74db14a821e4c94020c9ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 00:12:01 +0700 Subject: [YoutubeDL] Improve default format specification (closes #13704) --- test/test_YoutubeDL.py | 11 +++++++++++ youtube_dl/YoutubeDL.py | 31 +++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e0decb8..4af14f9 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -449,6 +449,17 @@ class TestFormatSelection(unittest.TestCase): pass self.assertEqual(ydl.downloaded_info_dicts, []) + def test_default_format_spec(self): + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + + ydl = 
YDL({'outtmpl': '-'}) + self.assertEqual(ydl._default_format_spec({}), 'best') + + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best') + class TestYoutubeDL(unittest.TestCase): def test_subtitles(self): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f94836d..367ae35 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1064,6 +1064,25 @@ class YoutubeDL(object): return op(actual_value, comparison_value) return _filter + def _default_format_spec(self, info_dict, download=True): + req_format_list = [] + + def can_have_partial_formats(): + if self.params.get('simulate', False): + return True + if not download: + return True + if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': + return False + if info_dict.get('is_live'): + return False + merger = FFmpegMergerPP(self) + return merger.available and merger.can_merge() + if can_have_partial_formats(): + req_format_list.append('bestvideo+bestaudio') + req_format_list.append('best') + return '/'.join(req_format_list) + def build_format_selector(self, format_spec): def syntax_error(note, start): message = ( @@ -1534,14 +1553,10 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: - req_format_list = [] - if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and - not info_dict.get('is_live')): - merger = FFmpegMergerPP(self) - if merger.available and merger.can_merge(): - req_format_list.append('bestvideo+bestaudio') - req_format_list.append('best') - req_format = '/'.join(req_format_list) + req_format = self._default_format_spec(info_dict, download=download) + if self.params.get('verbose'): + self.to_stdout('[debug] Default format spec: %s' % req_format) + format_selector = self.build_format_selector(req_format) # While in format selection we may need to have an access to the original -- cgit v1.1 From 
e0f1fb0a27612c2398df59dd85194edfdf8cbc2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 00:25:23 +0700 Subject: [mtv] Skip missing video parts (closes #13690) --- youtube_dl/extractor/mtv.py | 28 ++++++++++++++++++++++------ youtube_dl/extractor/vh1.py | 12 ++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8acea14..fc098cd 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -83,7 +83,7 @@ class MTVServicesInfoExtractor(InfoExtractor): hls_url = rendition.find('./src').text formats.extend(self._extract_m3u8_formats( hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls')) + m3u8_id='hls', fatal=False)) else: # fms try: @@ -106,7 +106,8 @@ class MTVServicesInfoExtractor(InfoExtractor): }]) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') - self._sort_formats(formats) + if formats: + self._sort_formats(formats) return formats def _extract_subtitles(self, mdoc, mtvn_id): @@ -133,8 +134,11 @@ class MTVServicesInfoExtractor(InfoExtractor): mediagen_url += 'acceptMethods=' mediagen_url += 'hls' if use_hls else 'fms' - mediagen_doc = self._download_xml(mediagen_url, video_id, - 'Downloading video urls') + mediagen_doc = self._download_xml( + mediagen_url, video_id, 'Downloading video urls', fatal=False) + + if mediagen_doc is False: + return None item = mediagen_doc.find('./video/item') if item is not None and item.get('type') == 'text': @@ -174,6 +178,13 @@ class MTVServicesInfoExtractor(InfoExtractor): formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id) + # Some parts of complete video may be missing (e.g. 
missing Act 3 in + # http://www.southpark.de/alle-episoden/s14e01-sexual-healing) + if not formats: + return None + + self._sort_formats(formats) + return { 'title': title, 'formats': formats, @@ -205,9 +216,14 @@ class MTVServicesInfoExtractor(InfoExtractor): title = xpath_text(idoc, './channel/title') description = xpath_text(idoc, './channel/description') + entries = [] + for item in idoc.findall('.//item'): + info = self._get_video_info(item, use_hls) + if info: + entries.append(info) + return self.playlist_result( - [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], - playlist_title=title, playlist_description=description) + entries, playlist_title=title, playlist_description=description) def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): triforce_feed = self._parse_json(self._search_regex( diff --git a/youtube_dl/extractor/vh1.py b/youtube_dl/extractor/vh1.py index 6be3774..570fa45 100644 --- a/youtube_dl/extractor/vh1.py +++ b/youtube_dl/extractor/vh1.py @@ -121,7 +121,11 @@ class VH1IE(MTVIE): idoc = self._download_xml( doc_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) - return self.playlist_result( - [self._get_video_info(item) for item in idoc.findall('.//item')], - playlist_id=video_id, - ) + + entries = [] + for item in idoc.findall('.//item'): + info = self._get_video_info(item) + if info: + entries.append(info) + + return self.playlist_result(entries, playlist_id=video_id) -- cgit v1.1 From 935d6c20c00536cf39cf2844295266e64492bb10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 00:44:50 +0700 Subject: [vidio] Make duration non fatal and fix typo --- youtube_dl/extractor/vidio.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index 701bb1d..01da32f 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -56,7 +56,8 @@ 
class VidioIE(InfoExtractor): self._sort_formats(formats) duration = int_or_none(duration or self._search_regex( - r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration')) + r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage, + 'duration', fatal=False, group='duration')) thumbnail = thumbnail or self._og_search_thumbnail(webpage) like_count = int_or_none(self._search_regex( -- cgit v1.1 From 71dde5eecf07ae3a8871e5d4a05a944097e17fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 00:59:07 +0700 Subject: [itv] Fix production id extraction (closes #13671) --- youtube_dl/extractor/itv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index f315680..26c48e4 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -59,12 +59,18 @@ class ITVIE(InfoExtractor): def _add_sub_element(element, name): return etree.SubElement(element, _add_ns(name)) + production_id = ( + params.get('data-video-autoplay-id') or + '%s#001' % ( + params.get('data-video-episode-id') or + video_id.replace('a', '/'))) + req_env = etree.Element(_add_ns('soapenv:Envelope')) _add_sub_element(req_env, 'soapenv:Header') body = _add_sub_element(req_env, 'soapenv:Body') get_playlist = _add_sub_element(body, ('tem:GetPlaylist')) request = _add_sub_element(get_playlist, 'tem:request') - _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id'] + _add_sub_element(request, 'itv:ProductionId').text = production_id _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper() vodcrid = _add_sub_element(request, 'itv:Vodcrid') _add_sub_element(vodcrid, 'com:Id') -- cgit v1.1 From 425f41319aec6940195818e980005cef4946eb75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 01:06:08 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 20 
++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index a83523c..cd8a34a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,28 @@ version <unreleased> +Core +* [YoutubeDL] Improve default format specification (#13704) +* [YoutubeDL] Do not override id, extractor and extractor_key for + url_transparent entities +* [extractor/common] Fix playlist_from_matches + Extractors +* [itv] Fix production id extraction (#13671, #13703) +* [vidio] Make duration non fatal and fix typo +* [mtv] Skip missing video parts (#13690) +* [sportbox:embed] Fix extraction ++ [npo] Add support for npo3.nl URLs (#13695) +* [dramafever] Remove video id from title (#13699) ++ [egghead:lesson] Add support for lessons (#6635) +* [funnyordie] Extract more metadata (#13677) * [youku:show] Fix playlist extraction (#13248) + [dispeak] Recognize sevt subdomain (#13276) +* [adn] Improve error reporting (#13663) +* [crunchyroll] Relax series and season regex (#13659) ++ [spiegel:article] Add support for nexx iframe embeds (#13029) ++ [nexx:embed] Add support for iframe embeds +* [nexx] Improve JS embed extraction ++ [pearvideo] Add support for pearvideo.com (#13031) version 2017.07.15 -- cgit v1.1 From 0db492c02a60dbfb44514833445bf267f5319ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 01:09:09 +0700 Subject: release 2017.07.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 13 ++++++++----- youtube_dl/version.py | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0f20d04..37d09d7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.15*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.15 +[debug] youtube-dl version 2017.07.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index cd8a34a..302d32a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.07.23 Core * [YoutubeDL] Improve default format specification (#13704) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d7304ba..eb09c47 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -42,7 +42,7 @@ - **Allocine** - **AlphaPorno** - **AMCNetworks** - - **anderetijden**: npo.nl and ntr.nl + - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **AnimeOnDemand** - **anitube.se** - **Anvato** @@ 
-238,6 +238,7 @@ - **EbaumsWorld** - **EchoMsk** - **egghead:course**: egghead.io course + - **egghead:lesson**: egghead.io lesson - **eHow** - **Einthusan** - **eitb.tv** @@ -522,6 +523,7 @@ - **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextTV**: 壹電視 - **Nexx** + - **NexxEmbed** - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** @@ -552,7 +554,7 @@ - **NowTVList** - **nowvideo**: NowVideo - **Noz** - - **npo**: npo.nl and ntr.nl + - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **npo.nl:live** - **npo.nl:radio** - **npo.nl:radio:fragment** @@ -596,6 +598,7 @@ - **Patreon** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pcmag** + - 
**PearVideo** - **People** - **periscope**: Periscope - **periscope:user**: Periscope user videos @@ -772,7 +775,7 @@ - **tagesschau:player** - **Tass** - **TastyTrade** - - **TBS** + - **TBS** (Currently broken) - **TDSLifeway** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos @@ -950,7 +953,7 @@ - **VoiceRepublic** - **VoxMedia** - **Vporn** - - **vpro**: npo.nl and ntr.nl + - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be - **vrv** @@ -976,7 +979,7 @@ - **wholecloud**: WholeCloud - **Wimp** - **Wistia** - - **wnl**: npo.nl and ntr.nl + - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** - **wrzuta.pl** - **wrzuta.pl:playlist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 82e166f..a8dbb93 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.15' +__version__ = '2017.07.23' -- cgit v1.1 From 905d18a7aa42263c66f311ac0cdf46b2caa2f4d6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 23 Jul 2017 16:21:35 +0800 Subject: [options] Correctly hide login info from debug outputs (#13696) Iterate over opts instead of PRIVATE_OPTS for both performance and correctness --- ChangeLog | 6 ++++++ test/test_options.py | 26 ++++++++++++++++++++++++++ youtube_dl/options.py | 38 ++++++++++++++++++-------------------- 3 files changed, 50 insertions(+), 20 deletions(-) create mode 100644 test/test_options.py diff --git a/ChangeLog b/ChangeLog index 302d32a..8e63b5c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Core +* [options] Correctly hide login info from debug outputs (#13696) + + version 2017.07.23 Core diff --git a/test/test_options.py b/test/test_options.py new file mode 100644 index 0000000..785281f --- /dev/null +++ 
b/test/test_options.py @@ -0,0 +1,26 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.options import _hide_login_info + + +class TestOptions(unittest.TestCase): + def test_hide_login_inf(self): + self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']), + ['-u', 'PRIVATE', '-p', 'PRIVATE']) + self.assertEqual(_hide_login_info(['-u']), ['-u']) + self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']), + ['-u', 'PRIVATE', '-u', 'PRIVATE']) + self.assertEqual(_hide_login_info(['--username=foo']), + ['--username=PRIVATE']) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 79e9fd1..38439c9 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -20,6 +20,24 @@ from .utils import ( from .version import __version__ +def _hide_login_info(opts): + PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']) + eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') + + def _scrub_eq(o): + m = eqre.match(o) + if m: + return m.group('key') + '=PRIVATE' + else: + return o + + opts = list(map(_scrub_eq, opts)) + for idx, opt in enumerate(opts): + if opt in PRIVATE_OPTS and idx + 1 < len(opts): + opts[idx + 1] = 'PRIVATE' + return opts + + def parseOpts(overrideArguments=None): def _readOptions(filename_bytes, default=[]): try: @@ -93,26 +111,6 @@ def parseOpts(overrideArguments=None): def _comma_separated_values_options_callback(option, opt_str, value, parser): setattr(parser.values, option.dest, value.split(',')) - def _hide_login_info(opts): - PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'] - eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in 
PRIVATE_OPTS)) + ')=.+$') - - def _scrub_eq(o): - m = eqre.match(o) - if m: - return m.group('key') + '=PRIVATE' - else: - return o - - opts = list(map(_scrub_eq, opts)) - for private_opt in PRIVATE_OPTS: - try: - i = opts.index(private_opt) - opts[i + 1] = 'PRIVATE' - except ValueError: - pass - return opts - # No need to wrap help messages if we're on a wide console columns = compat_get_terminal_size().columns max_width = columns if columns else 80 -- cgit v1.1 From 73095e013fb1bc4a1e676d7be77a103f0013a227 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 23 Jul 2017 16:24:18 +0800 Subject: [options] Typo --- test/test_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_options.py b/test/test_options.py index 785281f..3a25a6b 100644 --- a/test/test_options.py +++ b/test/test_options.py @@ -12,7 +12,7 @@ from youtube_dl.options import _hide_login_info class TestOptions(unittest.TestCase): - def test_hide_login_inf(self): + def test_hide_login_info(self): self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']), ['-u', 'PRIVATE', '-p', 'PRIVATE']) self.assertEqual(_hide_login_info(['-u']), ['-u']) -- cgit v1.1 From e3ce912c3d767fcb1a1225d05ac64da1acab94aa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 23 Jul 2017 16:25:17 +0800 Subject: [niconico] improve error reporting (#13696) --- youtube_dl/extractor/niconico.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 695e32e..79b9952 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -147,6 +147,9 @@ class NiconicoIE(InfoExtractor): elif 'closed' in flv_info: raise ExtractorError('Niconico videos now require logging in', expected=True) + elif 'error' in flv_info: + raise ExtractorError('%s reports error: %s' % ( + self.IE_NAME, flv_info['error'][0]), expected=True) else: raise ExtractorError('Unable to find 
video URL') -- cgit v1.1 From 3150976669ef2ffc9f4eee9e99a6e70730bc22fb Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Sun, 23 Jul 2017 15:33:18 +0200 Subject: [ISSUE_TEMPLATE_tmpl.md] Minor improvements --- .github/ISSUE_TEMPLATE_tmpl.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index df79503..26f61d3 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -1,16 +1,16 @@ ## Please follow the guide below - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) -- Use *Preview* tab to see how your issue will actually look like +- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`) +- Use the *Preview* tab to see what your issue will actually look like --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones ### What is the purpose of your *issue*? @@ -28,9 +28,9 @@ ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: -Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): +Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. 
It should look similar to one below (replace it with **your** log inserted between triple ```): + ``` -$ youtube-dl -v <your command line> [debug] System config: [] [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] -- cgit v1.1 From f0e31e32c940d8529353f40bd2426163c3199216 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Sun, 23 Jul 2017 15:40:04 +0200 Subject: [nick] Automate geo-restriction bypass (#13711) --- youtube_dl/extractor/nick.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 08a7592..1fa19cd 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -12,6 +12,7 @@ class NickIE(MTVServicesInfoExtractor): IE_NAME = 'nick.com' _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' + _GEO_COUNTRIES = ['US'] _TESTS = [{ 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html', 'playlist': [ -- cgit v1.1 From 70bfab0e9ac8ddb7da67d71633c8c4b0704054cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 21:00:19 +0700 Subject: [mtv] Improve thumbnal extraction --- youtube_dl/extractor/mtv.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index fc098cd..25af5dd 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -50,8 +50,7 @@ class MTVServicesInfoExtractor(InfoExtractor): thumb_node = itemdoc.find(search_path) if thumb_node is None: return None - else: - return thumb_node.attrib['url'] + return thumb_node.get('url') or thumb_node.text or None def _extract_mobile_video_formats(self, mtvn_id): webpage_url = self._MOBILE_TEMPLATE % mtvn_id -- cgit v1.1 From 
c99d6890cb46626870474e5c1092d9772096c4b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 21:00:56 +0700 Subject: [nickru] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nick.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index db7616c..2513f25 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -673,6 +673,7 @@ from .nick import ( NickIE, NickDeIE, NickNightIE, + NickRuIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import ( diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 1fa19cd..b688637 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -125,3 +125,21 @@ class NickNightIE(NickDeIE): return self._search_regex( r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url') + + +class NickRuIE(MTVServicesInfoExtractor): + IE_NAME = 'nickelodeonru' + _VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6', + 'only_matching': True, + }, { + 'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) -- cgit v1.1 From f9c48d895b5600c82e9b55f703e68b060f25de07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Jul 2017 23:12:43 +0700 Subject: [cloudy] Fix extraction (closes #13737) --- youtube_dl/extractor/cloudy.py | 6 +++++- 1 file 
changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 9bc8dbe..85ca20e 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -30,7 +30,11 @@ class CloudyIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id) + 'https://www.cloudy.ec/embed.php', video_id, query={ + 'id': video_id, + 'playerPage': 1, + 'autoplay': 1, + }) info = self._parse_html5_media_entries(url, webpage, video_id)[0] -- cgit v1.1 From 9682666bdadec955fb8600fa3721f59b2a4b8099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 27 Jul 2017 02:04:51 +0700 Subject: [amcnetworks] Make rating optional (closes #12453) --- youtube_dl/extractor/amcnetworks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index 3a0ec67..dd3b18d 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals from .theplatform import ThePlatformIE from ..utils import ( - update_url_query, - parse_age_limit, int_or_none, + parse_age_limit, + try_get, + update_url_query, ) @@ -68,7 +69,8 @@ class AMCNetworksIE(ThePlatformIE): info = self._parse_theplatform_metadata(theplatform_metadata) video_id = theplatform_metadata['pid'] title = theplatform_metadata['title'] - rating = theplatform_metadata['ratings'][0]['rating'] + rating = try_get( + theplatform_metadata, lambda x: x['ratings'][0]['rating']) auth_required = self._search_regex( r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required') -- cgit v1.1 From 24e966e8dab954136dabbc497064ac63b252495b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 28 Jul 2017 12:13:19 +0200 Subject: [megaphone] Add extractor --- 
youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 8 ++++++ youtube_dl/extractor/megaphone.py | 55 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 youtube_dl/extractor/megaphone.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2513f25..6682486 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -558,6 +558,7 @@ from .matchtv import MatchTVIE from .mdr import MDRIE from .mediaset import MediasetIE from .medici import MediciIE +from .megaphone import MegaphoneIE from .meipai import MeipaiIE from .melonvod import MelonVODIE from .meta import METAIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 36c81ed..9678c32 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -97,6 +97,7 @@ from .washingtonpost import WashingtonPostIE from .wistia import WistiaIE from .mediaset import MediasetIE from .joj import JojIE +from .megaphone import MegaphoneIE class GenericIE(InfoExtractor): @@ -2790,6 +2791,13 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( joj_urls, video_id, video_title, ie=JojIE.ie_key()) + # Look for megaphone.fm embeds + mpfn_urls = MegaphoneIE._extract_urls(webpage) + if mpfn_urls: + return self.playlist_from_matches( + mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) + + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/megaphone.py b/youtube_dl/extractor/megaphone.py new file mode 100644 index 0000000..60e3caf --- /dev/null +++ b/youtube_dl/extractor/megaphone.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import js_to_json + + +class MegaphoneIE(InfoExtractor): + IE_NAME = 'megaphone.fm' + IE_DESC = 'megaphone.fm embedded players' + _VALID_URL = 
r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)' + _TEST = { + 'url': 'https://player.megaphone.fm/GLT9749789991?"', + 'md5': '4816a0de523eb3e972dc0dda2c191f96', + 'info_dict': { + 'id': 'GLT9749789991', + 'ext': 'mp3', + 'title': '#97 What Kind Of Idiot Gets Phished?', + 'thumbnail': 're:^https://.*\.png.*$', + 'duration': 1776.26375, + 'author': 'Reply All', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_property('audio:title', webpage) + author = self._og_search_property('audio:artist', webpage) + thumbnail = self._og_search_thumbnail(webpage) + + episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON') + episode_data = self._parse_json(episode_json, video_id, js_to_json) + video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:') + + formats = [{ + 'url': video_url, + }] + + return { + 'id': video_id, + 'thumbnail': thumbnail, + 'title': title, + 'author': author, + 'duration': episode_data['duration'], + 'formats': formats, + } + + @classmethod + def _extract_urls(cls, webpage): + return [m[0] for m in re.findall( + r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)] -- cgit v1.1 From c5a49ff08413411174837f1034ef439b79ff774b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:02:41 +0700 Subject: [downloader/hls] Use redirect URL as manifest base (#13755) --- youtube_dl/downloader/hls.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 0e29c8a..46308cf 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -59,9 +59,9 @@ class HlsFD(FragmentFD): man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) - manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read() - - s = 
manifest.decode('utf-8', 'ignore') + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.geturl() + s = urlh.read().decode('utf-8', 'ignore') if not self.can_download(s, info_dict): if info_dict.get('extra_param_to_segment_url'): -- cgit v1.1 From cbbe66635f3c23316f04a6f56ad57e025bc47263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:10:19 +0700 Subject: [yandexdisk] Add extractor (closes #13755) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/yandexdisk.py | 115 +++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 youtube_dl/extractor/yandexdisk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6682486..852942e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1298,6 +1298,7 @@ from .yandexmusic import ( YandexMusicAlbumIE, YandexMusicPlaylistIE, ) +from .yandexdisk import YandexDiskIE from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE from .ynet import YnetIE diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py new file mode 100644 index 0000000..11729f0 --- /dev/null +++ b/youtube_dl/extractor/yandexdisk.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, + try_get, + urlencode_postdata, +) + + +class YandexDiskIE(InfoExtractor): + _VALID_URL = r'https?://yadi\.sk/i/(?P<id>[^/?#&]+)' + + _TEST = { + 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', + 'md5': '33955d7ae052f15853dc41f35f17581c', + 'info_dict': { + 'id': 'VdOeDou8eZs6Y', + 'ext': 'mp4', + 'title': '4.mp4', + 'duration': 168.6, + 'uploader': 'y.botova', + 'uploader_id': '300043621', + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = 
self._match_id(url) + + status = self._download_webpage( + 'https://disk.yandex.com/auth/status', video_id, query={ + 'urlOrigin': url, + 'source': 'public', + 'md5': 'false', + }) + + sk = self._search_regex( + r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2', + status, 'sk', group='value') + + webpage = self._download_webpage(url, video_id) + + models = self._parse_json( + self._search_regex( + r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script', + webpage, 'video JSON'), + video_id) + + data = next( + model['data'] for model in models + if model.get('model') == 'resource') + + video_hash = data['id'] + title = data['name'] + + models = self._download_json( + 'https://disk.yandex.com/models/', video_id, + data=urlencode_postdata({ + '_model.0': 'videoInfo', + 'id.0': video_hash, + '_model.1': 'do-get-resource-url', + 'id.1': video_hash, + 'version': '13.6', + 'sk': sk, + }), query={'_m': 'videoInfo'})['models'] + + videos = try_get(models, lambda x: x[0]['data']['videos'], list) or [] + source_url = try_get( + models, lambda x: x[1]['data']['file'], compat_str) + + formats = [] + if source_url: + formats.append({ + 'url': source_url, + 'format_id': 'source', + 'ext': determine_ext(title, 'mp4'), + 'quality': 1, + }) + for video in videos: + format_url = video.get('url') + if not format_url: + continue + if determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': format_url, + }) + self._sort_formats(formats) + + duration = float_or_none(try_get( + models, lambda x: x[0]['data']['duration']), 1000) + uploader = try_get( + data, lambda x: x['user']['display_name'], compat_str) + uploader_id = try_get( + data, lambda x: x['user']['uid'], compat_str) + view_count = int_or_none(try_get( + data, lambda x: x['meta']['views_counter'])) + + return { + 'id': video_id, + 'title': title, + 
'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'formats': formats, + } -- cgit v1.1 From 95908ce45382a72be26e918f8db1809b0ce81190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:13:12 +0700 Subject: [extractor/generic] PEP 8 --- youtube_dl/extractor/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9678c32..4b83e86 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2797,7 +2797,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) - def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): -- cgit v1.1 From 2a7a82321135bc59364c91caddde4211f378785b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:25:32 +0700 Subject: [svtplay] Update API URL (closes #13767) --- youtube_dl/extractor/svt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 1b5afb7..38a505f 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -181,7 +181,7 @@ class SVTPlayIE(SVTBaseIE): if video_id: data = self._download_json( - 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) + 'https://api.svt.se/videoplayer-api/video/%s' % video_id, video_id) info_dict = self._extract_video(data, video_id) if not info_dict.get('title'): info_dict['title'] = re.sub( -- cgit v1.1 From c04017519da74a375d6c1c95733d921e96d8ee82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:30:53 +0700 Subject: [svtplay] Use geo verification proxy for API request --- youtube_dl/extractor/svt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 38a505f..48bc452 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -181,7 +181,8 @@ class SVTPlayIE(SVTBaseIE): if video_id: data = self._download_json( - 'https://api.svt.se/videoplayer-api/video/%s' % video_id, video_id) + 'https://api.svt.se/videoplayer-api/video/%s' % video_id, + video_id, headers=self.geo_verification_headers()) info_dict = self._extract_video(data, video_id) if not info_dict.get('title'): info_dict['title'] = re.sub( -- cgit v1.1 From 836ef2648613f4ca565b319af4769c02e35f60f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 18:41:42 +0700 Subject: [soundcloud:trackstation] Add extractor (closes #13733) --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/soundcloud.py | 141 +++++++++++++++++++++++-------------- 2 files changed, 89 insertions(+), 55 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 852942e..d2c5e80 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -935,8 +935,9 @@ from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE, + SoundcloudTrackStationIE, SoundcloudPlaylistIE, - SoundcloudSearchIE + SoundcloudSearchIE, ) from .soundgasm import ( SoundgasmIE, diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 3f1a46b..2f1b297 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -31,6 +31,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ + (?!stations/track) (?P<uploader>[\w\d-]+)/ (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? 
@@ -330,7 +331,63 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): } -class SoundcloudUserIE(SoundcloudPlaylistBaseIE): +class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): + _API_BASE = 'https://api.soundcloud.com' + _API_V2_BASE = 'https://api-v2.soundcloud.com' + + def _extract_playlist(self, base_url, playlist_id, playlist_title): + COMMON_QUERY = { + 'limit': 50, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': '1', + } + + query = COMMON_QUERY.copy() + query['offset'] = 0 + + next_href = base_url + '?' + compat_urllib_parse_urlencode(query) + + entries = [] + for i in itertools.count(): + response = self._download_json( + next_href, playlist_id, 'Downloading track page %s' % (i + 1)) + + collection = response['collection'] + if not collection: + break + + def resolve_permalink_url(candidates): + for cand in candidates: + if isinstance(cand, dict): + permalink_url = cand.get('permalink_url') + entry_id = self._extract_id(cand) + if permalink_url and permalink_url.startswith('http'): + return permalink_url, entry_id + + for e in collection: + permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) + if permalink_url: + entries.append(self.url_result(permalink_url, video_id=entry_id)) + + next_href = response.get('next_href') + if not next_href: + break + + parsed_next_href = compat_urlparse.urlparse(response['next_href']) + qs = compat_urlparse.parse_qs(parsed_next_href.query) + qs.update(COMMON_QUERY) + next_href = compat_urlparse.urlunparse( + parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': playlist_title, + 'entries': entries, + } + + +class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -385,16 +442,13 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE): 'playlist_mincount': 1, }] - _API_BASE = 'https://api.soundcloud.com' - _API_V2_BASE = 
'https://api-v2.soundcloud.com' - _BASE_URL_MAP = { - 'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE, - 'tracks': '%s/users/%%s/tracks' % _API_BASE, - 'sets': '%s/users/%%s/playlists' % _API_V2_BASE, - 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE, - 'likes': '%s/users/%%s/likes' % _API_V2_BASE, - 'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE, + 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE, + 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, } _TITLE_MAP = { @@ -416,57 +470,36 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE): resolv_url, uploader, 'Downloading user info') resource = mobj.group('rsrc') or 'all' - base_url = self._BASE_URL_MAP[resource] % user['id'] - COMMON_QUERY = { - 'limit': 50, - 'client_id': self._CLIENT_ID, - 'linked_partitioning': '1', - } + return self._extract_playlist( + self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']), + '%s (%s)' % (user['username'], self._TITLE_MAP[resource])) - query = COMMON_QUERY.copy() - query['offset'] = 0 - next_href = base_url + '?' 
+ compat_urllib_parse_urlencode(query) - - entries = [] - for i in itertools.count(): - response = self._download_json( - next_href, uploader, 'Downloading track page %s' % (i + 1)) - - collection = response['collection'] - if not collection: - break - - def resolve_permalink_url(candidates): - for cand in candidates: - if isinstance(cand, dict): - permalink_url = cand.get('permalink_url') - entry_id = self._extract_id(cand) - if permalink_url and permalink_url.startswith('http'): - return permalink_url, entry_id +class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)' + IE_NAME = 'soundcloud:trackstation' + _TESTS = [{ + 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', + 'info_dict': { + 'id': '286017854', + 'title': 'Track station: your-text', + }, + 'playlist_mincount': 47, + }] - for e in collection: - permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) - if permalink_url: - entries.append(self.url_result(permalink_url, video_id=entry_id)) + def _real_extract(self, url): + track_name = self._match_id(url) - next_href = response.get('next_href') - if not next_href: - break + webpage = self._download_webpage(url, track_name) - parsed_next_href = compat_urlparse.urlparse(response['next_href']) - qs = compat_urlparse.parse_qs(parsed_next_href.query) - qs.update(COMMON_QUERY) - next_href = compat_urlparse.urlunparse( - parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) + track_id = self._search_regex( + r'soundcloud:track-stations:(\d+)', webpage, 'track id') - return { - '_type': 'playlist', - 'id': compat_str(user['id']), - 'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]), - 'entries': entries, - } + return self._extract_playlist( + '%s/stations/soundcloud:track-stations:%s/tracks' + % (self._API_V2_BASE, track_id), + track_id, 'Track station: %s' % track_name) 
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): -- cgit v1.1 From e445850e69990502b171765343fc38317e932aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 18:45:57 +0700 Subject: [soundcloud] Update client id --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2f1b297..2e52e09 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -122,7 +122,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z' + _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod -- cgit v1.1 From ca127ab2c174cdee4428eb4e192393c6ca942ac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 23:07:28 +0700 Subject: [ard] Add support for lives (closes #13771) --- youtube_dl/extractor/ard.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 2d55994..3f248b1 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -93,6 +93,7 @@ class ARDMediathekIE(InfoExtractor): duration = int_or_none(media_info.get('_duration')) thumbnail = media_info.get('_previewImage') + is_live = media_info.get('_isLive') is True subtitles = {} subtitle_url = media_info.get('_subtitleUrl') @@ -106,6 +107,7 @@ class ARDMediathekIE(InfoExtractor): 'id': video_id, 'duration': duration, 'thumbnail': thumbnail, + 'is_live': is_live, 'formats': formats, 'subtitles': subtitles, } @@ -166,9 +168,11 @@ class ARDMediathekIE(InfoExtractor): # determine video id from url m = re.match(self._VALID_URL, url) + document_id = None + numid = re.search(r'documentId=([0-9]+)', url) if numid: - video_id = numid.group(1) + document_id = video_id = 
numid.group(1) else: video_id = m.group('video_id') @@ -228,12 +232,16 @@ class ARDMediathekIE(InfoExtractor): 'formats': formats, } else: # request JSON file + if not document_id: + video_id = self._search_regex( + r'/play/(?:config|media)/(\d+)', webpage, 'media id') info = self._extract_media_info( - 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id) + 'http://www.ardmediathek.de/play/media/%s' % video_id, + webpage, video_id) info.update({ 'id': video_id, - 'title': title, + 'title': self._live_title(title) if info.get('is_live') else title, 'description': description, 'thumbnail': thumbnail, }) -- cgit v1.1 From 198d4cb40ce9d819e8e4079058642ee96dae213b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Ruci=C5=84ski?= <grucin@gmail.com> Date: Sat, 29 Jul 2017 20:30:04 +0200 Subject: [generic] Add support for another ooyala embed pattern (closes #13727) --- youtube_dl/extractor/generic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4b83e86..34e8149 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -575,6 +575,19 @@ class GenericIE(InfoExtractor): }, 'skip': 'movie expired', }, + # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js + { + 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/', + 'info_dict': { + 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2', + 'ext': 'mp4', + 'title': 'Steampunk Fest Comes to Honesdale', + 'duration': 43.276, + }, + 'params': { + 'skip_download': True, + } + }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -2293,6 +2306,7 @@ class GenericIE(InfoExtractor): # Look for Ooyala videos mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', 
webpage) or + re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage)) if mobj is not None: -- cgit v1.1 From a0a477b885dc1dd688058924357c4935f3c935cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 30 Jul 2017 15:48:22 +0700 Subject: [youjizz] Fix extraction (closes #13744) --- youtube_dl/extractor/youjizz.py | 78 +++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index b50f34e..f33fabe 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -1,39 +1,95 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + parse_duration, +) class YouJizzIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])' + _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))' _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', - 'md5': '78fc1901148284c69af12640e01c6310', + 'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4', 'info_dict': { 'id': '2189178', 'ext': 'mp4', 'title': 'Zeichentrick 1', 'age_limit': 18, + 'duration': 2874, } }, { 'url': 'http://www.youjizz.com/videos/-2189178.html', 'only_matching': True, + }, { + 'url': 'https://www.youjizz.com/videos/embed/31991001', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') or mobj.group('embed_id') + webpage = self._download_webpage(url, video_id) - # 
YouJizz's HTML5 player has invalid HTML - webpage = webpage.replace('"controls', '" controls') - age_limit = self._rta_search(webpage) - video_title = self._html_search_regex( - r'<title>\s*(.*)\s*', webpage, 'title') - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + title = self._html_search_regex( + r'(.+?)', webpage, 'title') + + formats = [] + + encodings = self._parse_json( + self._search_regex( + r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings', + default='[]'), + video_id, fatal=False) + for encoding in encodings: + if not isinstance(encoding, dict): + continue + format_url = encoding.get('filename') + if not isinstance(format_url, compat_str): + continue + if determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + format_id = encoding.get('name') or encoding.get('quality') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]', format_id, 'height', default=None)) + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': height, + }) + + if formats: + info_dict = { + 'formats': formats, + } + else: + # YouJizz's HTML5 player has invalid HTML + webpage = webpage.replace('"controls', '" controls') + info_dict = self._parse_html5_media_entries( + url, webpage, video_id)[0] + + duration = parse_duration(self._search_regex( + r'Runtime:([^<]+)', webpage, 'duration', + default=None)) + uploader = self._search_regex( + r'Uploaded By:.*?]*>([^<]+)', webpage, 'uploader', + default=None) info_dict.update({ 'id': video_id, - 'title': video_title, - 'age_limit': age_limit, + 'title': title, + 'age_limit': self._rta_search(webpage), + 'duration': duration, + 'uploader': uploader, }) return info_dict -- cgit v1.1 From 0ed4758023ddfb4d9630ba9114ef70ef7e6ac09d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Jul 2017 19:08:44 +0700 Subject: [clipfish] Remove extractor 
--- youtube_dl/extractor/clipfish.py | 67 -------------------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 68 deletions(-) delete mode 100644 youtube_dl/extractor/clipfish.py diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py deleted file mode 100644 index 0920f62..0000000 --- a/youtube_dl/extractor/clipfish.py +++ /dev/null @@ -1,67 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, -) - - -class ClipfishIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P[0-9]+)' - _TEST = { - 'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/', - 'md5': 'b9a5dc46294154c1193e2d10e0c95693', - 'info_dict': { - 'id': '4343170', - 'ext': 'mp4', - 'title': 'S01 E01 - Ugly Americans - Date in der Hölle', - 'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.', - 'upload_date': '20161005', - 'duration': 1291, - 'view_count': int, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - video_info = self._download_json( - 'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id, - video_id)['items'][0] - - formats = [] - - m3u8_url = video_info.get('media_videourl_hls') - if m3u8_url: - formats.append({ - 'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'), - 'ext': 'mp4', - 'format_id': 'hls', - }) - - mp4_url = video_info.get('media_videourl') - if mp4_url: - formats.append({ - 'url': mp4_url, - 'format_id': 'mp4', - 'width': int_or_none(video_info.get('width')), - 'height': int_or_none(video_info.get('height')), - 'tbr': int_or_none(video_info.get('bitrate')), - }) - - descr = video_info.get('descr') - if descr: - descr = descr.strip() - - return { - 'id': video_id, - 
'title': video_info['title'], - 'description': descr, - 'formats': formats, - 'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'), - 'duration': int_or_none(video_info.get('media_length')), - 'upload_date': unified_strdate(video_info.get('pubDate')), - 'view_count': int_or_none(video_info.get('media_views')) - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d2c5e80..bdc7370 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -186,7 +186,6 @@ from .chirbit import ( ) from .cinchcast import CinchcastIE from .cjsw import CJSWIE -from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .cliprs import ClipRsIE from .clipsyndicate import ClipsyndicateIE -- cgit v1.1 From 8b9f50d7cb4cfab5d505f4233c3e176a8106d6db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Jul 2017 19:09:44 +0700 Subject: [watchbox] Add extractor (#13739) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/watchbox.py | 151 +++++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 youtube_dl/extractor/watchbox.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bdc7370..3489e86 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1244,6 +1244,7 @@ from .washingtonpost import ( WashingtonPostArticleIE, ) from .wat import WatIE +from .watchbox import WatchBoxIE from .watchindianporn import WatchIndianPornIE from .wdr import ( WDRIE, diff --git a/youtube_dl/extractor/watchbox.py b/youtube_dl/extractor/watchbox.py new file mode 100644 index 0000000..b382338 --- /dev/null +++ b/youtube_dl/extractor/watchbox.py @@ -0,0 +1,151 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + 
js_to_json, + strip_or_none, + try_get, + unified_timestamp, +) + + +class WatchBoxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?Pserien|filme)/(?:[^/]+/)*[^/]+-(?P\d+)' + _TESTS = [{ + # film + 'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html', + 'info_dict': { + 'id': '341368', + 'ext': 'mp4', + 'title': 'Free Jimmy', + 'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 4890, + 'age_limit': 16, + 'release_year': 2009, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + # episode + 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html', + 'info_dict': { + 'id': '328286', + 'ext': 'mp4', + 'title': 'S01 E01 - Date in der Hölle', + 'description': 'md5:2f31c74a8186899f33cb5114491dae2b', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1291, + 'age_limit': 12, + 'release_year': 2010, + 'series': 'Ugly Americans', + 'season_number': 1, + 'episode': 'Date in der Hölle', + 'episode_number': 1, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + kind, video_id = mobj.group('kind', 'id') + + webpage = self._download_webpage(url, video_id) + + source = self._parse_json( + self._search_regex( + r'(?s)source\s*:\s*({.+?})\s*,\s*\n', webpage, 'source', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) or {} + + video_id = compat_str(source.get('videoId') or video_id) + + devapi = self._download_json( + 'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={ + 'format': 'json', + 'apikey': 'hbbtv', + }, 
fatal=False) + + item = try_get(devapi, lambda x: x['items'][0], dict) or {} + + title = item.get('title') or try_get( + item, lambda x: x['movie']['headline_movie'], + compat_str) or source['title'] + + formats = [] + hls_url = item.get('media_videourl_hls') or source.get('hls') + if hls_url: + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + dash_url = item.get('media_videourl_wv') or source.get('dash') + if dash_url: + formats.extend(self._extract_mpd_formats( + dash_url, video_id, mpd_id='dash', fatal=False)) + mp4_url = item.get('media_videourl') + if mp4_url: + formats.append({ + 'url': mp4_url, + 'format_id': 'mp4', + 'width': int_or_none(item.get('width')), + 'height': int_or_none(item.get('height')), + 'tbr': int_or_none(item.get('bitrate')), + }) + self._sort_formats(formats) + + description = strip_or_none(item.get('descr')) + thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail') + duration = int_or_none(item.get('media_length') or source.get('length')) + timestamp = unified_timestamp(item.get('pubDate')) + view_count = int_or_none(item.get('media_views')) + age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk'])) + release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year'])) + + info = { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, + 'age_limit': age_limit, + 'release_year': release_year, + 'formats': formats, + } + + if kind.lower() == 'serien': + series = try_get( + item, lambda x: x['special']['title'], + compat_str) or source.get('format') + season_number = int_or_none(self._search_regex( + r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number', + default=None) or self._search_regex( + r'/staffel-(\d+)/', url, 'season number', default=None)) + episode = 
source.get('title') + episode_number = int_or_none(self._search_regex( + r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number', + default=None)) + info.update({ + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + }) + + return info -- cgit v1.1 From f701827e319963ca783d012f3647aa44fc0efcd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Jul 2017 19:43:09 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8e63b5c..ca3ee8a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,28 @@ version Core +* [downloader/hls] Use redirect URL as manifest base (#13755) * [options] Correctly hide login info from debug outputs (#13696) +Extractors ++ [watchbox] Add support for watchbox.de (#13739) +- [clipfish] Remove extractor ++ [youjizz] Fix extraction (#13744) ++ [generic] Add support for another ooyala embed pattern (#13727) ++ [ard] Add support for lives (#13771) +* [soundcloud] Update client id ++ [soundcloud:trackstation] Add support for track stations (#13733) +* [svtplay] Use geo verification proxy for API request +* [svtplay] Update API URL (#13767) ++ [yandexdisk] Add support for yadi.sk (#13755) ++ [megaphone] Add support for megaphone.fm +* [amcnetworks] Make rating optional (#12453) +* [cloudy] Fix extraction (#13737) ++ [nickru] Add support for nickelodeon.ru +* [mtv] Improve thumbnal extraction +* [nick] Automate geo-restriction bypass (#13711) +* [niconico] Improve error reporting (#13696) + version 2017.07.23 -- cgit v1.1 From 5c9ea67bc0dedd15a0ed9ad05d8fcf09946ca461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Jul 2017 20:47:31 +0700 Subject: release 2017.07.30.1 --- .github/ISSUE_TEMPLATE.md | 16 ++++++++-------- ChangeLog | 2 +- docs/supportedsites.md | 6 +++++- youtube_dl/version.py | 2 +- 4 files changed, 15 insertions(+), 11 deletions(-) diff 
--git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 37d09d7..0421de7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,16 +1,16 @@ ## Please follow the guide below - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) -- Use *Preview* tab to see how your issue will actually look like +- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`) +- Use the *Preview* tab to see what your issue will actually look like --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.30.1** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones ### What is the purpose of your *issue*? @@ -28,14 +28,14 @@ ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: -Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): +Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v `), copy the **whole** output and insert it here. 
It should look similar to one below (replace it with **your** log inserted between triple ```): + ``` -$ youtube-dl -v [debug] System config: [] [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.23 +[debug] youtube-dl version 2017.07.30.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ca3ee8a..4f03ef0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.07.30.1 Core * [downloader/hls] Use redirect URL as manifest base (#13755) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index eb09c47..77aac82 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -155,7 +155,6 @@ - **chirbit:profile** - **Cinchcast** - **CJSW** - - **Clipfish** - **cliphunter** - **ClipRs** - **Clipsyndicate** @@ -440,6 +439,7 @@ - **Medialaan** - **Mediaset** - **Medici** + - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 - **MelonVOD** - **META** @@ -533,6 +533,7 @@ - **nhl.com:videocenter:category**: NHL videocenter category - **nick.com** - **nick.de** + - **nickelodeonru** - **nicknight** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** @@ -734,6 +735,7 @@ - **soundcloud:playlist** - **soundcloud:search**: Soundcloud search - **soundcloud:set** + - **soundcloud:trackstation** - **soundcloud:user** - **soundgasm** - **soundgasm:profile** @@ -968,6 +970,7 @@ - **washingtonpost** - **washingtonpost:article** - **wat.tv** + - **WatchBox** - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** @@ -1003,6 +1006,7 @@ - **XVideos** - **XXXYMovies** - **Yahoo**: Yahoo screen and movies + - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - 
Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a8dbb93..3816215 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.23' +__version__ = '2017.07.30.1' -- cgit v1.1 From 9118c9f18a43a2f3e4814fcc02ac8e5180077df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Aug 2017 05:20:14 +0700 Subject: [nrktv] Update API host (closes #13796) --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3b4f51f..18ead94 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -237,7 +237,7 @@ class NRKTVIE(NRKBaseIE): (?:/\d{2}-\d{2}-\d{4})? (?:\#del=(?P\d+))? ''' % _EPISODE_RE - _API_HOST = 'psapi-we.nrk.no' + _API_HOST = 'psapi-ne.nrk.no' _TESTS = [{ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', -- cgit v1.1 From 8cda78ef72c52c0424ddf90c22105dbc3b1d16f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Aug 2017 23:12:34 +0700 Subject: [test_YoutubeDL] Add a test for #10083 --- test/test_YoutubeDL.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 4af14f9..e70cbcd 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -371,6 +371,19 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'format': 'best[height>360]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + def test_format_selection_issue_10083(self): + # See https://github.com/rg3/youtube-dl/issues/10083 + formats = [ + {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, + {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL}, + ] + info_dict = 
_make_result(formats) + + ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'}) + ydl.process_ie_result(info_dict.copy()) + self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio') + def test_invalid_format_specs(self): def assert_syntax_error(format_spec): ydl = YDL({'format': format_spec}) -- cgit v1.1 From 183062a4ab2f698f5096e69602fb2b5c861c01a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Aug 2017 23:19:59 +0700 Subject: [pbs] Add support for new URL schema (closes #13801) --- youtube_dl/extractor/pbs.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 16cc667..8889e4a 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -189,7 +189,7 @@ class PBSIE(InfoExtractor): # Direct video URL (?:%s)/(?:viralplayer|video)/(?P[0-9]+)/? | # Article with embedded player (or direct video) - (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | + (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P[^/]+)/ ) @@ -346,6 +346,21 @@ class PBSIE(InfoExtractor): }, }, { + # https://github.com/rg3/youtube-dl/issues/13801 + 'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/', + 'info_dict': { + 'id': '3003333873', + 'ext': 'mp4', + 'title': 'PBS NewsHour - full episode July 31, 2017', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'duration': 3265, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + }, + { 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'only_matching': True, }, @@ -433,6 +448,9 @@ class PBSIE(InfoExtractor): if url: break + if not url: + url = self._og_search_url(webpage) + mobj = re.match(self._VALID_URL, url) 
player_id = mobj.group('player_id') -- cgit v1.1 From 1f03fef994e076a827cfd818eb4d76fe2eb85130 Mon Sep 17 00:00:00 2001 From: Justin Quan Date: Fri, 4 Aug 2017 08:43:44 -0700 Subject: [README.md] Improve grammar --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fe2bebc..0067184 100644 --- a/README.md +++ b/README.md @@ -584,7 +584,7 @@ If you are using an output template inside a Windows batch file then you must es #### Output template examples -Note on Windows you may need to use double quotes instead of single. +Note that on Windows you may need to use double quotes instead of single. ```bash $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc @@ -671,7 +671,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2 #### Format selection examples -Note on Windows you may need to use double quotes instead of single. +Note that on Windows you may need to use double quotes instead of single. 
```bash # Download best mp4 format available or any other best if no mp4 available -- cgit v1.1 From 11a6793f8013f37045d769e5b166f75e17f275d1 Mon Sep 17 00:00:00 2001 From: Matt Crupi Date: Fri, 4 Aug 2017 08:46:54 -0700 Subject: [mlb] Extend _VALID_URL (closes #13740) --- youtube_dl/extractor/mlb.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 59cd4b8..4d45f96 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -15,7 +15,7 @@ class MLBIE(InfoExtractor): (?:[\da-z_-]+\.)*mlb\.com/ (?: (?: - (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v| + (?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)| (?: shared/video/embed/(?:embed|m-internal-embed)\.html| (?:[^/]+/)+(?:play|index)\.jsp| @@ -95,6 +95,10 @@ class MLBIE(InfoExtractor): } }, { + 'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694', + 'only_matching': True, + }, + { 'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb', 'only_matching': True, }, -- cgit v1.1 From 57a38a38c32ea2eb1ca54ee4ba3fcd31a9b7f328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Aug 2017 23:44:07 +0700 Subject: [udemy] Fix subtitles extraction (closes #13812) --- youtube_dl/extractor/udemy.py | 47 +++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 160be1b..3b02f43 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -15,6 +15,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + js_to_json, sanitized_Request, unescapeHTML, urlencode_postdata, @@ -268,6 +269,25 @@ class UdemyIE(InfoExtractor): f = add_output_format_meta(f, format_id) formats.append(f) + def extract_subtitles(track_list): + if not isinstance(track_list, list): + 
return + for track in track_list: + if not isinstance(track, dict): + continue + if track.get('kind') != 'captions': + continue + src = track.get('src') + if not src or not isinstance(src, compat_str): + continue + lang = track.get('language') or track.get( + 'srclang') or track.get('label') + sub_dict = automatic_captions if track.get( + 'autogenerated') is True else subtitles + sub_dict.setdefault(lang, []).append({ + 'url': src, + }) + download_urls = asset.get('download_urls') if isinstance(download_urls, dict): extract_formats(download_urls.get('Video')) @@ -315,23 +335,16 @@ class UdemyIE(InfoExtractor): extract_formats(data.get('sources')) if not duration: duration = int_or_none(data.get('duration')) - tracks = data.get('tracks') - if isinstance(tracks, list): - for track in tracks: - if not isinstance(track, dict): - continue - if track.get('kind') != 'captions': - continue - src = track.get('src') - if not src or not isinstance(src, compat_str): - continue - lang = track.get('language') or track.get( - 'srclang') or track.get('label') - sub_dict = automatic_captions if track.get( - 'autogenerated') is True else subtitles - sub_dict.setdefault(lang, []).append({ - 'url': src, - }) + extract_subtitles(data.get('tracks')) + + if not subtitles and not automatic_captions: + text_tracks = self._parse_json( + self._search_regex( + r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html, + 'text tracks', default='{}', group='data'), video_id, + transform_source=lambda s: js_to_json(unescapeHTML(s)), + fatal=False) + extract_subtitles(text_tracks) self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) -- cgit v1.1 From b3b5870cba46d84b7482f120f550822d3b64c3d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Aug 2017 23:51:03 +0700 Subject: [pornhd] Fix extraction (closes #13783) --- youtube_dl/extractor/pornhd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhd.py
b/youtube_dl/extractor/pornhd.py index 842317e..3676178 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor): r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') sources = self._parse_json(js_to_json(self._search_regex( - r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", + r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]", webpage, 'sources', default='{}')), video_id) if not sources: -- cgit v1.1 From 799802f368012f579750b26db117b3a9dfdcbe05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Aug 2017 23:54:28 +0700 Subject: [teamfour] Remove extractor (closes #13782) Now covered with generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/teamfourstar.py | 48 ------------------------------------ 2 files changed, 49 deletions(-) delete mode 100644 youtube_dl/extractor/teamfourstar.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3489e86..d0e04dd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -994,7 +994,6 @@ from .teachertube import ( ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE -from .teamfourstar import TeamFourStarIE from .techtalks import TechTalksIE from .ted import TEDIE from .tele13 import Tele13IE diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py deleted file mode 100644 index a8c6ed7..0000000 --- a/youtube_dl/extractor/teamfourstar.py +++ /dev/null @@ -1,48 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from .jwplatform import JWPlatformIE -from ..utils import unified_strdate - - -class TeamFourStarIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)' - _TEST = { - 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/', - 'info_dict': { - 'id': '0WdZO31W', - 'title': 'TFS Abridged
Parody Episode 1', - 'description': 'md5:d60bc389588ebab2ee7ad432bda953ae', - 'ext': 'mp4', - 'timestamp': 1394168400, - 'upload_date': '20080508', - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - jwplatform_url = JWPlatformIE._extract_url(webpage) - - video_title = self._html_search_regex( - r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>', - webpage, 'title') - video_date = unified_strdate(self._html_search_regex( - r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>', - webpage, 'date', fatal=False)) - video_description = self._html_search_regex( - r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>', - webpage, 'description', fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': jwplatform_url, - } -- cgit v1.1 From f31fd0693b674e73f9273f0afba2a54853e4ca35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 00:00:21 +0700 Subject: [vidme] Extract DASH and HLS formats --- youtube_dl/extractor/vidme.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index e9ff336..a7971d7 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import itertools from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, @@ -161,13 +164,28 @@ class VidmeIE(InfoExtractor): 'or for violating the terms of use.', expected=True) - formats = [{ - 'format_id': f.get('type'), - 'url': f['uri'], -
'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), - 'preference': 0 if f.get('type', '').endswith('clip') else 1, - } for f in video.get('formats', []) if f.get('uri')] + formats = [] + for f in video.get('formats', []): + format_url = f.get('uri') + if not format_url or not isinstance(format_url, compat_str): + continue + format_type = f.get('type') + if format_type == 'dash': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + elif format_type == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': f.get('type'), + 'url': format_url, + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'preference': 0 if f.get('type', '').endswith( + 'clip') else 1, + }) if not formats and video.get('complete_url'): formats.append({ -- cgit v1.1 From bbbe1cebfce3cfb63e9c01d29105fcba693ba54c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 00:09:36 +0700 Subject: [mlb] Update test (closes #13777) --- youtube_dl/extractor/mlb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 4d45f96..675ff68 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -84,7 +84,7 @@ class MLBIE(InfoExtractor): }, { 'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer', - 'md5': 'b190e70141fb9a1552a85426b4da1b5d', + 'md5': 'aafaf5b0186fee8f32f20508092f8111', 'info_dict': { 'id': '75609783', 'ext': 'mp4', -- cgit v1.1 From 8519b88f67de9c0c11cd2edd8dc55b9a4f13d110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 00:59:07 +0700 Subject: [yandexdisk] Relax _VALID_URL (closes #13824) --- 
youtube_dl/extractor/yandexdisk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py index 11729f0..e8f6ae1 100644 --- a/youtube_dl/extractor/yandexdisk.py +++ b/youtube_dl/extractor/yandexdisk.py @@ -13,9 +13,9 @@ from ..utils import ( class YandexDiskIE(InfoExtractor): - _VALID_URL = r'https?://yadi\.sk/i/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', 'md5': '33955d7ae052f15853dc41f35f17581c', 'info_dict': { @@ -27,7 +27,10 @@ class YandexDiskIE(InfoExtractor): 'uploader_id': '300043621', 'view_count': int, }, - } + }, { + 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) -- cgit v1.1 From 1141e9104bc0f8d577f18cf28a1af58adea1248e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 06:57:19 +0700 Subject: Use relative paths for DASH fragments (closes #12990) 10x reduced JSON size refs #13810 --- youtube_dl/downloader/dash.py | 14 ++++++++++---- youtube_dl/extractor/common.py | 16 ++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 7491fda..576ece6 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .fragment import FragmentFD from ..compat import compat_urllib_error +from ..utils import urljoin class DashSegmentsFD(FragmentFD): @@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD): FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): - segments = info_dict['fragments'][:1] if self.params.get( + fragment_base_url = info_dict.get('fragment_base_url') + fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else 
info_dict['fragments'] ctx = { 'filename': filename, - 'total_frags': len(segments), + 'total_frags': len(fragments), } self._prepare_and_start_frag_download(ctx) @@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD): skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 - for i, segment in enumerate(segments): + for i, fragment in enumerate(fragments): frag_index += 1 if frag_index <= ctx['fragment_index']: continue @@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) if not success: return False self._append_fragment(ctx, frag_content) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 748b4d5..459e7ff 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1892,9 +1892,13 @@ class InfoExtractor(object): 'Bandwidth': bandwidth, } + def location_key(location): + return 'url' if re.match(r'^https?://', location) else 'path' + if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) + media_location_key = location_key(media_template) # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ # can't be used at the same time @@ -1904,7 +1908,7 @@ class InfoExtractor(object): segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['fragments'] = [{ - 'url': media_template % { + media_location_key: media_template % { 'Number': 
segment_number, 'Bandwidth': bandwidth, }, @@ -1928,7 +1932,7 @@ class InfoExtractor(object): 'Number': segment_number, } representation_ms_info['fragments'].append({ - 'url': segment_url, + media_location_key: segment_url, 'duration': float_or_none(segment_d, representation_ms_info['timescale']), }) @@ -1952,8 +1956,9 @@ class InfoExtractor(object): for s in representation_ms_info['s']: duration = float_or_none(s['d'], timescale) for r in range(s.get('r', 0) + 1): + segment_uri = representation_ms_info['segment_urls'][segment_index] fragments.append({ - 'url': representation_ms_info['segment_urls'][segment_index], + location_key(segment_uri): segment_uri, 'duration': duration, }) segment_index += 1 @@ -1962,6 +1967,7 @@ class InfoExtractor(object): # No fragments key is present in this case. if 'fragments' in representation_ms_info: f.update({ + 'fragment_base_url': base_url, 'fragments': [], 'protocol': 'http_dash_segments', }) @@ -1969,10 +1975,8 @@ class InfoExtractor(object): initialization_url = representation_ms_info['initialization_url'] if not f.get('url'): f['url'] = initialization_url - f['fragments'].append({'url': initialization_url}) + f['fragments'].append({location_key(initialization_url): initialization_url}) f['fragments'].extend(representation_ms_info['fragments']) - for fragment in f['fragments']: - fragment['url'] = urljoin(base_url, fragment['url']) try: existing_format = next( fo for fo in formats -- cgit v1.1 From c983cc3b71e3b2c80df920481dfa90bbc2ad7937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 08:17:01 +0700 Subject: [cinchcast] Extend _VALID_URL --- youtube_dl/extractor/cinchcast.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py index 562c9bb..b861d54 100644 --- a/youtube_dl/extractor/cinchcast.py +++ b/youtube_dl/extractor/cinchcast.py @@ -9,12 +9,20 @@ from ..utils import 
( class CinchcastIE(InfoExtractor): - _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single', + 'info_dict': { + 'id': '5258197', + 'ext': 'mp3', + 'title': 'Train Your Brain to Up Your Game with Coach Mandy', + 'upload_date': '20130816', + }, + }, { # Actual test is run in generic, look for undergroundwellness 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', 'only_matching': True, - } + }] def _real_extract(self, url): video_id = self._match_id(url) -- cgit v1.1 From 1d5472290f8c426c13e3403cb95fd44cc71b5a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 08:28:12 +0700 Subject: [podomatic] Extend _VALID_URL (closes #13827) --- youtube_dl/extractor/podomatic.py | 63 ++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py index f20946a..25fcebf 100644 --- a/youtube_dl/extractor/podomatic.py +++ b/youtube_dl/extractor/podomatic.py @@ -9,39 +9,46 @@ from ..utils import int_or_none class PodomaticIE(InfoExtractor): IE_NAME = 'podomatic' - _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' + _VALID_URL = r'''(?x) + (?P<proto>https?):// + (?: + (?P<channel>[^.]+)\.podomatic\.com/entry| + (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes + )/ + (?P<id>[^/?#&]+) + ''' - _TESTS = [ - { - 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', - 'md5': '84bb855fcf3429e6bf72460e1eed782d', - 'info_dict': { - 'id': '2009-01-02T16_03_35-08_00', - 'ext': 'mp3', - 'uploader': 'Science Teaching Tips', - 'uploader_id': 'scienceteachingtips', - 'title': '64. 
When the Moon Hits Your Eye', - 'duration': 446, - } - }, - { - 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', - 'md5': 'd2cf443931b6148e27638650e2638297', - 'info_dict': { - 'id': '2013-11-15T16_31_21-08_00', - 'ext': 'mp3', - 'uploader': 'Ostbahnhof / Techno Mix', - 'uploader_id': 'ostbahnhof', - 'title': 'Einunddreizig', - 'duration': 3799, - } - }, - ] + _TESTS = [{ + 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', + 'md5': '84bb855fcf3429e6bf72460e1eed782d', + 'info_dict': { + 'id': '2009-01-02T16_03_35-08_00', + 'ext': 'mp3', + 'uploader': 'Science Teaching Tips', + 'uploader_id': 'scienceteachingtips', + 'title': '64. When the Moon Hits Your Eye', + 'duration': 446, + } + }, { + 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', + 'md5': 'd2cf443931b6148e27638650e2638297', + 'info_dict': { + 'id': '2013-11-15T16_31_21-08_00', + 'ext': 'mp3', + 'uploader': 'Ostbahnhof / Techno Mix', + 'uploader_id': 'ostbahnhof', + 'title': 'Einunddreizig', + 'duration': 3799, + } + }, { + 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - channel = mobj.group('channel') + channel = mobj.group('channel') or mobj.group('channel_2') json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' + '?permalink=true&rtmp=0') % -- cgit v1.1 From f172c86dcdb46e484afc63732db56df5633028ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 21:17:55 +0700 Subject: [vlive:channel] Limit number of videos per page to 100 (closes #13830) --- youtube_dl/extractor/vlive.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 77c120a..64d0224 100644 --- a/youtube_dl/extractor/vlive.py +++ 
b/youtube_dl/extractor/vlive.py @@ -236,7 +236,12 @@ class VLiveChannelIE(InfoExtractor): query={ 'app_id': app_id, 'channelSeq': channel_seq, - 'maxNumOfRows': 1000, + # Large values of maxNumOfRows (~300 or above) may cause + # empty responses (see [1]), e.g. this happens for [2] that + # has more than 300 videos. + # 1. https://github.com/rg3/youtube-dl/issues/13830 + # 2. http://channels.vlive.tv/EDBF. + 'maxNumOfRows': 100, '_': int(time.time()), 'pageNo': page_num } -- cgit v1.1 From daaaf5f5942252e9fbc367957bd3b8a96d0dd5bb Mon Sep 17 00:00:00 2001 From: Ashutosh Chaudhary <ashutosh.chaudhary@gmail.com> Date: Mon, 23 Jan 2017 05:12:52 +0530 Subject: [voot] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/voot.py | 55 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/voot.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d0e04dd..48dda8b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1333,3 +1333,4 @@ from .zapiks import ZapiksIE from .zaq1 import Zaq1IE from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE +from .voot import VootIE diff --git a/youtube_dl/extractor/voot.py b/youtube_dl/extractor/voot.py new file mode 100644 index 0000000..db5bda6 --- /dev/null +++ b/youtube_dl/extractor/voot.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class VootIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/(?:.+?[/-]?)/1/(?:.+?[0-9]?)/(?:.+?[/-]?)/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353', + 'info_dict': { + 'id': '441353', + 'ext': 'mp4', + 'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340', + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + _GET_CONTENT_TEMPLATE = 
'https://wapi.voot.com/ws/ott/getMediaInfo.json?platform=Web&pId=3&mediaId=%s' + + def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True): + json_data = super(VootIE, self)._download_json(url_or_request, video_id, note, fatal=fatal) + if json_data['status']['code'] != 0: + if fatal: + raise ExtractorError(json_data['status']['message']) + return None + return json_data['assets'] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + self._GET_CONTENT_TEMPLATE % video_id, + video_id) + + thumbnail = '' + formats = [] + + if video_data: + format_url = video_data.get('URL') + formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + + if video_data['Pictures']: + for picture in video_data['Pictures']: + #Get only first available thumbnail + thumbnail = picture.get('URL') + break + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_data.get('MediaName'), + 'thumbnail': thumbnail, + 'formats':formats, + } -- cgit v1.1 From e2b4808fd8ed49424deaa6d800daf0950e55ffff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 08:04:51 +0700 Subject: [voot] Improve extraction (#10255, closes #11814) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/voot.py | 111 +++++++++++++++++++++++++------------ 2 files changed, 78 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 48dda8b..ebe414d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1222,6 +1222,7 @@ from .vodlocker import VodlockerIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE +from .voot import VootIE from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE @@ -1333,4 +1334,3 @@ from .zapiks import 
ZapiksIE from .zaq1 import Zaq1IE from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE -from .voot import VootIE diff --git a/youtube_dl/extractor/voot.py b/youtube_dl/extractor/voot.py index db5bda6..5de3deb 100644 --- a/youtube_dl/extractor/voot.py +++ b/youtube_dl/extractor/voot.py @@ -2,54 +2,97 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + ExtractorError, + int_or_none, + try_get, + unified_timestamp, +) class VootIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/(?:.+?[/-]?)/1/(?:.+?[0-9]?)/(?:.+?[/-]?)/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?voot\.com/(?:[^/]+/)+(?P<id>\d+)' + _GEO_COUNTRIES = ['IN'] + _TESTS = [{ 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353', 'info_dict': { - 'id': '441353', + 'id': '0_8ledb18o', 'ext': 'mp4', 'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - _GET_CONTENT_TEMPLATE = 'https://wapi.voot.com/ws/ott/getMediaInfo.json?platform=Web&pId=3&mediaId=%s' - - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True): - json_data = super(VootIE, self)._download_json(url_or_request, video_id, note, fatal=fatal) - if json_data['status']['code'] != 0: - if fatal: - raise ExtractorError(json_data['status']['message']) - return None - return json_data['assets'] + 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1', + 'uploader_id': 'batchUser', + 'timestamp': 1472162937, + 'upload_date': '20160825', + 'duration': 1146, + 'series': 'Ishq Ka Rang Safed', + 'season_number': 1, + 'episode': 'Is this the end of Kamini?', + 'episode_number': 340, + 'view_count': int, + 'like_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 
'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925', + 'only_matching': True, + }, { + 'url': 'https://www.voot.com/movies/pandavas-5/424627', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( - self._GET_CONTENT_TEMPLATE % video_id, - video_id) - thumbnail = '' - formats = [] + media_info = self._download_json( + 'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id, + query={ + 'platform': 'Web', + 'pId': 2, + 'mediaId': video_id, + }) + + status_code = try_get(media_info, lambda x: x['status']['code'], int) + if status_code != 0: + raise ExtractorError(media_info['status']['message'], expected=True) + + media = media_info['assets'] - if video_data: - format_url = video_data.get('URL') - formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + entry_id = media['EntryId'] + title = media['MediaName'] - if video_data['Pictures']: - for picture in video_data['Pictures']: - #Get only first available thumbnail - thumbnail = picture.get('URL') - break + description, series, season_number, episode, episode_number = [None] * 5 - self._sort_formats(formats) + for meta in try_get(media, lambda x: x['Metas'], list) or []: + key, value = meta.get('Key'), meta.get('Value') + if not key or not value: + continue + if key == 'ContentSynopsis': + description = value + elif key == 'RefSeriesTitle': + series = value + elif key == 'RefSeriesSeason': + season_number = int_or_none(value) + elif key == 'EpisodeMainTitle': + episode = value + elif key == 'EpisodeNo': + episode_number = int_or_none(value) return { - 'id': video_id, - 'title': video_data.get('MediaName'), - 'thumbnail': thumbnail, - 'formats':formats, + '_type': 'url_transparent', + 'url': 'kaltura:1982551:%s' % entry_id, + 'ie_key': KalturaIE.ie_key(), + 'title': title, + 'description': description, + 'series': series, + 'season_number': 
season_number, + 'episode': episode, + 'episode_number': episode_number, + 'timestamp': unified_timestamp(media.get('CreationDate')), + 'duration': int_or_none(media.get('Duration')), + 'view_count': int_or_none(media.get('ViewCounter')), + 'like_count': int_or_none(media.get('like_counter')), } -- cgit v1.1 From 16afce174ea71690844d37776d518ae374b896ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 08:18:16 +0700 Subject: [mpora] Remove extractor (closes #13826) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/mpora.py | 62 -------------------------------------- 2 files changed, 63 deletions(-) delete mode 100644 youtube_dl/extractor/mpora.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ebe414d..897557f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -584,7 +584,6 @@ from .mixcloud import ( ) from .mlb import MLBIE from .mnet import MnetIE -from .mpora import MporaIE from .moevideo import MoeVideoIE from .mofosex import MofosexIE from .mojvideo import MojvideoIE diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py deleted file mode 100644 index 5a1bee5..0000000 --- a/youtube_dl/extractor/mpora.py +++ /dev/null @@ -1,62 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import int_or_none - - -class MporaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' - IE_NAME = 'MPORA' - - _TEST = { - 'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de', - 'md5': 'a7a228473eedd3be741397cf452932eb', - 'info_dict': { - 'id': 'AAdo8okx4wiz', - 'ext': 'mp4', - 'title': 'Katy Curd - Winter in the Forest', - 'duration': 416, - 'uploader': 'Peter Newman Media', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - data_json = 
self._search_regex( - [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", - r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"], - webpage, 'json') - data = self._parse_json(data_json, video_id) - - uploader = data['info_overlay'].get('username') - duration = data['video']['duration'] // 1000 - thumbnail = data['video']['encodings']['sd']['poster'] - title = data['info_overlay']['title'] - - formats = [] - for encoding_id, edata in data['video']['encodings'].items(): - for src in edata['sources']: - width_str = self._search_regex( - r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'], - False, default=None) - vcodec = src['type'].partition('/')[2] - - formats.append({ - 'format_id': encoding_id + '-' + vcodec, - 'url': src['src'], - 'vcodec': vcodec, - 'width': int_or_none(width_str), - }) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'uploader': uploader, - 'duration': duration, - 'thumbnail': thumbnail, - } -- cgit v1.1 From fac188c6954edcccf3104abc3ac0155125a7d427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 08:44:28 +0700 Subject: [pluralsight] Fix format selection --- youtube_dl/extractor/pluralsight.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index e45d9fe..d35f54c 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -224,6 +224,7 @@ class PluralsightIE(PluralsightBaseIE): req_format_split = req_format.split('-', 1) if len(req_format_split) > 1: req_ext, req_quality = req_format_split + req_quality = '-'.join(req_quality.split('-')[:2]) for allowed_quality in ALLOWED_QUALITIES: if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: return (AllowedQuality(req_ext, (req_quality, )), ) -- cgit v1.1 From 92740e42414cb47f785daf257b9726fa361977b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Sun, 6 Aug 2017 09:02:14 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4f03ef0..e251586 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +version <unreleased> + +Core +* Use relative paths for DASH fragments (#12990) + +Extractors +* [pluralsight] Fix format selection +- [mpora] Remove extractor (#13826) ++ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218) +* [vlive:channel] Limit number of videos per page to 100 (#13830) +* [podomatic] Extend URL regular expression (#13827) +* [cinchcast] Extend URL regular expression +* [yandexdisk] Relax URL regular expression (#13824) +* [vidme] Extract DASH and HLS formats +- [teamfour] Remove extractor (#13782) +* [pornhd] Fix extraction (#13783) +* [udemy] Fix subtitles extraction (#13812) +* [mlb] Extend URL regular expression (#13740, #13773) ++ [pbs] Add support for new URL schema (#13801) +* [nrktv] Update API host (#13796) + + version 2017.07.30.1 Core -- cgit v1.1 From 903a183b6adc60808f04294a7003b6d4bd250304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 09:05:36 +0700 Subject: release 2017.08.06 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0421de7..5b72032 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.30.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.30.1 +[debug] youtube-dl version 2017.08.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e251586..18893bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.08.06 Core * Use relative paths for DASH fragments (#12990) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 77aac82..a3bd077 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -472,7 +472,6 @@ - **MovieFap** - **Moviezine** - **MovingImage** - - **MPORA** - **MSN** - **mtg**: MTG services - **mtv** @@ -783,7 +782,6 @@ - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - - **TeamFourStar** - **TechTalks** - **techtv.mit.edu** - **ted** @@ -953,6 +951,7 @@ - **VODPl** - 
**VODPlatform** - **VoiceRepublic** + - **Voot** - **VoxMedia** - **Vporn** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3816215..11d3bf2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.30.1' +__version__ = '2017.08.06' -- cgit v1.1 From 463e7216c87814edf1453aa3a5bfad89474ba6b1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 6 Aug 2017 23:07:06 +0800 Subject: [niconico] Support HTML5-only videos (closes #13806) --- ChangeLog | 6 ++ youtube_dl/extractor/niconico.py | 130 ++++++++++++++++++++++++++------------- 2 files changed, 93 insertions(+), 43 deletions(-) diff --git a/ChangeLog b/ChangeLog index 18893bb..7cd385e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [niconico] Support HTML5-only videos (#13806) + + version 2017.08.06 Core diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 79b9952..b13dc00 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -11,10 +11,13 @@ from ..compat import ( ) from ..utils import ( determine_ext, + dict_get, ExtractorError, int_or_none, parse_duration, parse_iso8601, + try_get, + unified_timestamp, urlencode_postdata, xpath_text, ) @@ -31,12 +34,15 @@ class NiconicoIE(InfoExtractor): 'id': 'sm22312215', 'ext': 'mp4', 'title': 'Big Buck Bunny', + 'thumbnail': r're:https?://.*', 'uploader': 'takuya0301', 'uploader_id': '2698420', 'upload_date': '20131123', 'timestamp': 1385182762, 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, + 'view_count': int, + 'comment_count': int, }, 'skip': 'Requires an account', }, { @@ -48,6 +54,7 @@ class NiconicoIE(InfoExtractor): 'ext': 'swf', 'title': '【鏡音リン】Dance on media【オリジナル】take2!', 'description': 
'md5:689f066d74610b3b22e0f1739add0f58', + 'thumbnail': r're:https?://.*', 'uploader': 'りょうた', 'uploader_id': '18822557', 'upload_date': '20110429', @@ -64,9 +71,11 @@ class NiconicoIE(InfoExtractor): 'ext': 'unknown_video', 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', + 'thumbnail': r're:https?://.*', 'upload_date': '20071224', 'timestamp': int, # timestamp field has different value if logged in 'duration': 304, + 'view_count': int, }, 'skip': 'Requires an account', }, { @@ -76,6 +85,7 @@ class NiconicoIE(InfoExtractor): 'ext': 'mp4', 'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~', 'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1', + 'thumbnail': r're:https?://.*', 'timestamp': 1388851200, 'upload_date': '20140104', 'uploader': 'アニメロチャンネル', @@ -83,6 +93,24 @@ class NiconicoIE(InfoExtractor): }, 'skip': 'The viewing period of the video you were searching for has expired.', }, { + # video not available via `getflv` + 'url': 'http://www.nicovideo.jp/watch/sm1151009', + 'info_dict': { + 'id': 'sm1151009', + 'ext': 'flv', + 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)', + 'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7', + 'thumbnail': r're:https?://.*', + 'duration': 184, + 'timestamp': 1190868283, + 'upload_date': '20070927', + 'uploader': 'denden2', + 'uploader_id': '1392194', + 'view_count': int, + 'comment_count': int, + }, + 'skip': 'Requires an account', + }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, }] @@ -130,33 +158,51 @@ class NiconicoIE(InfoExtractor): if video_id.startswith('so'): video_id = self._match_id(handle.geturl()) - video_info = self._download_xml( - 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, - note='Downloading video info page') - - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', - video_id, 'Downloading flv info') - - flv_info = 
compat_urlparse.parse_qs(flv_info_webpage) - if 'url' not in flv_info: - if 'deleted' in flv_info: - raise ExtractorError('The video has been deleted.', - expected=True) - elif 'closed' in flv_info: - raise ExtractorError('Niconico videos now require logging in', - expected=True) - elif 'error' in flv_info: - raise ExtractorError('%s reports error: %s' % ( - self.IE_NAME, flv_info['error'][0]), expected=True) - else: - raise ExtractorError('Unable to find video URL') - - video_real_url = flv_info['url'][0] + api_data = self._parse_json(self._html_search_regex( + 'data-api-data="([^"]+)"', webpage, + 'API data', default='{}'), video_id) + video_real_url = try_get( + api_data, lambda x: x['video']['smileInfo']['url']) + + if video_real_url: + def get_video_info(items): + return dict_get(api_data['video'], items) + else: + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', + video_id, 'Downloading flv info') + + flv_info = compat_urlparse.parse_qs(flv_info_webpage) + if 'url' not in flv_info: + if 'deleted' in flv_info: + raise ExtractorError('The video has been deleted.', + expected=True) + elif 'closed' in flv_info: + raise ExtractorError('Niconico videos now require logging in', + expected=True) + elif 'error' in flv_info: + raise ExtractorError('%s reports error: %s' % ( + self.IE_NAME, flv_info['error'][0]), expected=True) + else: + raise ExtractorError('Unable to find video URL') + + video_real_url = flv_info['url'][0] + + video_info_xml = self._download_xml( + 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, + video_id, note='Downloading video info page') + + def get_video_info(items): + if not isinstance(items, list): + items = [items] + for item in items: + ret = xpath_text(video_info_xml, './/' + item) + if ret: + return ret # Start extracting information - title = xpath_text(video_info, './/title') + title = get_video_info('title') if not title: title = 
self._og_search_title(webpage, default=None) if not title: @@ -170,18 +216,19 @@ class NiconicoIE(InfoExtractor): watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {} video_detail = watch_api_data.get('videoDetail', {}) - extension = xpath_text(video_info, './/movie_type') + extension = get_video_info(['movie_type', 'movieType']) if not extension: extension = determine_ext(video_real_url) thumbnail = ( - xpath_text(video_info, './/thumbnail_url') or + get_video_info(['thumbnail_url', 'thumbnailURL']) or self._html_search_meta('image', webpage, 'thumbnail', default=None) or video_detail.get('thumbnail')) - description = xpath_text(video_info, './/description') + description = get_video_info('description') - timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve')) + timestamp = (parse_iso8601(get_video_info('first_retrieve')) or + unified_timestamp(get_video_info('postedDateTime'))) if not timestamp: match = self._html_search_meta('datePublished', webpage, 'date published', default=None) if match: @@ -191,7 +238,7 @@ class NiconicoIE(InfoExtractor): video_detail['postedAt'].replace('/', '-'), delimiter=' ', timezone=datetime.timedelta(hours=9)) - view_count = int_or_none(xpath_text(video_info, './/view_counter')) + view_count = int_or_none(get_video_info(['view_counter', 'viewCount'])) if not view_count: match = self._html_search_regex( r'>Views: <strong[^>]*>([^<]+)</strong>', @@ -200,31 +247,28 @@ class NiconicoIE(InfoExtractor): view_count = int_or_none(match.replace(',', '')) view_count = view_count or video_detail.get('viewCount') - comment_count = int_or_none(xpath_text(video_info, './/comment_num')) + comment_count = (int_or_none(get_video_info('comment_num')) or + video_detail.get('commentCount') or + try_get(api_data, lambda x: x['thread']['commentCount'])) if not comment_count: match = self._html_search_regex( r'>Comments: <strong[^>]*>([^<]+)</strong>', webpage, 'comment count', default=None) if 
match: comment_count = int_or_none(match.replace(',', '')) - comment_count = comment_count or video_detail.get('commentCount') duration = (parse_duration( - xpath_text(video_info, './/length') or + get_video_info('length') or self._html_search_meta( 'video:duration', webpage, 'video duration', default=None)) or - video_detail.get('length')) + video_detail.get('length') or + get_video_info('duration')) - webpage_url = xpath_text(video_info, './/watch_url') or url + webpage_url = get_video_info('watch_url') or url - if video_info.find('.//ch_id') is not None: - uploader_id = video_info.find('.//ch_id').text - uploader = video_info.find('.//ch_name').text - elif video_info.find('.//user_id') is not None: - uploader_id = video_info.find('.//user_id').text - uploader = video_info.find('.//user_nickname').text - else: - uploader_id = uploader = None + owner = api_data.get('owner', {}) + uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id') + uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname') return { 'id': video_id, -- cgit v1.1 From ee6a611665a1ee8583ce84bd9d36d03b6f697895 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 7 Aug 2017 00:19:46 +0800 Subject: [niconico] Support videos with multiple formats (closes #13522) --- ChangeLog | 1 + youtube_dl/extractor/niconico.py | 161 ++++++++++++++++++++++++++++++++++----- 2 files changed, 145 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7cd385e..084e98c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [niconico] Support videos with multiple formats (#13522) + [niconico] Support HTML5-only videos (#13806) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index b13dc00..026329d 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -14,8 +14,10 @@ from ..utils import ( dict_get, ExtractorError, int_or_none, + float_or_none, 
parse_duration, parse_iso8601, + remove_start, try_get, unified_timestamp, urlencode_postdata, @@ -93,11 +95,12 @@ class NiconicoIE(InfoExtractor): }, 'skip': 'The viewing period of the video you were searching for has expired.', }, { - # video not available via `getflv` + # video not available via `getflv`; "old" HTML5 video 'url': 'http://www.nicovideo.jp/watch/sm1151009', + 'md5': '8fa81c364eb619d4085354eab075598a', 'info_dict': { 'id': 'sm1151009', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)', 'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7', 'thumbnail': r're:https?://.*', @@ -111,6 +114,25 @@ class NiconicoIE(InfoExtractor): }, 'skip': 'Requires an account', }, { + # "New" HTML5 video + 'url': 'http://www.nicovideo.jp/watch/sm31464864', + 'md5': '351647b4917660986dc0fa8864085135', + 'info_dict': { + 'id': 'sm31464864', + 'ext': 'mp4', + 'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質', + 'description': 'md5:e52974af9a96e739196b2c1ca72b5feb', + 'timestamp': 1498514060, + 'upload_date': '20170626', + 'uploader': 'ゲス', + 'uploader_id': '40826363', + 'thumbnail': r're:https?://.*', + 'duration': 198, + 'view_count': int, + 'comment_count': int, + }, + 'skip': 'Requires an account', + }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, }] @@ -147,6 +169,84 @@ class NiconicoIE(InfoExtractor): self._downloader.report_warning('unable to log in: bad username or password') return login_ok + def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality): + def yesno(boolean): + return 'yes' if boolean else 'no' + + session_api_data = api_data['video']['dmcInfo']['session_api'] + session_api_endpoint = session_api_data['urls'][0] + + format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality])) + + session_response = self._download_json( + session_api_endpoint['url'], video_id, + query={'_format': 'json'}, + headers={'Content-Type': 
'application/json'}, + note='Downloading JSON metadata for %s' % format_id, + data=json.dumps({ + 'session': { + 'client_info': { + 'player_id': session_api_data['player_id'], + }, + 'content_auth': { + 'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]], + 'content_key_timeout': session_api_data['content_key_timeout'], + 'service_id': 'nicovideo', + 'service_user_id': session_api_data['service_user_id'] + }, + 'content_id': session_api_data['content_id'], + 'content_src_id_sets': [{ + 'content_src_ids': [{ + 'src_id_to_mux': { + 'audio_src_ids': [audio_quality['id']], + 'video_src_ids': [video_quality['id']], + } + }] + }], + 'content_type': 'movie', + 'content_uri': '', + 'keep_method': { + 'heartbeat': { + 'lifetime': session_api_data['heartbeat_lifetime'] + } + }, + 'priority': session_api_data['priority'], + 'protocol': { + 'name': 'http', + 'parameters': { + 'http_parameters': { + 'parameters': { + 'http_output_download_parameters': { + 'use_ssl': yesno(session_api_endpoint['is_ssl']), + 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']), + } + } + } + } + }, + 'recipe_id': session_api_data['recipe_id'], + 'session_operation_auth': { + 'session_operation_auth_by_signature': { + 'signature': session_api_data['signature'], + 'token': session_api_data['token'], + } + }, + 'timing_constraint': 'unlimited' + } + })) + + resolution = video_quality.get('resolution', {}) + + return { + 'url': session_response['data']['session']['content_uri'], + 'format_id': format_id, + 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4 + 'abr': float_or_none(audio_quality.get('bitrate'), 1000), + 'vbr': float_or_none(video_quality.get('bitrate'), 1000), + 'height': resolution.get('height'), + 'width': resolution.get('width'), + } + def _real_extract(self, url): video_id = self._match_id(url) @@ -161,13 +261,13 @@ class NiconicoIE(InfoExtractor): api_data = self._parse_json(self._html_search_regex( 
'data-api-data="([^"]+)"', webpage, 'API data', default='{}'), video_id) - video_real_url = try_get( - api_data, lambda x: x['video']['smileInfo']['url']) - if video_real_url: - def get_video_info(items): - return dict_get(api_data['video'], items) - else: + def _format_id_from_url(video_url): + return 'economy' if video_real_url.endswith('low') else 'normal' + + try: + video_real_url = api_data['video']['smileInfo']['url'] + except KeyError: # Flash videos # Get flv info flv_info_webpage = self._download_webpage( 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', @@ -187,8 +287,6 @@ class NiconicoIE(InfoExtractor): else: raise ExtractorError('Unable to find video URL') - video_real_url = flv_info['url'][0] - video_info_xml = self._download_xml( 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') @@ -201,6 +299,41 @@ class NiconicoIE(InfoExtractor): if ret: return ret + video_real_url = flv_info['url'][0] + + extension = get_video_info('movie_type') + if not extension: + extension = determine_ext(video_real_url) + + formats = [{ + 'url': video_real_url, + 'ext': extension, + 'format_id': _format_id_from_url(video_real_url), + }] + else: + formats = [] + + dmc_info = api_data['video'].get('dmcInfo') + if dmc_info: # "New" HTML5 videos + quality_info = dmc_info['quality'] + for audio_quality in quality_info['audios']: + for video_quality in quality_info['videos']: + if not audio_quality['available'] or not video_quality['available']: + continue + formats.append(self._extract_format_for_quality( + api_data, video_id, audio_quality, video_quality)) + + self._sort_formats(formats) + else: # "Old" HTML5 videos + formats = [{ + 'url': video_real_url, + 'ext': 'mp4', + 'format_id': _format_id_from_url(video_real_url), + }] + + def get_video_info(items): + return dict_get(api_data['video'], items) + # Start extracting information title = get_video_info('title') if not title: @@ -216,10 +349,6 @@ class 
NiconicoIE(InfoExtractor): watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {} video_detail = watch_api_data.get('videoDetail', {}) - extension = get_video_info(['movie_type', 'movieType']) - if not extension: - extension = determine_ext(video_real_url) - thumbnail = ( get_video_info(['thumbnail_url', 'thumbnailURL']) or self._html_search_meta('image', webpage, 'thumbnail', default=None) or @@ -272,10 +401,8 @@ class NiconicoIE(InfoExtractor): return { 'id': video_id, - 'url': video_real_url, 'title': title, - 'ext': extension, - 'format_id': 'economy' if video_real_url.endswith('low') else 'normal', + 'formats': formats, 'thumbnail': thumbnail, 'description': description, 'uploader': uploader, -- cgit v1.1 From 15d1e8a23dbaa28635cae30ff6c5cfb095b4c7c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 7 Aug 2017 22:43:42 +0700 Subject: [dplayit] Fix extraction (closes #13851) --- youtube_dl/extractor/dplay.py | 66 +++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 1a41760..76e7841 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -7,16 +7,18 @@ import time from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_HTTPError, + compat_str, + compat_urlparse, ) from ..utils import ( - USER_AGENTS, ExtractorError, int_or_none, - unified_strdate, remove_end, + try_get, + unified_strdate, update_url_query, + USER_AGENTS, ) @@ -183,28 +185,44 @@ class DPlayItIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - info_url = self._search_regex( - r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', - webpage, 'video id') - title = remove_end(self._og_search_title(webpage), ' | Dplay') - try: - info = self._download_json( - info_url, display_id, headers={ - 
'Authorization': 'Bearer %s' % self._get_cookies(url).get( - 'dplayit_token').value, - 'Referer': url, - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): - info = self._parse_json(e.cause.read().decode('utf-8'), display_id) - error = info['errors'][0] - if error.get('code') == 'access.denied.geoblocked': - self.raise_geo_restricted( - msg=error.get('detail'), countries=self._GEO_COUNTRIES) - raise ExtractorError(info['errors'][0]['detail'], expected=True) - raise + video_id = None + + info = self._search_regex( + r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")', + webpage, 'playback JSON', default=None) + if info: + for _ in range(2): + info = self._parse_json(info, display_id, fatal=False) + if not info: + break + else: + video_id = try_get(info, lambda x: x['data']['id']) + + if not info: + info_url = self._search_regex( + r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', + webpage, 'info url') + + video_id = info_url.rpartition('/')[-1] + + try: + info = self._download_json( + info_url, display_id, headers={ + 'Authorization': 'Bearer %s' % self._get_cookies(url).get( + 'dplayit_token').value, + 'Referer': url, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): + info = self._parse_json(e.cause.read().decode('utf-8'), display_id) + error = info['errors'][0] + if error.get('code') == 'access.denied.geoblocked': + self.raise_geo_restricted( + msg=error.get('detail'), countries=self._GEO_COUNTRIES) + raise ExtractorError(info['errors'][0]['detail'], expected=True) + raise hls_url = info['data']['attributes']['streaming']['hls']['url'] @@ -230,7 +248,7 @@ class DPlayItIE(InfoExtractor): season_number = episode_number = upload_date = None return { - 'id': info_url.rpartition('/')[-1], + 'id': compat_str(video_id or display_id), 'display_id': display_id, 'title': title, 'description': 
self._og_search_description(webpage), -- cgit v1.1 From 4bf22f7a1014c55e3358b5a419945071b152eafc Mon Sep 17 00:00:00 2001 From: Alex Seiler <seileralex@gmail.com> Date: Tue, 8 Aug 2017 00:41:38 +0200 Subject: [20min] Fix embeds extraction --- youtube_dl/extractor/twentymin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index 4fd1aa4..a42977f 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -50,7 +50,7 @@ class TwentyMinutenIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return [m.group('url') for m in re.finditer( - r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', + r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', webpage)] def _real_extract(self, url): -- cgit v1.1 From 5b232f46dcbdc805507c02edd4fd598f31d544d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 22:28:19 +0700 Subject: [utils] Skip missing params in cli_bool_option (closes #13865) --- test/test_utils.py | 4 ++++ youtube_dl/utils.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 7803e5b..2aab16b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1182,6 +1182,10 @@ part 3</font></u> cli_bool_option( {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), ['--check-certificate=true']) + self.assertEqual( + cli_bool_option( + {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), + []) def test_ohdave_rsa_encrypt(self): N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fdf5e29..c9cbd58 100644 --- 
a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2733,6 +2733,8 @@ def cli_option(params, command_option, param): def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None): param = params.get(param) + if param is None: + return [] assert isinstance(param, bool) if separator: return [command_option + separator + (true_value if param else false_value)] -- cgit v1.1 From 5b3ddadcc3012c4ef390a7cf70dbcb8b83f07428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 22:55:13 +0700 Subject: [mixcloud] Fix play info decryption (closes #13867) --- youtube_dl/extractor/mixcloud.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 0efbe66..40cd2e3 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -54,15 +54,22 @@ class MixcloudIE(InfoExtractor): }] # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js - @staticmethod - def _decrypt_play_info(play_info): - KEY = 'pleasedontdownloadourmusictheartistswontgetpaid' - + def _decrypt_play_info(self, play_info, video_id): + KEYS = ( + 'pleasedontdownloadourmusictheartistswontgetpaid', + '(function() { return new Date().toLocaleDateString(); })()' + ) play_info = base64.b64decode(play_info.encode('ascii')) - - return ''.join([ - compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)])) - for idx, ch in enumerate(play_info)]) + for num, key in enumerate(KEYS, start=1): + try: + return self._parse_json( + ''.join([ + compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)])) + for idx, ch in enumerate(play_info)]), + video_id) + except ExtractorError: + if num == len(KEYS): + raise def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -78,8 +85,8 @@ class MixcloudIE(InfoExtractor): encrypted_play_info = self._search_regex( 
r'm-play-info="([^"]+)"', webpage, 'play info') - play_info = self._parse_json( - self._decrypt_play_info(encrypted_play_info), track_id) + + play_info = self._decrypt_play_info(encrypted_play_info, track_id) if message and 'stream_url' not in play_info: raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) -- cgit v1.1 From dee04d24a422c0ea5586d2f1d1f97f1e3e4ecf70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 23:12:02 +0700 Subject: [nick] Add support for nick.com.pl (closes #13860) --- youtube_dl/extractor/nick.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index b688637..510b1c4 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor): class NickDeIE(MTVServicesInfoExtractor): IE_NAME = 'nick.de' - _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', 'only_matching': True, @@ -88,6 +88,9 @@ class NickDeIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht', 'only_matching': True, + }, { + 'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom', + 'only_matching': True, }] def _extract_mrss_url(self, webpage, host): -- cgit v1.1 From baba5f4d1daa29c42b2ad56c06e3880f10b7b03d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 23:46:49 +0700 Subject: [xxxymovies] Fix title extraction (closes #13868) --- 
youtube_dl/extractor/xxxymovies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xxxymovies.py b/youtube_dl/extractor/xxxymovies.py index 5c8f17e..e34ebe3 100644 --- a/youtube_dl/extractor/xxxymovies.py +++ b/youtube_dl/extractor/xxxymovies.py @@ -39,8 +39,8 @@ class XXXYMoviesIE(InfoExtractor): r"video_url\s*:\s*'([^']+)'", webpage, 'video URL') title = self._html_search_regex( - [r'<div class="block_header">\s*<h1>([^<]+)</h1>', - r'<title>(.*?)\s*-\s*XXXYMovies\.com'], + [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<', + r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)'], webpage, 'title') thumbnail = self._search_regex( -- cgit v1.1 From 0e7dfa7d16e9e013bfaa085a59b9bbe4b4d1dfb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 23:49:53 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 084e98c..7c8eb92 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,14 @@ version <unreleased> +Core +* [utils] Skip missing params in cli_bool_option (#13865) + Extractors +* [xxxymovies] Fix title extraction (#13868) ++ [nick] Add support for nick.com.pl (#13860) +* [mixcloud] Fix play info decryption (#13867) +* [20min] Fix embeds extraction (#13852) +* [dplayit] Fix extraction (#13851) + [niconico] Support videos with multiple formats (#13522) + [niconico] Support HTML5-only videos (#13806) -- cgit v1.1 From 6ed99754bb6074454b5d4875cc7b8b442e763ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 23:52:22 +0700 Subject: release 2017.08.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 5b72032..7ee704e 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.09*.
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.06 +[debug] youtube-dl version 2017.08.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7c8eb92..b28ade4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.09 Core * [utils] Skip missing params in cli_bool_option (#13865) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 11d3bf2..0221723 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.06' +__version__ = '2017.08.09' -- cgit v1.1 From 41918eaa5ce1225f7e0a94882e7c77919342210d Mon Sep 17 00:00:00 2001 From: tetra-eder <30865771+tetra-eder@users.noreply.github.com> Date: Fri, 11 Aug 2017 17:00:39 +0200 Subject: [generic] Add support for vzaar embeds --- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ youtube_dl/extractor/vzaar.py | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 34e8149..51acead 
100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -98,6 +98,7 @@ from .wistia import WistiaIE from .mediaset import MediasetIE from .joj import JojIE from .megaphone import MegaphoneIE +from .vzaar import VzaarIE class GenericIE(InfoExtractor): @@ -1840,6 +1841,16 @@ class GenericIE(InfoExtractor): 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', }, }, + { + # vzaar embed + 'url': 'http://www.xruniversity.com/bdsm-lets-begin-melissa-moore/', + 'md5': 'cddc9fb8a8644a0a7742149eee95080b', + 'info_dict': { + 'id': '11002506', + 'ext': 'mp4', + 'title': 'XR-U SHOW: Ready Player Fuck - EP. 61', + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2781,6 +2792,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()) + # Look for vzaar embeds + vzaar_urls = VzaarIE._extract_urls(webpage) + if vzaar_urls: + return self.playlist_from_matches( + vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + # Look for Rutube embeds rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index b270f08..02fcd52 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -28,6 +30,12 @@ class VzaarIE(InfoExtractor): }, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( -- cgit v1.1 From 1663bd6e1c11bf6cbf290fcbbf12358207570faf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Aug 2017 22:02:00 +0700 Subject: [generic] Replace vzaar embed test --- 
youtube_dl/extractor/generic.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 51acead..8362d9a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1843,12 +1843,12 @@ class GenericIE(InfoExtractor): }, { # vzaar embed - 'url': 'http://www.xruniversity.com/bdsm-lets-begin-melissa-moore/', - 'md5': 'cddc9fb8a8644a0a7742149eee95080b', + 'url': 'http://help.vzaar.com/article/165-embedding-video', + 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4', 'info_dict': { - 'id': '11002506', + 'id': '8707641', 'ext': 'mp4', - 'title': 'XR-U SHOW: Ready Player Fuck - EP. 61', + 'title': 'Building A Business Online: Principal Chairs Q & A', }, }, # { @@ -2792,12 +2792,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()) - # Look for vzaar embeds - vzaar_urls = VzaarIE._extract_urls(webpage) - if vzaar_urls: - return self.playlist_from_matches( - vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) - # Look for Rutube embeds rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: @@ -2828,6 +2822,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) + # Look for vzaar embeds + vzaar_urls = VzaarIE._extract_urls(webpage) + if vzaar_urls: + return self.playlist_from_matches( + vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): -- cgit v1.1 From 92a5c415328953851d0a6b7893de5387a1b7b469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 16:30:50 +0700 Subject: [mixcloud] Fix play info decryption (closes #13885) --- youtube_dl/extractor/mixcloud.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py 
b/youtube_dl/extractor/mixcloud.py index 40cd2e3..52f7428 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -57,7 +57,8 @@ class MixcloudIE(InfoExtractor): def _decrypt_play_info(self, play_info, video_id): KEYS = ( 'pleasedontdownloadourmusictheartistswontgetpaid', - '(function() { return new Date().toLocaleDateString(); })()' + 'window.addEventListener = window.addEventListener || function() {};', + '(function() { return new Date().toLocaleDateString(); })()', ) play_info = base64.b64decode(play_info.encode('ascii')) for num, key in enumerate(KEYS, start=1): -- cgit v1.1 From 82889d4ae517640df217f99e7744002e0deba47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 16:48:11 +0700 Subject: [extractor/common] Respect source's type attribute for HTML5 media (closes #13892) --- youtube_dl/extractor/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 459e7ff..4d61275 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2114,9 +2114,9 @@ class InfoExtractor(object): return f return {} - def _media_formats(src, cur_media_type): + def _media_formats(src, type_info, cur_media_type): full_url = absolute_url(src) - ext = determine_ext(full_url) + ext = type_info.get('ext') or determine_ext(full_url) if ext == 'm3u8': is_plain_url = False formats = self._extract_m3u8_formats( @@ -2165,9 +2165,9 @@ class InfoExtractor(object): src = source_attributes.get('src') if not src: continue - is_plain_url, formats = _media_formats(src, media_type) + f = parse_content_type(source_attributes.get('type')) + is_plain_url, formats = _media_formats(src, f, media_type) if is_plain_url: - f = parse_content_type(source_attributes.get('type')) f.update(formats[0]) media_info['formats'].append(f) else: -- cgit v1.1 From ac8491fcca6f9c0f6c7904e1cf13953f912eeb39 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 17:11:35 +0700 Subject: [extractor/common] Make _family_friendly_search optional --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4d61275..e565901 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -940,7 +940,8 @@ class InfoExtractor(object): def _family_friendly_search(self, html): # See http://schema.org/VideoObject - family_friendly = self._html_search_meta('isFamilyFriendly', html) + family_friendly = self._html_search_meta( + 'isFamilyFriendly', html, default=None) if not family_friendly: return None -- cgit v1.1 From e74e3b63e3cdb31a61af1fc21c703e912c029b96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 17:14:11 +0700 Subject: [YoutubeDL] Make sure format id is not empty --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 367ae35..df7378f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1500,7 +1500,7 @@ class YoutubeDL(object): sanitize_string_field(format, 'format_id') sanitize_numeric_fields(format) format['url'] = sanitize_url(format['url']) - if format.get('format_id') is None: + if not format.get('format_id'): format['format_id'] = compat_str(i) else: # Sanitize format_id from characters used in format selector expression -- cgit v1.1 From 70851a95c307880f016fcb6f37427a8eeae73cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 17:18:23 +0700 Subject: [aparat] Extract all formats (closes #13887) --- youtube_dl/extractor/aparat.py | 49 ++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 025e29a..e394cb6 100644 --- 
a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -3,13 +3,13 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - ExtractorError, - HEADRequest, + int_or_none, + mimetype2ext, ) class AparatIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' _TEST = { 'url': 'http://www.aparat.com/v/wP8On', @@ -29,30 +29,41 @@ class AparatIE(InfoExtractor): # Note: There is an easier-to-parse configuration at # http://www.aparat.com/video/video/config/videohash/%video_id # but the URL in there does not work - embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id - webpage = self._download_webpage(embed_url, video_id) - - file_list = self._parse_json(self._search_regex( - r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id) - for i, item in enumerate(file_list[0]): - video_url = item['file'] - req = HEADRequest(video_url) - res = self._request_webpage( - req, video_id, note='Testing video URL %d' % i, errnote=False) - if res: - break - else: - raise ExtractorError('No working video URLs found') + webpage = self._download_webpage( + 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, + video_id) title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') + + file_list = self._parse_json( + self._search_regex( + r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, + 'file list'), + video_id) + + formats = [] + for item in file_list[0]: + file_url = item.get('file') + if not file_url: + continue + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': label or ext, + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', 
default=None)), + }) + self._sort_formats(formats) + thumbnail = self._search_regex( r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) return { 'id': video_id, 'title': title, - 'url': video_url, - 'ext': 'mp4', 'thumbnail': thumbnail, 'age_limit': self._family_friendly_search(webpage), + 'formats': formats, } -- cgit v1.1 From 868f79db41a4d81a87ef12c8bd5ef73205c9c029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 19:24:26 +0700 Subject: [extractor/common] Fix _media_formats --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e565901..7fe8884 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2115,7 +2115,7 @@ class InfoExtractor(object): return f return {} - def _media_formats(src, type_info, cur_media_type): + def _media_formats(src, cur_media_type, type_info={}): full_url = absolute_url(src) ext = type_info.get('ext') or determine_ext(full_url) if ext == 'm3u8': @@ -2167,7 +2167,7 @@ class InfoExtractor(object): if not src: continue f = parse_content_type(source_attributes.get('type')) - is_plain_url, formats = _media_formats(src, f, media_type) + is_plain_url, formats = _media_formats(src, media_type, f) if is_plain_url: f.update(formats[0]) media_info['formats'].append(f) -- cgit v1.1 From 0c43a481b91c657643eb42f72d293f245a410c52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 23:24:46 +0700 Subject: [reddit] Add extractors (closes #13847) --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/reddit.py | 114 +++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 youtube_dl/extractor/reddit.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 897557f..f1a9f6e 100644 --- a/youtube_dl/extractor/extractors.py +++ 
b/youtube_dl/extractor/extractors.py @@ -840,6 +840,10 @@ from .rai import ( from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redbulltv import RedBullTVIE +from .reddit import ( + RedditIE, + RedditRIE, +) from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py new file mode 100644 index 0000000..01c85ee --- /dev/null +++ b/youtube_dl/extractor/reddit.py @@ -0,0 +1,114 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + float_or_none, +) + + +class RedditIE(InfoExtractor): + _VALID_URL = r'https?://v\.redd\.it/(?P[^/?#&]+)' + _TEST = { + # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ + 'url': 'https://v.redd.it/zv89llsvexdz', + 'md5': '655d06ace653ea3b87bccfb1b27ec99d', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'title': 'zv89llsvexdz', + }, + 'params': { + 'format': 'bestvideo', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + formats = self._extract_m3u8_formats( + 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, + 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + + formats.extend(self._extract_mpd_formats( + 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, + mpd_id='dash', fatal=False)) + + return { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + +class RedditRIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P[^/]+)' + _TESTS = [{ + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'title': 'That small heart attack.', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1501941939, + 'upload_date': '20170805', + 'uploader': 'Antw87', + 'like_count': int, + 'dislike_count': int, + 
'comment_count': int, + 'age_limit': 0, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', + 'only_matching': True, + }, { + # imgur + 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, + }, { + # streamable + 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', + 'only_matching': True, + }, { + # youtube + 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + data = self._download_json( + url + '.json', video_id)[0]['data']['children'][0]['data'] + + video_url = data['url'] + + # Avoid recursing into the same reddit URL + if 'reddit.com/' in video_url and '/%s/' % video_id in video_url: + raise ExtractorError('No media found', expected=True) + + over_18 = data.get('over_18') + if over_18 is True: + age_limit = 18 + elif over_18 is False: + age_limit = 0 + else: + age_limit = None + + return { + '_type': 'url_transparent', + 'url': video_url, + 'title': data.get('title'), + 'thumbnail': data.get('thumbnail'), + 'timestamp': float_or_none(data.get('created_utc')), + 'uploader': data.get('author'), + 'like_count': int_or_none(data.get('ups')), + 'dislike_count': int_or_none(data.get('downs')), + 'comment_count': int_or_none(data.get('num_comments')), + 'age_limit': age_limit, + } -- cgit v1.1 From 4ef9152428c4a000cb5fc76732fc579f1f4c1d69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 00:58:39 +0700 Subject: [limelight] Improve embeds detection (closes #13895) --- youtube_dl/extractor/limelight.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 0a5a395..ad65b27 100644 --- 
a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -26,14 +26,16 @@ class LimelightBaseIE(InfoExtractor): 'Channel': 'channel', 'ChannelList': 'channel_list', } + + def smuggle(url): + return smuggle_url(url, {'source_url': source_url}) + entries = [] for kind, video_id in re.findall( r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P[a-z0-9]{32})', webpage): entries.append(cls.url_result( - smuggle_url( - 'limelight:%s:%s' % (lm[kind], video_id), - {'source_url': source_url}), + smuggle('limelight:%s:%s' % (lm[kind], video_id)), 'Limelight%s' % kind, video_id)) for mobj in re.finditer( # As per [1] class attribute should be exactly equal to @@ -49,10 +51,15 @@ class LimelightBaseIE(InfoExtractor): ''', webpage): kind, video_id = mobj.group('kind'), mobj.group('id') entries.append(cls.url_result( - smuggle_url( - 'limelight:%s:%s' % (kind, video_id), - {'source_url': source_url}), + smuggle('limelight:%s:%s' % (kind, video_id)), 'Limelight%s' % kind.capitalize(), video_id)) + # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page) + for video_id in re.findall( + r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P[a-z0-9]{32})', + webpage): + entries.append(cls.url_result( + smuggle('limelight:media:%s' % video_id), + LimelightMediaIE.ie_key(), video_id)) return entries def _call_playlist_service(self, item_id, method, fatal=True, referer=None): -- cgit v1.1 From eb02940cc7dc2233f2d7873c12165245a3c3c14e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 01:11:27 +0700 Subject: [generic] Add test for #13895 --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8362d9a..eff5fbf 100644 --- a/youtube_dl/extractor/generic.py +++ 
b/youtube_dl/extractor/generic.py @@ -1786,6 +1786,21 @@ class GenericIE(InfoExtractor): 'playlist_mincount': 5, }, { + # Limelight embed (LimelightPlayerUtil.embed) + 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri', + 'info_dict': { + 'id': '95d035dc5c8a401588e9c0e6bd1e9c92', + 'ext': 'mp4', + 'title': '07448641', + 'timestamp': 1499890639, + 'upload_date': '20170712', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['LimelightMedia'], + }, + { 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/', 'info_dict': { 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest', -- cgit v1.1 From b3c6515365ed415bbf813c0c2e6c12585824b77a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 07:23:29 +0700 Subject: [fourtube] Add support for other sites (closes #6022, closes #7859, closes #13901) --- youtube_dl/extractor/extractors.py | 7 +- youtube_dl/extractor/fourtube.py | 174 +++++++++++++++++++++++++++++-------- 2 files changed, 146 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f1a9f6e..fb79a17 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -350,7 +350,12 @@ from .flipagram import FlipagramIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE -from .fourtube import FourTubeIE +from .fourtube import ( + FourTubeIE, + PornTubeIE, + PornerBrosIE, + FuxIE, +) from .fox import FOXIE from .fox9 import FOX9IE from .foxgay import FoxgayIE diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index e3fd08b..ad273a0 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,39 +3,22 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( parse_duration, 
parse_iso8601, - sanitized_Request, str_to_int, ) -class FourTubeIE(InfoExtractor): - IE_NAME = '4tube' - _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P\d+)' +class FourTubeBaseIE(InfoExtractor): + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') - _TEST = { - 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', - 'md5': '6516c8ac63b03de06bc8eac14362db4f', - 'info_dict': { - 'id': '209733', - 'ext': 'mp4', - 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', - 'uploader': 'WCP Club', - 'uploader_id': 'wcp-club', - 'upload_date': '20131031', - 'timestamp': 1383263892, - 'duration': 583, - 'view_count': int, - 'like_count': int, - 'categories': list, - 'age_limit': 18, - } - } + if kind == 'm' or not display_id: + url = self._URL_TEMPLATE % video_id - def _real_extract(self, url): - video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_meta('name', webpage) @@ -43,10 +26,10 @@ class FourTubeIE(InfoExtractor): 'uploadDate', webpage)) thumbnail = self._html_search_meta('thumbnailUrl', webpage) uploader_id = self._html_search_regex( - r'', + r'', webpage, 'uploader id', fatal=False) uploader = self._html_search_regex( - r'', + r'', webpage, 'uploader', fatal=False) categories_html = self._search_regex( @@ -60,10 +43,10 @@ class FourTubeIE(InfoExtractor): view_count = str_to_int(self._search_regex( r']+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">', - webpage, 'view count', fatal=False)) + webpage, 'view count', default=None)) like_count = str_to_int(self._search_regex( r']+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">', - webpage, 'like count', fatal=False)) + webpage, 'like count', default=None)) duration = parse_duration(self._html_search_meta('duration', webpage)) media_id = self._search_regex( @@ -87,12 +70,12 @@ class 
FourTubeIE(InfoExtractor): token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format( media_id, '+'.join(sources)) - headers = { - b'Content-Type': b'application/x-www-form-urlencoded', - b'Origin': b'https://www.4tube.com', - } - token_req = sanitized_Request(token_url, b'{}', headers) - tokens = self._download_json(token_req, video_id) + + parsed_url = compat_urlparse.urlparse(url) + tokens = self._download_json(token_url, video_id, data=b'', headers={ + 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), + 'Referer': url, + }) formats = [{ 'url': tokens[format]['token'], 'format_id': format + 'p', @@ -115,3 +98,126 @@ class FourTubeIE(InfoExtractor): 'duration': duration, 'age_limit': 18, } + + +class FourTubeIE(FourTubeBaseIE): + IE_NAME = '4tube' + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?4tube\.com/(?:videos|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' + _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video' + _TESTS = [{ + 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', + 'md5': '6516c8ac63b03de06bc8eac14362db4f', + 'info_dict': { + 'id': '209733', + 'ext': 'mp4', + 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', + 'uploader': 'WCP Club', + 'uploader_id': 'wcp-club', + 'upload_date': '20131031', + 'timestamp': 1383263892, + 'duration': 583, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + }, + }, { + 'url': 'http://www.4tube.com/embed/209733', + 'only_matching': True, + }, { + 'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', + 'only_matching': True, + }] + + +class FuxIE(FourTubeBaseIE): + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?fux\.com/(?:video|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' 
+ _URL_TEMPLATE = 'https://www.fux.com/video/%s/video' + _TESTS = [{ + 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow', + 'info_dict': { + 'id': '195359', + 'ext': 'mp4', + 'title': 'Awesome fucking in the kitchen ends with cum swallow', + 'uploader': 'alenci2342', + 'uploader_id': 'alenci2342', + 'upload_date': '20131230', + 'timestamp': 1388361660, + 'duration': 289, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.fux.com/embed/195359', + 'only_matching': True, + }, { + 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow', + 'only_matching': True, + }] + + +class PornTubeIE(FourTubeBaseIE): + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?porntube\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' + _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s' + _TESTS = [{ + 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759', + 'info_dict': { + 'id': '7089759', + 'ext': 'mp4', + 'title': 'Teen couple doing anal', + 'uploader': 'Alexy', + 'uploader_id': 'Alexy', + 'upload_date': '20150606', + 'timestamp': 1433595647, + 'duration': 5052, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.porntube.com/embed/7089759', + 'only_matching': True, + }, { + 'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759', + 'only_matching': True, + }] + + +class PornerBrosIE(FourTubeBaseIE): + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?pornerbros\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' + _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s' + _TESTS = [{ + 'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369', + 'md5': '6516c8ac63b03de06bc8eac14362db4f', + 'info_dict': { + 'id': '181369', + 'ext': 'mp4', + 'title': 'Skinny brunette 
takes big cock down her anal hole', + 'uploader': 'PornerBros HD', + 'uploader_id': 'pornerbros-hd', + 'upload_date': '20130130', + 'timestamp': 1359527401, + 'duration': 1224, + 'view_count': int, + 'categories': list, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.pornerbros.com/embed/181369', + 'only_matching': True, + }, { + 'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369', + 'only_matching': True, + }] -- cgit v1.1 From 475bcb225f6046e38b47594c504da6ec15bac113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 07:53:02 +0700 Subject: [pornhub:playlistbase] Skip videos from drop-down menu for all playlists (closes #12819, closes #13902) --- youtube_dl/extractor/pornhub.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index e032817..f6777cd 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -227,13 +227,20 @@ class PornHubIE(InfoExtractor): class PornHubPlaylistBaseIE(InfoExtractor): def _extract_entries(self, webpage): + # Only process container div with main playlist content skipping + # drop-down menu that uses similar pattern for videos (see + # https://github.com/rg3/youtube-dl/issues/11594). 
+ container = self._search_regex( + r'(?s)(]+class=["\']container.+)', webpage, + 'container', default=webpage) + return [ self.url_result( 'http://www.pornhub.com/%s' % video_url, PornHubIE.ie_key(), video_title=title) for video_url, title in orderedSet(re.findall( r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', - webpage)) + container)) ] def _real_extract(self, url): @@ -241,14 +248,7 @@ class PornHubPlaylistBaseIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - # Only process container div with main playlist content skipping - # drop-down menu that uses similar pattern for videos (see - # https://github.com/rg3/youtube-dl/issues/11594). - container = self._search_regex( - r'(?s)(]+class=["\']container.+)', webpage, - 'container', default=webpage) - - entries = self._extract_entries(container) + entries = self._extract_entries(webpage) playlist = self._parse_json( self._search_regex( -- cgit v1.1 From 4f049e4aa866aef89d0f4b735fd89eb2ba84e809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 08:00:15 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index b28ade4..daa3601 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version + +Core +* [YoutubeDL] Make sure format id is not empty +* [extractor/common] Make _family_friendly_search optional +* [extractor/common] Respect source's type attribute for HTML5 media (#13892) + +Extractors +* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902) ++ [fourtube] Add support pornerbros.com (#6022) ++ [fourtube] Add support porntube.com (#7859, #13901) ++ [fourtube] Add support fux.com +* [limelight] Improve embeds detection (#13895) ++ [reddit] Add support for v.redd.it and reddit.com (#13847) +* [aparat] Extract all formats (#13887) +* [mixcloud] Fix play info decryption (#13885) ++ [generic] Add support for vzaar 
embeds (#13876) + + version 2017.08.09 Core -- cgit v1.1 From 16393d65355cdb1118e528d6dcb6d82f5f1c2b6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 08:58:30 +0700 Subject: release 2017.08.13 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7ee704e..3bd61e0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.13** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.09 +[debug] youtube-dl version 2017.08.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index daa3601..6bafb1e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.13 Core * [YoutubeDL] Make sure format id is not empty diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3bd077..cc44274 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -294,6 +294,7 @@ - **Funimation** - **FunnyOrDie** - **Fusion** + - **Fux** - **FXNetworks** - **GameInformer** - **GameOne** @@ -621,6 +622,7 @@ - **PolskieRadio** - **PolskieRadioCategory** - **PornCom** + - **PornerBros** - **PornFlip** - **PornHd** - **PornHub**: PornHub and Thumbzilla @@ -629,6 +631,7 @@ - **Pornotube** - **PornoVoisines** - **PornoXO** + - **PornTube** - **PressTV** - **PrimeShareTV** - **PromptFile** @@ -654,6 +657,8 @@ - **RBMARadio** - **RDS**: RDS.ca - **RedBullTV** + - **Reddit** + - **RedditR** - **RedTube** - **RegioTV** - **RENTV** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0221723..da855a6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import 
unicode_literals -__version__ = '2017.08.09' +__version__ = '2017.08.13' -- cgit v1.1 From da20951a57bddd4a0102cd776ff93a2adc6db77d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Aug 2017 22:39:05 +0700 Subject: [mixcloud] Extract decrypt key --- youtube_dl/extractor/mixcloud.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 52f7428..fcf7bee 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..compat import ( compat_chr, compat_ord, + compat_str, compat_urllib_parse_unquote, compat_urlparse, ) @@ -53,15 +54,18 @@ class MixcloudIE(InfoExtractor): 'only_matching': True, }] + _keys = [ + 'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };', + 'pleasedontdownloadourmusictheartistswontgetpaid', + 'window.addEventListener = window.addEventListener || function() {};', + '(function() { return new Date().toLocaleDateString(); })()' + ] + _current_key = None + # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js def _decrypt_play_info(self, play_info, video_id): - KEYS = ( - 'pleasedontdownloadourmusictheartistswontgetpaid', - 'window.addEventListener = window.addEventListener || function() {};', - '(function() { return new Date().toLocaleDateString(); })()', - ) play_info = base64.b64decode(play_info.encode('ascii')) - for num, key in enumerate(KEYS, start=1): + for num, key in enumerate(self._keys, start=1): try: return self._parse_json( ''.join([ @@ -69,7 +73,7 @@ class MixcloudIE(InfoExtractor): for idx, ch in enumerate(play_info)]), video_id) except ExtractorError: - if num == len(KEYS): + if num == len(self._keys): raise def _real_extract(self, url): @@ -80,6 +84,20 @@ class MixcloudIE(InfoExtractor): webpage = self._download_webpage(url, track_id) + if not 
self._current_key: + js_url = self._search_regex( + r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', + webpage, 'js url', default=None) + if js_url: + js = self._download_webpage(js_url, track_id, fatal=False) + if js: + key = self._search_regex( + r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + js, 'key', default=None, group='key') + if key and isinstance(key, compat_str): + self._keys.insert(0, key) + self._current_key = key + message = self._html_search_regex( r'(?s)]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', webpage, 'error message', default=None) -- cgit v1.1 From 19ada898dc80a04ae1a2590c8886c9ec13958b03 Mon Sep 17 00:00:00 2001 From: forDream Date: Wed, 2 Aug 2017 11:12:17 +0800 Subject: fix QQ Music Url changed --- youtube_dl/extractor/qqmusic.py | 43 +++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 17c27da..6bff6ba 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -2,8 +2,8 @@ from __future__ import unicode_literals import random -import time import re +import time from .common import InfoExtractor from ..utils import ( @@ -18,9 +18,9 @@ from ..utils import ( class QQMusicIE(InfoExtractor): IE_NAME = 'qqmusic' IE_DESC = 'QQ音乐' - _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P[0-9A-Za-z]+)\.html' _TESTS = [{ - 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', + 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', 'md5': '9ce1c1c8445f561506d2e3cfb0255705', 'info_dict': { 'id': '004295Et37taLD', @@ -33,7 +33,7 @@ class QQMusicIE(InfoExtractor): } }, { 'note': 'There is no mp3-320 version of this song.', - 'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV', + 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html', 'md5': 
'fa3926f0c585cda0af8fa4f796482e3e', 'info_dict': { 'id': '004MsGEo3DdNxV', @@ -46,7 +46,7 @@ class QQMusicIE(InfoExtractor): } }, { 'note': 'lyrics not in .lrc format', - 'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6', + 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html', 'info_dict': { 'id': '001JyApY11tIp6', 'ext': 'mp3', @@ -163,7 +163,8 @@ class QQPlaylistBaseIE(InfoExtractor): for item in re.findall(r'class="data"[^<>]*>([^<>]+)[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P[0-9A-Za-z]+)\.html' _TEST = { - 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', + 'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html', 'info_dict': { 'id': '001BLpXF2DyJe2', 'title': '林俊杰', @@ -217,10 +218,10 @@ class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' - _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P[0-9A-Za-z]+)\.html' _TESTS = [{ - 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', + 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html', 'info_dict': { 'id': '000gXCTb2AhRR1', 'title': '我们都是这样长大的', @@ -228,7 +229,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): }, 'playlist_count': 4, }, { - 'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3', + 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html', 'info_dict': { 'id': '002Y5a3b3AlCu3', 'title': '그리고...', @@ -246,7 +247,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): entries = [ self.url_result( - 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + ".html", 'QQMusic', song['songmid'] ) for song in album['list'] ] album_name = album.get('name') @@ -260,17 +261,17 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:toplist' IE_DESC = 'QQ音乐 - 排行榜' - _VALID_URL = 
r'https?://y\.qq\.com/#type=toplist&p=(?P(top|global)_[0-9]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P(top|global)_[0-9]+)\.html' _TESTS = [{ - 'url': 'http://y.qq.com/#type=toplist&p=global_123', + 'url': 'https://y.qq.com/n/yqq/toplist/123.html', 'info_dict': { 'id': 'global_123', 'title': '美国iTunes榜', }, 'playlist_count': 10, }, { - 'url': 'http://y.qq.com/#type=toplist&p=top_3', + 'url': 'https://y.qq.com/n/yqq/toplist/3.html', 'info_dict': { 'id': 'top_3', 'title': '巅峰榜·欧美', @@ -281,7 +282,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): }, 'playlist_count': 100, }, { - 'url': 'http://y.qq.com/#type=toplist&p=global_106', + 'url': 'https://y.qq.com/n/yqq/toplist/106.html', 'info_dict': { 'id': 'global_106', 'title': '韩国Mnet榜', @@ -301,7 +302,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): entries = [ self.url_result( - 'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + ".html", 'QQMusic', song['data']['songmid'] ) for song in toplist_json['songlist'] ] @@ -314,10 +315,10 @@ class QQMusicToplistIE(QQPlaylistBaseIE): class QQMusicPlaylistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:playlist' IE_DESC = 'QQ音乐 - 歌单' - _VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P[0-9]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P[0-9]+)\.html' _TESTS = [{ - 'url': 'http://y.qq.com/#type=taoge&id=3462654915', + 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html', 'info_dict': { 'id': '3462654915', 'title': '韩国5月新歌精选下旬', @@ -326,7 +327,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): 'playlist_count': 40, 'skip': 'playlist gone', }, { - 'url': 'http://y.qq.com/#type=taoge&id=1374105607', + 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html', 'info_dict': { 'id': '1374105607', 'title': '易入人心的华语民谣', @@ -352,7 +353,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): cdlist = list_json['cdlist'][0] entries = [ self.url_result( - 
'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + ".html", 'QQMusic', song['songmid'] ) for song in cdlist['songlist'] ] -- cgit v1.1 From 5d1bd3b907d22eab7c47b8b408c07a26dbc358ea Mon Sep 17 00:00:00 2001 From: forDream Date: Wed, 2 Aug 2017 12:20:53 +0800 Subject: [qqmusic]update valid url --- youtube_dl/extractor/qqmusic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 6bff6ba..7513acb 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -261,7 +261,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:toplist' IE_DESC = 'QQ音乐 - 排行榜' - _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P(top|global)_[0-9]+)\.html' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P[0-9]+)\.html' _TESTS = [{ 'url': 'https://y.qq.com/n/yqq/toplist/123.html', @@ -293,7 +293,9 @@ class QQMusicToplistIE(QQPlaylistBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - list_type, num_id = list_id.split("_") + # list_type, num_id = list_id.split("_") + list_type = "toplist" + num_id = list_id toplist_json = self._download_json( 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json' -- cgit v1.1 From 5c037c0d1f155e951050533690d6e990654cfcc9 Mon Sep 17 00:00:00 2001 From: forDream Date: Wed, 2 Aug 2017 15:08:38 +0800 Subject: [qqmusic]support QQMusicSingerIE --- youtube_dl/extractor/qqmusic.py | 52 +++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 7513acb..42be6bc 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -156,16 +156,27 @@ class QQPlaylistBaseIE(InfoExtractor): def qq_static_url(category, mid): return 
'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) - @classmethod - def get_entries_from_page(cls, page): + def get_singer_all_songs(self, singmid, num): + return self._download_webpage( + r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?format=json&inCharset=utf8&outCharset=utf-8&platform=yqq&needNewCode=0&singermid=%s&order=listen&begin=0&num=%s&songstatus=1' % + (singmid, num), singmid) + + def get_entries_from_page(self, singmid): entries = [] - for item in re.findall(r'class="data"[^<>]*>([^<>]+) Date: Mon, 14 Aug 2017 08:28:41 +0800 Subject: [qqmusic] review --- youtube_dl/extractor/qqmusic.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 42be6bc..38f4b2c 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -166,15 +166,15 @@ class QQPlaylistBaseIE(InfoExtractor): default_num = 1 json_text = self.get_singer_all_songs(singmid, default_num) - json_obj = self._parse_json(json_text, singmid) + json_obj_all_songs = self._parse_json(json_text, singmid) - if json_obj['code'] == 0: - total = json_obj['data']['total'] + if json_obj_all_songs['code'] == 0: + total = json_obj_all_songs['data']['total'] json_text = self.get_singer_all_songs(singmid, total) - json_obj = self._parse_json(json_text, singmid) + json_obj_all_songs = self._parse_json(json_text, singmid) - for item in json_obj['data']['list']: - if not (item['musicData'].get('songmid') is None): + for item in json_obj_all_songs['data']['list']: + if item['musicData'].get('songmid') is not None: songmid = item['musicData']['songmid'] entries.append(self.url_result(r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) @@ -248,7 +248,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): entries = [ self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + ".html", 'QQMusic', song['songmid'] + 
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] ) for song in album['list'] ] album_name = album.get('name') @@ -294,8 +294,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - # list_type, num_id = list_id.split("_") - list_type = "toplist" + list_type = 'toplist' num_id = list_id toplist_json = self._download_json( @@ -305,7 +304,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): entries = [ self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + ".html", 'QQMusic', + 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', song['data']['songmid'] ) for song in toplist_json['songlist'] ] @@ -357,7 +356,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): cdlist = list_json['cdlist'][0] entries = [ self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + ".html", 'QQMusic', song['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] ) for song in cdlist['songlist'] ] -- cgit v1.1 From 485047854376465f95309daad4966971f56728ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 15 Aug 2017 23:58:00 +0700 Subject: [extractor/common] Add support for float durations in _parse_mpd_formats (closes #13919) --- test/test_InfoExtractor.py | 86 ++++++++++++++++++++++++++++++++++++ test/testdata/mpd/float_duration.mpd | 18 ++++++++ youtube_dl/extractor/common.py | 2 +- 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 test/testdata/mpd/float_duration.mpd diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 6f52e11..f18a823 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -10,6 +10,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, expect_dict, expect_value +from youtube_dl.compat import compat_etree_fromstring from 
youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError @@ -488,6 +489,91 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_mpd_formats(self): + _TEST_CASES = [ + ( + # https://github.com/rg3/youtube-dl/issues/13919 + 'float_duration', + 'http://unknown/manifest.mpd', + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '318597', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 318.597, + 'width': 340, + 'height': 192, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '638590', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 638.59, + 'width': 512, + 'height': 288, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1022565', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 1022.565, + 'width': 688, + 'height': 384, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '2046506', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 2046.506, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '3998017', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640029', + 'tbr': 3998.017, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '5997485', + 'format_note': 'DASH video', + 
'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640032', + 'tbr': 5997.485, + 'width': 1920, + 'height': 1080, + }] + ), + ] + + for mpd_file, mpd_url, expected_formats in _TEST_CASES: + with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_mpd_formats( + compat_etree_fromstring(f.read().encode('utf-8')), + mpd_url=mpd_url) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd new file mode 100644 index 0000000..8dc1d2d --- /dev/null +++ b/test/testdata/mpd/float_duration.mpd @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7fe8884..e747258 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1786,7 +1786,7 @@ class InfoExtractor(object): ms_info['timescale'] = int(timescale) segment_duration = source.get('duration') if segment_duration: - ms_info['segment_duration'] = int(segment_duration) + ms_info['segment_duration'] = float(segment_duration) def extract_Initialization(source): initialization = source.find(_add_ns('Initialization')) -- cgit v1.1 From a1aa6596626a98d068780f092367b87398840c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 16 Aug 2017 23:03:42 +0700 Subject: [periscope] Renew HLS extraction (closes #13917) --- youtube_dl/extractor/periscope.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index bfa12ed..e5e0853 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -80,18 +80,24 @@ class PeriscopeIE(PeriscopeBaseIE): stream = self._call_api( 'getAccessPublic', {'broadcast_id': token}, 
token) + video_urls = set() formats = [] - for format_id in ('replay', 'rtmp', 'hls', 'https_hls'): + for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): video_url = stream.get(format_id + '_url') - if not video_url: + if not video_url or video_url in video_urls: continue - f = { + video_urls.add(video_url) + if format_id != 'rtmp': + formats.extend(self._extract_m3u8_formats( + video_url, token, 'mp4', + entry_protocol='m3u8_native' + if state in ('ended', 'timed_out') else 'm3u8', + m3u8_id=format_id, fatal=False)) + continue + formats.append({ 'url': video_url, 'ext': 'flv' if format_id == 'rtmp' else 'mp4', - } - if format_id != 'rtmp': - f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8' - formats.append(f) + }) self._sort_formats(formats) return { -- cgit v1.1 From 25a6e769a1af3a79f439369fb683a1d487777cb9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Aug 2017 16:39:57 +0800 Subject: [qqmusic] Fix tests and cleanup --- youtube_dl/extractor/qqmusic.py | 89 ++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 38f4b2c..62f9860 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -7,11 +7,10 @@ import time from .common import InfoExtractor from ..utils import ( - sanitized_Request, - strip_jsonp, - unescapeHTML, clean_html, ExtractorError, + strip_jsonp, + unescapeHTML, ) @@ -21,14 +20,14 @@ class QQMusicIE(InfoExtractor): _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P[0-9A-Za-z]+)\.html' _TESTS = [{ 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', - 'md5': '9ce1c1c8445f561506d2e3cfb0255705', + 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8', 'info_dict': { 'id': '004295Et37taLD', 'ext': 'mp3', 'title': '可惜没如果', 'release_date': '20141227', 'creator': '林俊杰', - 'description': 'md5:d327722d0361576fde558f1ac68a7065', + 'description': 
'md5:d85afb3051952ecc50a1ee8a286d1eac', 'thumbnail': r're:^https?://.*\.jpg$', } }, { @@ -53,7 +52,7 @@ class QQMusicIE(InfoExtractor): 'title': 'Shadows Over Transylvania', 'release_date': '19970225', 'creator': 'Dark Funeral', - 'description': 'md5:ed14d5bd7ecec19609108052c25b2c11', + 'description': 'md5:c9b20210587cbcd6836a1c597bab4525', 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { @@ -105,7 +104,7 @@ class QQMusicIE(InfoExtractor): [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], detail_info_page, 'album mid', default=None) if albummid: - thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \ + thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \ % (albummid[-2:-1], albummid[-1], albummid) guid = self.m_r_get_ruin() @@ -158,8 +157,19 @@ class QQPlaylistBaseIE(InfoExtractor): def get_singer_all_songs(self, singmid, num): return self._download_webpage( - r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?format=json&inCharset=utf8&outCharset=utf-8&platform=yqq&needNewCode=0&singermid=%s&order=listen&begin=0&num=%s&songstatus=1' % - (singmid, num), singmid) + r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid, + query={ + 'format': 'json', + 'inCharset': 'utf8', + 'outCharset': 'utf-8', + 'platform': 'yqq', + 'needNewCode': 0, + 'singermid': singmid, + 'order': 'listen', + 'begin': 0, + 'num': num, + 'songstatus': 1, + }) def get_entries_from_page(self, singmid): entries = [] @@ -176,7 +186,8 @@ class QQPlaylistBaseIE(InfoExtractor): for item in json_obj_all_songs['data']['list']: if item['musicData'].get('songmid') is not None: songmid = item['musicData']['songmid'] - entries.append(self.url_result(r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) + entries.append(self.url_result( + r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) return entries @@ -192,7 +203,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): 'title': '林俊杰', 
'description': 'md5:870ec08f7d8547c29c93010899103751', }, - 'playlist_count': 12, + 'playlist_mincount': 12, } def _real_extract(self, url): @@ -200,16 +211,16 @@ class QQMusicSingerIE(QQPlaylistBaseIE): entries = self.get_entries_from_page(mid) singer_page = self._download_webpage(url, mid, 'Download singer page') - singer_name = self._html_search_regex(r"singername : '(.*?)'", singer_page, 'singer name', default=None) + singer_name = self._html_search_regex( + r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None) singer_desc = None if mid: - req = sanitized_Request( - 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singermid=%s' % mid) - req.add_header( - 'Referer', 'https://y.qq.com/n/yqq/singer/') singer_desc_page = self._download_xml( - req, mid, 'Donwload singer description XML') + 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid, + 'Donwload singer description XML', + query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid}, + headers={'Referer': 'https://y.qq.com/n/yqq/singer/'}) singer_desc = singer_desc_page.find('./data/info/desc').text @@ -267,26 +278,25 @@ class QQMusicToplistIE(QQPlaylistBaseIE): _TESTS = [{ 'url': 'https://y.qq.com/n/yqq/toplist/123.html', 'info_dict': { - 'id': 'global_123', + 'id': '123', 'title': '美国iTunes榜', + 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08', }, - 'playlist_count': 10, + 'playlist_count': 100, }, { 'url': 'https://y.qq.com/n/yqq/toplist/3.html', 'info_dict': { - 'id': 'top_3', + 'id': '3', 'title': '巅峰榜·欧美', - 'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统' - '计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据' - '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:' - '登录用户完整播放一首歌曲,记为一次有效播放;同一用户收听同一首歌曲,每天记录为1次有效播放' + 'description': 'md5:5a600d42c01696b26b71f8c4d43407da', }, 'playlist_count': 100, }, { 'url': 'https://y.qq.com/n/yqq/toplist/106.html', 'info_dict': { - 'id': 
'global_106', + 'id': '106', 'title': '韩国Mnet榜', + 'description': 'md5:cb84b325215e1d21708c615cac82a6e7', }, 'playlist_count': 50, }] @@ -298,16 +308,14 @@ class QQMusicToplistIE(QQPlaylistBaseIE): num_id = list_id toplist_json = self._download_json( - 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json' - % (list_type, num_id), - list_id, 'Download toplist page') + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id, + note='Download toplist page', + query={'type': 'toplist', 'topid': list_id, 'format': 'json'}) - entries = [ - self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', - song['data']['songmid'] - ) for song in toplist_json['songlist'] - ] + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', + song['data']['songmid']) + for song in toplist_json['songlist']] topinfo = toplist_json.get('topinfo', {}) list_name = topinfo.get('ListName') @@ -343,8 +351,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): list_id = self._match_id(url) list_json = self._download_json( - 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s' - % list_id, list_id, 'Download list page', + 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', + list_id, 'Download list page', + query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, transform_source=strip_jsonp) if not len(list_json.get('cdlist', [])): if list_json.get('code'): @@ -354,11 +363,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): raise ExtractorError('Unable to get playlist info') cdlist = list_json['cdlist'][0] - entries = [ - self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] - ) for song in cdlist['songlist'] - ] + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']) + 
for song in cdlist['songlist']] list_name = cdlist.get('dissname') list_description = clean_html(unescapeHTML(cdlist.get('desc'))) -- cgit v1.1 From 12f5304556343fafb6a38ad5b4d5ef9fc908f15c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Aug 2017 16:40:56 +0800 Subject: [ChangeLog] Add entry for #13805 --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 6bafb1e..5b89773 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version + +Extractors + ++ [qqmusic] Support new URL schemes (#13805) + + version 2017.08.13 Core -- cgit v1.1 From bfabd17b33d47f1e973121483623768010880845 Mon Sep 17 00:00:00 2001 From: Genki Sky Date: Tue, 8 Aug 2017 22:49:57 -0400 Subject: Add new extractor --- youtube_dl/extractor/clippit.py | 74 ++++++++++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 75 insertions(+) create mode 100644 youtube_dl/extractor/clippit.py diff --git a/youtube_dl/extractor/clippit.py b/youtube_dl/extractor/clippit.py new file mode 100644 index 0000000..a1a7a77 --- /dev/null +++ b/youtube_dl/extractor/clippit.py @@ -0,0 +1,74 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + qualities, +) + +import re + + +class ClippitIE(InfoExtractor): + + _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P[a-z]+)' + _TEST = { + 'url': 'https://www.clippituser.tv/c/evmgm', + 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09', + 'info_dict': { + 'id': 'evmgm', + 'ext': 'mp4', + 'title': 'Bye bye Brutus. 
#BattleBots - Clippit', + 'uploader': 'lizllove', + 'uploader_url': 'https://www.clippituser.tv/p/lizllove', + 'timestamp': 1472183818, + 'upload_date': '20160826', + 'description': 'BattleBots | ABC', + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'(.+?)', webpage, 'title') + + FORMATS = ('sd', 'hd') + quality = qualities(FORMATS) + formats = [] + for format_id in FORMATS: + url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id, + webpage, 'url', fatal=False) + if not url: + continue + match = re.search(r'/(?P\d+)\.mp4', url) + formats.append({ + 'url': url, + 'format_id': format_id, + 'quality': quality(format_id), + 'height': int(match.group('height')) if match else None, + }) + + uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n', + webpage, 'uploader', fatal=False) + uploader_url = ('https://www.clippituser.tv/p/' + uploader + if uploader else None) + + timestamp = self._html_search_regex(r'datetime="(.+?)"', + webpage, 'date', fatal=False) + thumbnail = self._html_search_regex(r'data-image="(.+?)"', + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'uploader': uploader, + 'uploader_url': uploader_url, + 'timestamp': parse_iso8601(timestamp), + 'description': self._og_search_description(webpage), + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fb79a17..ccfa14e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -187,6 +187,7 @@ from .chirbit import ( from .cinchcast import CinchcastIE from .cjsw import CJSWIE from .cliphunter import CliphunterIE +from .clippit import ClippitIE from .cliprs import ClipRsIE from .clipsyndicate import ClipsyndicateIE from .closertotruth import CloserToTruthIE -- cgit v1.1 From 
7ddab7742cad2ff04ec087e3e1d19422c931782b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Aug 2017 16:56:37 +0800 Subject: [ChangeLog] Add an entry for Genki Sky's patch --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 5b89773..cf7d1be 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ version Extractors - ++ [clippit] Add support for clippituser.tv + [qqmusic] Support new URL schemes (#13805) -- cgit v1.1 From 5d28169747e34850fcb53760c77eccb7f3195ef2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Aug 2017 21:21:17 +0800 Subject: Credit Genki Sky for clippit (bfabd17b33d) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 053159c..478c787 100644 --- a/AUTHORS +++ b/AUTHORS @@ -223,3 +223,4 @@ Jan Kundrát Giuseppe Fabiano Örn Guðjónsson Parmjit Virk +Genki Sky -- cgit v1.1 From 93d0583e34b0cd826f081a766b00381bb5fed52d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Aug 2017 22:45:40 +0700 Subject: [pluralsight] Use RPC API for course extraction (closes #13937) --- youtube_dl/extractor/pluralsight.py | 52 +++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index d35f54c..f6a9131 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -18,6 +18,7 @@ from ..utils import ( parse_duration, qualities, srt_subtitles_timecode, + try_get, update_url_query, urlencode_postdata, ) @@ -26,6 +27,39 @@ from ..utils import ( class PluralsightBaseIE(InfoExtractor): _API_BASE = 'https://app.pluralsight.com' + def _download_course(self, course_id, url, display_id): + try: + return self._download_course_rpc(course_id, url, display_id) + except ExtractorError: + # Old API fallback + return self._download_json( + 'https://app.pluralsight.com/player/user/api/v1/player/payload', + 
display_id, data=urlencode_postdata({'courseId': course_id}), + headers={'Referer': url}) + + def _download_course_rpc(self, course_id, url, display_id): + response = self._download_json( + '%s/player/functions/rpc' % self._API_BASE, display_id, + 'Downloading course JSON', + data=json.dumps({ + 'fn': 'bootstrapPlayer', + 'payload': { + 'courseId': course_id, + }, + }).encode('utf-8'), + headers={ + 'Content-Type': 'application/json;charset=utf-8', + 'Referer': url, + }) + + course = try_get(response, lambda x: x['payload']['course'], dict) + if course: + return course + + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']['message']), + expected=True) + class PluralsightIE(PluralsightBaseIE): IE_NAME = 'pluralsight' @@ -162,10 +196,7 @@ class PluralsightIE(PluralsightBaseIE): display_id = '%s-%s' % (name, clip_id) - course = self._download_json( - 'https://app.pluralsight.com/player/user/api/v1/player/payload', - display_id, data=urlencode_postdata({'courseId': course_name}), - headers={'Referer': url}) + course = self._download_course(course_name, url, display_id) collection = course['modules'] @@ -331,18 +362,7 @@ class PluralsightCourseIE(PluralsightBaseIE): # TODO: PSM cookie - course = self._download_json( - '%s/player/functions/rpc' % self._API_BASE, course_id, - 'Downloading course JSON', - data=json.dumps({ - 'fn': 'bootstrapPlayer', - 'payload': { - 'courseId': course_id, - } - }).encode('utf-8'), - headers={ - 'Content-Type': 'application/json;charset=utf-8' - })['payload']['course'] + course = self._download_course(course_id, url, course_id) title = course['title'] course_name = course['name'] -- cgit v1.1 From 5f5c7b92dda1da6a0f15af7e3999a6ff298a8c92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Aug 2017 23:14:46 +0700 Subject: [udemy] Fix paid course detection (#13943) --- youtube_dl/extractor/udemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py 
b/youtube_dl/extractor/udemy.py index 3b02f43..207c4a6 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -74,7 +74,7 @@ class UdemyIE(InfoExtractor): return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url checkout_url = unescapeHTML(self._search_regex( - r'href=(["\'])(?P(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1', + r'href=(["\'])(?P(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1', webpage, 'checkout url', group='url', default=None)) if checkout_url: raise ExtractorError( -- cgit v1.1 From 5551d7714d53caaaae32cdedad11a0bdc95efcf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Aug 2017 23:57:48 +0700 Subject: [generic] Convert redirect URLs to unicode strings (closes #13951) --- youtube_dl/extractor/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index eff5fbf..d2fb262 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2015,7 +2015,7 @@ class GenericIE(InfoExtractor): if head_response is not False: # Check for redirect - new_url = head_response.geturl() + new_url = compat_str(head_response.geturl()) if url != new_url: self.report_following_redirect(new_url) if force_videoid: @@ -2116,7 +2116,7 @@ class GenericIE(InfoExtractor): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, video_id, - mpd_base_url=full_response.geturl().rpartition('/')[0], + mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0], mpd_url=url) self._sort_formats(info_dict['formats']) return info_dict -- cgit v1.1 From a5ac0c475589fd1dcd3ba04802f28828c24be6c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Aug 2017 23:59:12 +0700 Subject: [YoutubeDL] Sanitize byte string format URLs (#13951) --- youtube_dl/YoutubeDL.py | 8 +++++--- 1 file 
changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index df7378f..5f4c93e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1483,12 +1483,14 @@ class YoutubeDL(object): def is_wellformed(f): url = f.get('url') - valid_url = url and isinstance(url, compat_str) - if not valid_url: + if not url: self.report_warning( '"url" field is missing or empty - skipping format, ' 'there is an error in extractor') - return valid_url + return False + if isinstance(url, bytes): + sanitize_string_field(f, 'url') + return True # Filter out malformed formats for better extraction robustness formats = list(filter(is_wellformed, formats)) -- cgit v1.1 From c0892b2b465cff95d392eaa725e39bd47e4dff58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 00:58:23 +0700 Subject: [arte] Detect unavailable videos (closes #13945) --- youtube_dl/extractor/arte.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 56baef2..02613cf 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -9,12 +9,13 @@ from ..compat import ( compat_urllib_parse_urlparse, ) from ..utils import ( + ExtractorError, find_xpath_attr, - unified_strdate, get_element_by_attribute, int_or_none, NO_DEFAULT, qualities, + unified_strdate, ) # There are different sources of video in arte.tv, the extraction process @@ -79,6 +80,13 @@ class ArteTVBaseIE(InfoExtractor): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] + vsr = player_info['VSR'] + + if not vsr and not player_info.get('VRU'): + raise ExtractorError( + 'Video %s is not available' % player_info.get('VID') or video_id, + expected=True) + upload_date_str = player_info.get('shootingDate') if not upload_date_str: upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] @@ -107,7 +115,7 
@@ class ArteTVBaseIE(InfoExtractor): langcode = LANGS.get(lang, lang) formats = [] - for format_id, format_dict in player_info['VSR'].items(): + for format_id, format_dict in vsr.items(): f = dict(format_dict) versionCode = f.get('versionCode') l = re.escape(langcode) -- cgit v1.1 From 4a919103651905d4e5954c5d655b45055384e283 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 01:00:07 +0700 Subject: [qqmusic:toplist] PEP 8 --- youtube_dl/extractor/qqmusic.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 62f9860..084308a 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -304,9 +304,6 @@ class QQMusicToplistIE(QQPlaylistBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - list_type = 'toplist' - num_id = list_id - toplist_json = self._download_json( 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id, note='Download toplist page', -- cgit v1.1 From 2738965d98e1883a781a1e9743de0af086c5acd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 01:03:20 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index cf7d1be..298e0b0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,19 @@ version +Core +* [YoutubeDL] Sanitize byte string format URLs (#13951) ++ [extractor/common] Add support for float durations in _parse_mpd_formats + (#13919) + Extractors +* [arte] Detect unavailable videos (#13945) +* [generic] Convert redirect URLs to unicode strings (#13951) +* [udemy] Fix paid course detection (#13943) +* [pluralsight] Use RPC API for course extraction (#13937) + [clippit] Add support for clippituser.tv + [qqmusic] Support new URL schemes (#13805) +* [periscope] Renew HLS extraction (#13917) +* [mixcloud] Extract decrypt key version 2017.08.13 -- cgit v1.1 From 
ea004d34f83fd7dd9a00fc3e2deb5a101aff6ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 01:05:27 +0700 Subject: release 2017.08.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3bd61e0..66dd4c4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.13** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.13 +[debug] youtube-dl version 2017.08.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 298e0b0..9a0fad6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.18 Core * [YoutubeDL] Sanitize byte string format URLs (#13951) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cc44274..1991975 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -156,6 +156,7 @@ - **Cinchcast** - **CJSW** - **cliphunter** + - **Clippit** - **ClipRs** - **Clipsyndicate** - **CloserToTruth** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index da855a6..4358cd3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.13' +__version__ = '2017.08.18' -- cgit v1.1 From d14d9d8903a532e346dffc3b83730045f18f2c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 23:31:42 +0700 Subject: [mixcloud] Fix extraction (closes #13958) --- youtube_dl/extractor/mixcloud.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py 
b/youtube_dl/extractor/mixcloud.py index fcf7bee..798968a 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -91,12 +91,14 @@ class MixcloudIE(InfoExtractor): if js_url: js = self._download_webpage(js_url, track_id, fatal=False) if js: - key = self._search_regex( - r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', - js, 'key', default=None, group='key') - if key and isinstance(key, compat_str): - self._keys.insert(0, key) - self._current_key = key + KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' + for key_name in ('value', 'key_value'): + key = self._search_regex( + KEY_RE_TEMPLATE % key_name, js, 'key', + default=None, group='key') + if key and isinstance(key, compat_str): + self._keys.insert(0, key) + self._current_key = key message = self._html_search_regex( r'(?s)]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', -- cgit v1.1 From f5469da9e6e259c1690c7ef54f1da1c19f65036f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Aug 2017 19:48:20 +0700 Subject: [laola1tv] Add support for tv.ittf.com (closes #13965) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/laola1tv.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ccfa14e..bda6826 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -509,6 +509,7 @@ from .la7 import LA7IE from .laola1tv import ( Laola1TvEmbedIE, Laola1TvIE, + ITTFIE, ) from .lci import LCIIE from .lcp import ( diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 1f91ba0..c7f8133 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -215,3 +215,21 @@ class Laola1TvIE(Laola1TvEmbedIE): 'formats': formats, 'is_live': is_live, } + + +class ITTFIE(InfoExtractor): + _VALID_URL = 
r'https?://tv\.ittf\.com/video/[^/]+/(?P\d+)' + _TEST = { + 'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802', + 'only_matching': True, + } + + def _real_extract(self, url): + return self.url_result( + update_url_query('https://www.laola1.tv/titanplayer.php', { + 'videoid': self._match_id(url), + 'type': 'V', + 'lang': 'en', + 'portal': 'int', + 'customer': 1024, + }), Laola1TvEmbedIE.ie_key()) -- cgit v1.1 From 95f3f7c20a05e7ac490e768b8470b20538ef8581 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 21:40:53 +0800 Subject: [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) --- ChangeLog | 6 ++++++ test/test_utils.py | 1 + youtube_dl/utils.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9a0fad6..9eab4d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) + + version 2017.08.18 Core diff --git a/test/test_utils.py b/test/test_utils.py index 2aab16b..e50f376 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('/'), '/') self.assertEqual(unescapeHTML('é'), 'é') self.assertEqual(unescapeHTML('�'), '�') + self.assertEqual(unescapeHTML('&a"'), '&a"') # HTML5 entities self.assertEqual(unescapeHTML('.''), '.\'') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c9cbd58..2554a2a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -596,7 +596,7 @@ def unescapeHTML(s): assert type(s) == compat_str return re.sub( - r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) + r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) def get_subprocess_encoding(): -- cgit v1.1 From f8f18f332f235bcfa2f8fc161887e0eef283fec0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 21:44:47 +0800 Subject: [cda] Fix extraction (closes #13935) --- ChangeLog | 1 + 
youtube_dl/extractor/cda.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9eab4d1..6c32747 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core +* [cda] Fix extraction (closes #13935) * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 78b7a92..0c3af23 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -124,7 +124,7 @@ class CDAIE(InfoExtractor): } def extract_format(page, version): - json_str = self._search_regex( + json_str = self._html_search_regex( r'player_data=(\\?["\'])(?P.+?)\1', page, '%s player_json' % version, fatal=False, group='player_data') if not json_str: -- cgit v1.1 From 09747ba7663a9c6f89530c7ffbd95cb4776db6bf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 22:27:53 +0800 Subject: [liveleak] Support another liveleak embedding pattern (closes #13336) --- ChangeLog | 3 +++ youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/generic.py | 23 ++++++++++++++++++----- youtube_dl/extractor/liveleak.py | 35 ++++++++++++++++++++++++++++++----- 4 files changed, 55 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c32747..4c7997b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,9 @@ Core * [cda] Fix extraction (closes #13935) * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) +Extractors ++ [liveleak] Support another liveleak embedding pattern (#13336) + version 2017.08.18 diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bda6826..17048fd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -537,7 +537,10 @@ from .limelight import ( LimelightChannelListIE, ) from .litv import LiTVIE -from .liveleak import LiveLeakIE +from .liveleak import ( + LiveLeakIE, + LiveLeakEmbedIE, +) from .livestream import ( LivestreamIE, 
LivestreamOriginalIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d2fb262..49b00b8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1519,14 +1519,27 @@ class GenericIE(InfoExtractor): # LiveLeak embed { 'url': 'http://www.wykop.pl/link/3088787/', - 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d', + 'md5': '7619da8c820e835bef21a1efa2a0fc71', 'info_dict': { 'id': '874_1459135191', 'ext': 'mp4', 'title': 'Man shows poor quality of new apartment building', 'description': 'The wall is like a sand pile.', 'uploader': 'Lake8737', - } + }, + 'add_ie': [LiveLeakIE.ie_key()], + }, + # Another LiveLeak embed pattern (#13336) + { + 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/', + 'info_dict': { + 'id': '2eb_1496309988', + 'ext': 'mp4', + 'title': 'Thief robs place where everyone was armed', + 'description': 'md5:694d73ee79e535953cf2488562288eee', + 'uploader': 'brazilwtf', + }, + 'add_ie': [LiveLeakIE.ie_key()], }, # Duplicated embedded video URLs { @@ -2757,9 +2770,9 @@ class GenericIE(InfoExtractor): self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()) # Look for LiveLeak embeds - liveleak_url = LiveLeakIE._extract_url(webpage) - if liveleak_url: - return self.url_result(liveleak_url, 'LiveLeak') + liveleak_urls = LiveLeakIE._extract_urls(webpage) + if liveleak_urls: + return self.playlist_from_matches(liveleak_urls, video_id, video_title) # Look for 3Q SDN embeds threeqsdn_url = ThreeQSDNIE._extract_url(webpage) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index b2247a8..d23eaa3 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -75,12 +75,10 @@ class LiveLeakIE(InfoExtractor): }] @staticmethod - def _extract_url(webpage): - mobj = re.search( - r']+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P[\w_]+)(?:.*)', + def _extract_urls(webpage): + return re.findall( + 
r']+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"', webpage) - if mobj: - return 'http://www.liveleak.com/view?i=%s' % mobj.group('id') def _real_extract(self, url): video_id = self._match_id(url) @@ -131,3 +129,30 @@ class LiveLeakIE(InfoExtractor): }) return info_dict + + +class LiveLeakEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P[if])=(?P[\w_]+)' + + # See generic.py for actual test cases + _TESTS = [{ + 'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191', + 'only_matching': True, + }, { + 'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + kind, video_id = mobj.group('kind', 'id') + + if kind == 'f': + webpage = self._download_webpage(url, video_id) + liveleak_url = self._search_regex( + r'logourl\s*:\s*(?P[\'"])(?P%s)(?P=q1)' % LiveLeakIE._VALID_URL, + webpage, 'LiveLeak URL', group='url') + elif kind == 'i': + liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id + + return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key()) -- cgit v1.1 From e2481b9b6e621e43fd77e395fd2283ce262b71f3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 22:28:58 +0800 Subject: [ChangeLog] Fix --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 4c7997b..320609a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,11 @@ version Core -* [cda] Fix extraction (closes #13935) * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) Extractors + [liveleak] Support another liveleak embedding pattern (#13336) +* [cda] Fix extraction (#13935) version 2017.08.18 -- cgit v1.1 From 381ad4f30998443fabc4c8633caa548685f49c6b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 22:48:00 +0800 Subject: [liveleak] Support multi-video pages (closes #6542) --- ChangeLog | 1 + 
youtube_dl/extractor/liveleak.py | 43 +++++++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index 320609a..c07cb96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) Extractors ++ [liveleak] Support multi-video pages (#6542) + [liveleak] Support another liveleak embedding pattern (#13336) * [cda] Fix extraction (#13935) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index d23eaa3..246aac5 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -72,6 +72,13 @@ class LiveLeakIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.liveleak.com/view?i=677_1439397581', + 'info_dict': { + 'id': '677_1439397581', + 'title': 'Fuel Depot in China Explosion caught on video', + }, + 'playlist_count': 3, }] @staticmethod @@ -109,26 +116,30 @@ class LiveLeakIE(InfoExtractor): 'age_limit': age_limit, } - info_dict = entries[0] + for idx, info_dict in enumerate(entries): + for a_format in info_dict['formats']: + if not a_format.get('height'): + a_format['height'] = int_or_none(self._search_regex( + r'([0-9]+)p\.mp4', a_format['url'], 'height label', + default=None)) - for a_format in info_dict['formats']: - if not a_format.get('height'): - a_format['height'] = int_or_none(self._search_regex( - r'([0-9]+)p\.mp4', a_format['url'], 'height label', - default=None)) + self._sort_formats(info_dict['formats']) - self._sort_formats(info_dict['formats']) + # Don't append entry ID for one-video pages to keep backward compatibility + if len(entries) > 1: + info_dict['id'] = '%s_%s' % (video_id, idx + 1) + else: + info_dict['id'] = video_id - info_dict.update({ - 'id': video_id, - 'title': video_title, - 'description': video_description, - 'uploader': video_uploader, - 'age_limit': age_limit, - 'thumbnail': video_thumbnail, - }) + info_dict.update({ 
+ 'title': video_title, + 'description': video_description, + 'uploader': video_uploader, + 'age_limit': age_limit, + 'thumbnail': video_thumbnail, + }) - return info_dict + return self.playlist_result(entries, video_id, video_title) class LiveLeakEmbedIE(InfoExtractor): -- cgit v1.1 From d3d45e0a451bab2cc36181bb50bf3c129a7a5ec4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Aug 2017 23:54:15 +0700 Subject: [bbccouk] Add support for events URLs (closes #13893) --- youtube_dl/extractor/bbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 79ded6b..911ae67 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -37,7 +37,8 @@ class BBCCoUkIE(InfoExtractor): programmes/(?!articles/)| iplayer(?:/[^/]+)?/(?:episode/|playlist/)| music/(?:clips|audiovideo/popular)[/#]| - radio/player/ + radio/player/| + events/[^/]+/play/[^/]+/ ) (?P%s)(?!/(?:episodes|broadcasts|clips)) ''' % _ID_REGEX -- cgit v1.1 From 305d99f0bd1effc0e164792199bf93a872da2962 Mon Sep 17 00:00:00 2001 From: "Bernhard M. Wiedemann" Date: Mon, 17 Jul 2017 13:49:09 +0200 Subject: [build] Override timestamps in zip file to make build reproducible. See https://reproducible-builds.org/ for why this is good Copying files to not interfere with freshness detection. 
--- Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 84ccce2..41e3a68 100644 --- a/Makefile +++ b/Makefile @@ -46,8 +46,15 @@ tar: youtube-dl.tar.gz pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish youtube-dl: youtube_dl/*.py youtube_dl/*/*.py - zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py - zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py + mkdir -p zip + for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \ + mkdir -p zip/$$d ;\ + cp -a $$d/*.py zip/$$d/ ;\ + done + touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py + mv zip/youtube_dl/__main__.py zip/ + cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py + rm -rf zip echo '#!$(PYTHON)' > youtube-dl cat youtube-dl.zip >> youtube-dl rm youtube-dl.zip -- cgit v1.1 From b359e977b9bdff704cd58f6f3b34185ecbe450e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Aug 2017 14:16:58 +0700 Subject: [extractor/common] Make HLS and DASH extraction non fatal in _parse_html5_media_entries (closes #13970) --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e747258..ceba4ca 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2123,11 +2123,11 @@ class InfoExtractor(object): formats = self._extract_m3u8_formats( full_url, video_id, ext='mp4', entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id, - preference=preference) + preference=preference, fatal=False) elif ext == 'mpd': is_plain_url = False formats = self._extract_mpd_formats( - full_url, video_id, mpd_id=mpd_id) + full_url, video_id, mpd_id=mpd_id, fatal=False) else: is_plain_url = True formats = [{ -- cgit v1.1 From 8239c6791a36813cacc337c5c4a8801d181b8b54 Mon Sep 17 00:00:00 2001 From: Luca Steeb 
Date: Sun, 20 Aug 2017 09:32:33 -0700 Subject: [bandcamp:album] Extract track titles --- youtube_dl/extractor/bandcamp.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 9ddb9af..be41bd5 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -242,7 +242,12 @@ class BandcampAlbumIE(InfoExtractor): raise ExtractorError('The page doesn\'t contain any tracks') # Only tracks with duration info have songs entries = [ - self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) + self.url_result( + compat_urlparse.urljoin(url, t_path), + ie=BandcampIE.ie_key(), + video_title=self._search_regex( + r']+\bitemprop=["\']name["\'][^>]*>([^<]+)', + elem_content, 'track title', fatal=False)) for elem_content, t_path in track_elements if self._html_search_meta('duration', elem_content, default=None)] -- cgit v1.1 From 903d4d1625f59b6fb359a898fbb512cb2d6181e9 Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Sun, 20 Aug 2017 09:35:39 -0700 Subject: [README.md] Switch to HTTPS URLs --- README.md | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 0067184..6f5d00d 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl -Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). 
+Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). You can also use pip: @@ -33,7 +33,7 @@ You can also use pip: This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. -OS X users can install youtube-dl with [Homebrew](http://brew.sh/): +OS X users can install youtube-dl with [Homebrew](https://brew.sh/): brew install youtube-dl @@ -458,7 +458,7 @@ You can also use `--config-location` if you want to use custom configuration fil ### Authentication with `.netrc` file -You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you: +You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. 
For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you: ``` touch $HOME/.netrc chmod a-rwx,u+rw $HOME/.netrc @@ -485,7 +485,7 @@ The `-o` option allows users to indicate a template for the output file names. **tl;dr:** [navigate me to examples](#output-template-examples). -The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are: +The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. 
Allowed names along with sequence type are: - `id` (string): Video identifier - `title` (string): Video title @@ -603,7 +603,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext) $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/ # Download entire series season keeping each series and each season in separate directory under C:/MyVideos -$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617 +$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617 # Stream the video being downloaded to stdout $ youtube-dl -o - BaW_jenozKc @@ -716,17 +716,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 ### How do I update youtube-dl? -If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). +If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. -If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. 
+If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like sudo apt-get remove -y youtube-dl -Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html): +Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html): ``` sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl @@ -766,11 +766,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much. youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option. -Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed. +Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. 
Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed. ### I have downloaded a video but how can I play it? -Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/). +Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/). ### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser. @@ -845,10 +845,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o ### How do I download a video starting with a `-`? -Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`: +Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`: youtube-dl -- -wNyEUrxzFU - youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU" + youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU" ### How do I pass cookies to youtube-dl? @@ -862,9 +862,9 @@ Passing cookies to youtube-dl is a good way to workaround login when a particula ### How do I stream directly to media player? -You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with: +You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. 
For example, streaming to [vlc](https://www.videolan.org/) can be achieved with: - youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - + youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - ### How do I download only new videos from a playlist? @@ -884,7 +884,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg. -In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader. +In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader. If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case. @@ -910,7 +910,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue ### How can I detect whether a given URL is supported by youtube-dl? -For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. 
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. @@ -924,7 +924,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe # DEVELOPER INSTRUCTIONS -Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. +Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution. To run youtube-dl as a developer, you don't need to build anything either. Simply execute @@ -972,7 +972,7 @@ After you have ensured this site is distributing its content legally, you can fo class YourExtractorIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' _TEST = { - 'url': 'http://yourextractor.com/watch/42', + 'url': 'https://yourextractor.com/watch/42', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': '42', @@ -1005,8 +1005,8 @@ After you have ensured this site is distributing its content legally, you can fo 5. 
Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. -9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: +8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. +9. 
When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py @@ -1162,7 +1162,7 @@ import youtube_dl ydl_opts = {} with youtube_dl.YoutubeDL(ydl_opts) as ydl: - ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) + ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object. @@ -1201,19 +1201,19 @@ ydl_opts = { 'progress_hooks': [my_hook], } with youtube_dl.YoutubeDL(ydl_opts) as ydl: - ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) + ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` # BUGS -Bugs and suggestions should be reported at: . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). +Bugs and suggestions should be reported at: . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). **Please include the full output of youtube-dl when run with `-v`**, i.e. 
**add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` $ youtube-dl -v [debug] System config: [] [debug] User config: [] -[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] youtube-dl version 2015.12.06 [debug] Git HEAD: 135392e @@ -1244,7 +1244,7 @@ For bug reports, this means that your report should contain the *complete* outpu If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). -**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL. +**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL. ### Are you using the latest version? 
-- cgit v1.1 From 8d9c2a681a1dcf99ab949e79b1b9da17513e11d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Aug 2017 23:06:27 +0700 Subject: [pornhub] Relax uploader regex (closes #13906, closes #13975) --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index f6777cd..3428458 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -186,7 +186,7 @@ class PornHubIE(InfoExtractor): title, thumbnail, duration = [None] * 3 video_uploader = self._html_search_regex( - r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', + r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', webpage, 'uploader', fatal=False) view_count = self._extract_count( -- cgit v1.1 From 05915e379a2406988f752722dfaa815804fb7fb8 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Tue, 22 Aug 2017 11:48:59 -0500 Subject: [googledrive] Add support for subtitles (fixes #13619) --- youtube_dl/extractor/googledrive.py | 104 +++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index c40da85..35edc74 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -7,6 +7,8 @@ from ..utils import ( ExtractorError, int_or_none, lowercase_escape, + error_to_compat_str, + update_url_query, ) @@ -24,7 +26,14 @@ class GoogleDriveIE(InfoExtractor): }, { # video id is longer than 28 characters 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', - 'only_matching': True, + 'md5': 'c230c67252874fddd8170e3fd1a45886', + 'info_dict': { + 'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ', + 'ext': 'mp4', + 'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4', + 'duration': 
189, + }, + 'only_matching': True }] _FORMATS_EXT = { '5': 'flv', @@ -44,6 +53,13 @@ class GoogleDriveIE(InfoExtractor): '46': 'webm', '59': 'mp4', } + _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext' + _CAPTIONS_ENTRY_TAG = { + 'subtitles': 'track', + 'automatic_captions': 'target', + } + _caption_formats_ext = [] + _captions_by_country_xml = None @staticmethod def _extract_url(webpage): @@ -53,6 +69,81 @@ class GoogleDriveIE(InfoExtractor): if mobj: return 'https://drive.google.com/file/d/%s' % mobj.group('id') + def _set_captions_data(self, video_id, video_subtitles_id, hl): + try: + self._captions_by_country_xml = self._download_xml(self._BASE_URL_CAPTIONS, video_id, query={ + 'id': video_id, + 'vid': video_subtitles_id, + 'hl': hl, + 'v': video_id, + 'type': 'list', + 'tlangs': '1', + 'fmts': '1', + 'vssids': '1', + }) + except ExtractorError as ee: + self.report_warning('unable to download video subtitles: %s' % error_to_compat_str(ee)) + if self._captions_by_country_xml is not None: + caption_available_extensions = self._captions_by_country_xml.findall('format') + for caption_extension in caption_available_extensions: + if caption_extension.attrib.get('fmt_code') and not caption_extension.attrib.get('default'): + self._caption_formats_ext.append(caption_extension.attrib['fmt_code']) + + def _get_captions_by_type(self, video_id, video_subtitles_id, caption_type, caption_original_lang_code=None): + if not video_subtitles_id or not caption_type: + return None + captions = {} + for caption_entry in self._captions_by_country_xml.findall(self._CAPTIONS_ENTRY_TAG[caption_type]): + caption_lang_code = caption_entry.attrib.get('lang_code') + if not caption_lang_code: + continue + caption_format_data = [] + for caption_format in self._caption_formats_ext: + query = { + 'vid': video_subtitles_id, + 'v': video_id, + 'fmt': caption_format, + 'lang': caption_lang_code if caption_original_lang_code is None else caption_original_lang_code, + 'type': 'track', + 
'name': '', + 'kind': '', + } + if caption_original_lang_code is not None: + query.update({'tlang': caption_lang_code}) + caption_format_data.append({ + 'url': update_url_query(self._BASE_URL_CAPTIONS, query), + 'ext': caption_format, + }) + captions[caption_lang_code] = caption_format_data + if not captions: + self.report_warning('video doesn\'t have %s' % caption_type.replace('_', ' ')) + return captions + + def _get_subtitles(self, video_id, video_subtitles_id, hl): + if not video_subtitles_id or not hl: + return None + if self._captions_by_country_xml is None: + self._set_captions_data(video_id, video_subtitles_id, hl) + if self._captions_by_country_xml is None: + return None + return self._get_captions_by_type(video_id, video_subtitles_id, 'subtitles') + + def _get_automatic_captions(self, video_id, video_subtitles_id, hl): + if not video_subtitles_id or not hl: + return None + if self._captions_by_country_xml is None: + self._set_captions_data(video_id, video_subtitles_id, hl) + if self._captions_by_country_xml is None: + return None + self.to_screen('%s: Looking for automatic captions' % video_id) + subtitle_original_track = self._captions_by_country_xml.find('track') + if subtitle_original_track is None: + return None + subtitle_original_lang_code = subtitle_original_track.attrib.get('lang_code') + if not subtitle_original_lang_code: + return None + return self._get_captions_by_type(video_id, video_subtitles_id, 'automatic_captions', subtitle_original_lang_code) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( @@ -97,10 +188,21 @@ class GoogleDriveIE(InfoExtractor): formats.append(f) self._sort_formats(formats) + hl = self._search_regex( + r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None) + video_subtitles_id = None + ttsurl = self._search_regex( + r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None) + if ttsurl: + # the video Id for subtitles will be the last value in the ttsurl query string + 
video_subtitles_id = ttsurl.encode('utf-8').decode('unicode_escape').split('=')[-1] + return { 'id': video_id, 'title': title, 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'formats': formats, + 'subtitles': self.extract_subtitles(video_id, video_subtitles_id, hl), + 'automatic_captions': self.extract_automatic_captions(video_id, video_subtitles_id, hl), } -- cgit v1.1 From e01c3d2ef7264b5d3d6f99e7e0b61340885ed661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 00:32:41 +0700 Subject: [extractor/common] Introduce _parse_xml --- youtube_dl/extractor/common.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ceba4ca..1804c4d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -27,6 +27,7 @@ from ..compat import ( compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, + compat_xml_parse_error, ) from ..downloader.f4m import remove_encrypted_media from ..utils import ( @@ -646,15 +647,29 @@ class InfoExtractor(object): def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', - transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}): + transform_source=None, fatal=True, encoding=None, + data=None, headers={}, query={}): """Return the xml as an xml.etree.ElementTree.Element""" xml_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query) + url_or_request, video_id, note, errnote, fatal=fatal, + encoding=encoding, data=data, headers=headers, query=query) if xml_string is False: return xml_string + return self._parse_xml( + xml_string, video_id, transform_source=transform_source, + fatal=fatal) + + def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): 
if transform_source: xml_string = transform_source(xml_string) - return compat_etree_fromstring(xml_string.encode('utf-8')) + try: + return compat_etree_fromstring(xml_string.encode('utf-8')) + except compat_xml_parse_error as ve: + errmsg = '%s: Failed to parse XML ' % video_id + if fatal: + raise ExtractorError(errmsg, cause=ve) + else: + self.report_warning(errmsg + str(ve)) def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', -- cgit v1.1 From 37d9af306a928ce2184dcb60883e98ec0dd570ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 00:33:53 +0700 Subject: [googledrive] Simplify and carry long lines (#13638) --- youtube_dl/extractor/googledrive.py | 119 +++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 35edc74..97ff282 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -7,7 +7,6 @@ from ..utils import ( ExtractorError, int_or_none, lowercase_escape, - error_to_compat_str, update_url_query, ) @@ -59,7 +58,7 @@ class GoogleDriveIE(InfoExtractor): 'automatic_captions': 'target', } _caption_formats_ext = [] - _captions_by_country_xml = None + _captions_xml = None @staticmethod def _extract_url(webpage): @@ -69,96 +68,99 @@ class GoogleDriveIE(InfoExtractor): if mobj: return 'https://drive.google.com/file/d/%s' % mobj.group('id') - def _set_captions_data(self, video_id, video_subtitles_id, hl): - try: - self._captions_by_country_xml = self._download_xml(self._BASE_URL_CAPTIONS, video_id, query={ + def _download_subtitles_xml(self, video_id, subtitles_id, hl): + if self._captions_xml: + return + self._captions_xml = self._download_xml( + self._BASE_URL_CAPTIONS, video_id, query={ 'id': video_id, - 'vid': video_subtitles_id, + 'vid': subtitles_id, 'hl': hl, 'v': video_id, 'type': 'list', 'tlangs': '1', 'fmts': '1', 'vssids': '1', - 
}) - except ExtractorError as ee: - self.report_warning('unable to download video subtitles: %s' % error_to_compat_str(ee)) - if self._captions_by_country_xml is not None: - caption_available_extensions = self._captions_by_country_xml.findall('format') - for caption_extension in caption_available_extensions: - if caption_extension.attrib.get('fmt_code') and not caption_extension.attrib.get('default'): - self._caption_formats_ext.append(caption_extension.attrib['fmt_code']) - - def _get_captions_by_type(self, video_id, video_subtitles_id, caption_type, caption_original_lang_code=None): - if not video_subtitles_id or not caption_type: - return None + }, note='Downloading subtitles XML', + errnote='Unable to download subtitles XML', fatal=False) + if self._captions_xml: + for f in self._captions_xml.findall('format'): + if f.attrib.get('fmt_code') and not f.attrib.get('default'): + self._caption_formats_ext.append(f.attrib['fmt_code']) + + def _get_captions_by_type(self, video_id, subtitles_id, caption_type, + origin_lang_code=None): + if not subtitles_id or not caption_type: + return captions = {} - for caption_entry in self._captions_by_country_xml.findall(self._CAPTIONS_ENTRY_TAG[caption_type]): + for caption_entry in self._captions_xml.findall( + self._CAPTIONS_ENTRY_TAG[caption_type]): caption_lang_code = caption_entry.attrib.get('lang_code') if not caption_lang_code: continue caption_format_data = [] for caption_format in self._caption_formats_ext: query = { - 'vid': video_subtitles_id, + 'vid': subtitles_id, 'v': video_id, 'fmt': caption_format, - 'lang': caption_lang_code if caption_original_lang_code is None else caption_original_lang_code, + 'lang': (caption_lang_code if origin_lang_code is None + else origin_lang_code), 'type': 'track', 'name': '', 'kind': '', } - if caption_original_lang_code is not None: + if origin_lang_code is not None: query.update({'tlang': caption_lang_code}) caption_format_data.append({ 'url': 
update_url_query(self._BASE_URL_CAPTIONS, query), 'ext': caption_format, }) captions[caption_lang_code] = caption_format_data - if not captions: - self.report_warning('video doesn\'t have %s' % caption_type.replace('_', ' ')) return captions - def _get_subtitles(self, video_id, video_subtitles_id, hl): - if not video_subtitles_id or not hl: - return None - if self._captions_by_country_xml is None: - self._set_captions_data(video_id, video_subtitles_id, hl) - if self._captions_by_country_xml is None: - return None - return self._get_captions_by_type(video_id, video_subtitles_id, 'subtitles') - - def _get_automatic_captions(self, video_id, video_subtitles_id, hl): - if not video_subtitles_id or not hl: - return None - if self._captions_by_country_xml is None: - self._set_captions_data(video_id, video_subtitles_id, hl) - if self._captions_by_country_xml is None: - return None - self.to_screen('%s: Looking for automatic captions' % video_id) - subtitle_original_track = self._captions_by_country_xml.find('track') - if subtitle_original_track is None: - return None - subtitle_original_lang_code = subtitle_original_track.attrib.get('lang_code') - if not subtitle_original_lang_code: - return None - return self._get_captions_by_type(video_id, video_subtitles_id, 'automatic_captions', subtitle_original_lang_code) + def _get_subtitles(self, video_id, subtitles_id, hl): + if not subtitles_id or not hl: + return + self._download_subtitles_xml(video_id, subtitles_id, hl) + if not self._captions_xml: + return + return self._get_captions_by_type(video_id, subtitles_id, 'subtitles') + + def _get_automatic_captions(self, video_id, subtitles_id, hl): + if not subtitles_id or not hl: + return + self._download_subtitles_xml(video_id, subtitles_id, hl) + if not self._captions_xml: + return + track = self._captions_xml.find('track') + if track is None: + return + origin_lang_code = track.attrib.get('lang_code') + if not origin_lang_code: + return + return self._get_captions_by_type( + 
video_id, subtitles_id, 'automatic_captions', origin_lang_code) def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( 'http://docs.google.com/file/d/%s' % video_id, video_id) - reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) + reason = self._search_regex( + r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) if reason: raise ExtractorError(reason) title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title') duration = int_or_none(self._search_regex( - r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None)) + r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', + default=None)) fmt_stream_map = self._search_regex( - r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') - fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') + r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, + 'fmt stream map').split(',') + fmt_list = self._search_regex( + r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') resolutions = {} for fmt in fmt_list: @@ -190,12 +192,14 @@ class GoogleDriveIE(InfoExtractor): hl = self._search_regex( r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None) - video_subtitles_id = None + subtitles_id = None ttsurl = self._search_regex( r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None) if ttsurl: - # the video Id for subtitles will be the last value in the ttsurl query string - video_subtitles_id = ttsurl.encode('utf-8').decode('unicode_escape').split('=')[-1] + # the video Id for subtitles will be the last value in the ttsurl + # query string + subtitles_id = ttsurl.encode('utf-8').decode( + 'unicode_escape').split('=')[-1] return { 'id': video_id, @@ -203,6 +207,7 @@ class GoogleDriveIE(InfoExtractor): 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'formats': formats, - 'subtitles': self.extract_subtitles(video_id, 
video_subtitles_id, hl), - 'automatic_captions': self.extract_automatic_captions(video_id, video_subtitles_id, hl), + 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl), + 'automatic_captions': self.extract_automatic_captions( + video_id, subtitles_id, hl), } -- cgit v1.1 From 8d7a24aff60a57e651bab40f16a81eb7dffb405c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 22:28:09 +0700 Subject: [toutv] Relax DRM check (closes #13994) --- youtube_dl/extractor/toutv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 26d7709..071388d 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -78,8 +78,10 @@ class TouTvIE(InfoExtractor): def _real_extract(self, url): path = self._match_id(url) metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) + # IsDrm does not necessarily mean the video is DRM protected (see + # https://github.com/rg3/youtube-dl/issues/13994). 
if metadata.get('IsDrm'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_warning('This video is probably DRM protected.', path) video_id = metadata['IdMedia'] details = metadata['Details'] title = details['OriginalTitle'] -- cgit v1.1 From 0830f3e04842a58eb563962940ceb2bed27aac1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 22:45:45 +0700 Subject: [cbc:watch] Bypass geo-restriction (closes #13993) --- youtube_dl/extractor/cbc.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 87ad14e..9faf402 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -200,6 +200,7 @@ class CBCWatchBaseIE(InfoExtractor): 'media': 'http://search.yahoo.com/mrss/', 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', } + _GEO_COUNTRIES = ['CA'] def _call_api(self, path, video_id): url = path if path.startswith('http') else self._API_BASE_URL + path @@ -287,6 +288,11 @@ class CBCWatchBaseIE(InfoExtractor): class CBCWatchVideoIE(CBCWatchBaseIE): IE_NAME = 'cbc.ca:watch:video' _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TEST = { + # geo-restricted to Canada, bypassable + 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235', + 'only_matching': True, + } def _real_extract(self, url): video_id = self._match_id(url) @@ -323,9 +329,10 @@ class CBCWatchIE(CBCWatchBaseIE): IE_NAME = 'cbc.ca:watch' _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)' _TESTS = [{ + # geo-restricted to Canada, bypassable 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', 'info_dict': { - 'id': '38e815a-009e3ab12e4', + 'id':
'9673749a-5e77-484c-8b62-a1092a6b5168', 'ext': 'mp4', 'title': 'Customer (Dis)Service', 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87', @@ -337,8 +344,8 @@ class CBCWatchIE(CBCWatchBaseIE): 'skip_download': True, 'format': 'bestvideo', }, - 'skip': 'Geo-restricted to Canada', }, { + # geo-restricted to Canada, bypassable 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057', 'info_dict': { 'id': '1ed4b385-cd84-49cf-95f0-80f004680057', @@ -346,7 +353,6 @@ class CBCWatchIE(CBCWatchBaseIE): 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', }, 'playlist_mincount': 30, - 'skip': 'Geo-restricted to Canada', }] def _real_extract(self, url): -- cgit v1.1 From 5bae33485c223fdf230254fa424f972b3c51e77f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 22:50:00 +0700 Subject: [toutv] PEP 8 --- youtube_dl/extractor/toutv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 071388d..e59ed26 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..utils import ( int_or_none, js_to_json, - ExtractorError, urlencode_postdata, extract_attributes, smuggle_url, -- cgit v1.1 From c4bdc6811307c002a399b59860d311591145397f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 23:21:19 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c07cb96..4104b6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,23 @@ version Core ++ [extractor/common] Introduce _parse_xml +* [extractor/common] Make HLS and DASH extraction in_parse_html5_media_entries + non fatal (#13970) * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) Extractors +* [cbc:watch] 
Bypass geo restriction (#13993) +* [toutv] Relax DRM check (#13994) ++ [googledrive] Add support for subtitles (#13619, #13638) +* [pornhub] Relax uploader regular expression (#13906, #13975) +* [bandcamp:album] Extract track titles (#13962) ++ [bbccouk] Add support for events URLs (#13893) + [liveleak] Support multi-video pages (#6542) + [liveleak] Support another liveleak embedding pattern (#13336) * [cda] Fix extraction (#13935) ++ [laola1tv] Add support for tv.ittf.com (#13965) +* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003) version 2017.08.18 @@ -129,7 +140,7 @@ Extractors * [youku:show] Fix playlist extraction (#13248) + [dispeak] Recognize sevt subdomain (#13276) * [adn] Improve error reporting (#13663) -* [crunchyroll] Relax series and season regex (#13659) +* [crunchyroll] Relax series and season regular expression (#13659) + [spiegel:article] Add support for nexx iframe embeds (#13029) + [nexx:embed] Add support for iframe embeds * [nexx] Improve JS embed extraction -- cgit v1.1 From df235dbba8d8ae3b51ad3432f67d0cb661dadd75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 23:23:13 +0700 Subject: release 2017.08.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 12 ++++++------ ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 66dd4c4..3e1ff15 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.18 +[debug] youtube-dl version 2017.08.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d606eab..a8091e7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,7 +3,7 @@ $ youtube-dl -v [debug] System config: [] [debug] User config: [] -[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] youtube-dl version 2015.12.06 [debug] Git HEAD: 135392e @@ -34,7 +34,7 @@ For bug reports, this means that your report should contain the *complete* outpu If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. 
If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). -**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL. +**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL. ### Are you using the latest version? @@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t # DEVELOPER INSTRUCTIONS -Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. +Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution. To run youtube-dl as a developer, you don't need to build anything either. 
Simply execute @@ -118,7 +118,7 @@ After you have ensured this site is distributing its content legally, you can fo class YourExtractorIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' _TEST = { - 'url': 'http://yourextractor.com/watch/42', + 'url': 'https://yourextractor.com/watch/42', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': '42', @@ -151,8 +151,8 @@ After you have ensured this site is distributing its content legally, you can fo 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. -9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: +8.
Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. +9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py diff --git a/ChangeLog b/ChangeLog index 4104b6d..a60bd5f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.23 Core + [extractor/common] Introduce _parse_xml diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1991975..dbec6c8 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -363,6 +363,7 @@ - **IPrima** - **iqiyi**: 爱奇艺 - **Ir90Tv** + - **ITTF** - **ITV** - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations @@ -419,6 +420,7 @@ - **limelight:channel_list** - **LiTV** - **LiveLeak** + - **LiveLeakEmbed** - **livestream** - **livestream:original** - **LnkGo** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4358cd3..94d35a6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.18' +__version__ = '2017.08.23' -- cgit v1.1 From 745968bc72f5dcf1271559d58abd3f0d9a2ea01e Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Thu, 24 Aug 2017 20:58:44 +0530 Subject: [mixcloud] Fix extraction (closes #14015) --- youtube_dl/extractor/mixcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 798968a..25d3fc4 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -92,7 +92,7 @@ class 
MixcloudIE(InfoExtractor): js = self._download_webpage(js_url, track_id, fatal=False) if js: KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1' - for key_name in ('value', 'key_value'): + for key_name in ('value', 'key_value', 'key_value_two'): key = self._search_regex( KEY_RE_TEMPLATE % key_name, js, 'key', default=None, group='key') -- cgit v1.1 From c7121fa7b804d2b9e35dec05f8861e4ebba8afd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Aug 2017 15:38:38 +0700 Subject: [youtube] Fix controversy videos extraction (closes #14027, closes #14029) --- youtube_dl/extractor/youtube.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2e71795..3d0f5a5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1004,6 +1004,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ], }, { + # The following content has been identified by the YouTube community + # as inappropriate or offensive to some audiences.
+ 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI', + 'info_dict': { + 'id': '6SJNVb0GnPI', + 'ext': 'mp4', + 'title': 'Race Differences in Intelligence', + 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1', + 'duration': 965, + 'upload_date': '20140124', + 'uploader': 'New Century Foundation', + 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg', + 'license': 'Standard YouTube License', + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, + { # itag 212 'url': '1t24XAntNCY', 'only_matching': True, @@ -1437,9 +1458,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if dash_mpd and dash_mpd[0] not in dash_mpds: dash_mpds.append(dash_mpd[0]) + is_live = None + view_count = None + + def extract_view_count(v_info): + return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + # Get video info embed_webpage = None - is_live = None if re.search(r'player-age-gate-content">', video_webpage) is not None: age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} @@ -1509,6 +1535,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue get_video_info = compat_parse_qs(video_info_webpage) add_dash_mpd(get_video_info) + if view_count is None: + view_count = extract_view_count(get_video_info) if not video_info: video_info = get_video_info if 'token' in get_video_info: @@ -1592,10 +1620,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return self.playlist_result(entries, video_id, video_title, video_description) self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - if 'view_count' in video_info: - view_count = int(video_info['view_count'][0]) - else: - view_count = None + if view_count is None: + view_count = extract_view_count(get_video_info) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: -- cgit v1.1 From 151978f38a915afd0169e68955b3b7394c54e367 Mon 
Sep 17 00:00:00 2001 From: Vijay Singh Date: Sat, 26 Aug 2017 18:02:57 +0530 Subject: [mixcloud] Fix extraction (closes #14020) --- youtube_dl/extractor/mixcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 25d3fc4..f6360cc 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -92,7 +92,7 @@ class MixcloudIE(InfoExtractor): js = self._download_webpage(js_url, track_id, fatal=False) if js: KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1' - for key_name in ('value', 'key_value', 'key_value_two'): + for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'): key = self._search_regex( KEY_RE_TEMPLATE % key_name, js, 'key', default=None, group='key') -- cgit v1.1 From 085d9dd9bebfd1692cfe07e8bcb844780bfe4700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Aug 2017 22:02:49 +0700 Subject: [rai] Fix audio formats extraction (closes #14024) --- youtube_dl/extractor/rai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index e11bf8f..5bf64a5 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE): media_type = media['type'] if 'Audio' in media_type: relinker_info = { - 'formats': { + 'formats': [{ 'format_id': media.get('formatoAudio'), 'url': media['audioUrl'], 'ext': media.get('formatoAudio'), - } + }] } elif 'Video' in media_type: relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) -- cgit v1.1 From a3c3a1e12898a57fc2323e4c7cc37ace35482ecf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Aug 2017 23:55:48 +0700 Subject: [http] Rework HTTP downloader * Simplify code and split into separate routines to facilitate maintaining * Make retry mechanism work on errors during actual download
not only during connection establishment phase * Retry on ECONNRESET and ETIMEDOUT during reading data from network * Retry on content too short and various timeout errors * Show error description on retry * Closes #506, closes #809, closes #2849, closes #4240, closes #6023, closes #8625, closes #9483 --- youtube_dl/downloader/common.py | 6 +- youtube_dl/downloader/http.py | 347 +++++++++++++++++++++++----------------- 2 files changed, 199 insertions(+), 154 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 77242da..75b8166 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -304,11 +304,11 @@ class FileDownloader(object): """Report attempt to resume at given byte.""" self.to_screen('[download] Resuming download at byte %s' % resume_len) - def report_retry(self, count, retries): + def report_retry(self, err, count, retries): """Report retry in case of HTTP error 5xx""" self.to_screen( - '[download] Got server HTTP error. Retrying (attempt %d of %s)...' - % (count, self.format_retries(retries))) + '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...' 
+ % (error_to_compat_str(err), count, self.format_retries(retries))) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index af405b9..8a6638c 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -22,8 +22,16 @@ from ..utils import ( class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] - tmpfilename = self.temp_name(filename) - stream = None + + class DownloadContext(dict): + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + ctx = DownloadContext() + ctx.filename = filename + ctx.tmpfilename = self.temp_name(filename) + ctx.stream = None # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} @@ -38,46 +46,51 @@ class HttpFD(FileDownloader): if is_test: request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1)) - # Establish possible resume length - if os.path.isfile(encodeFilename(tmpfilename)): - resume_len = os.path.getsize(encodeFilename(tmpfilename)) - else: - resume_len = 0 - - open_mode = 'wb' - if resume_len != 0: - if self.params.get('continuedl', True): - self.report_resuming_byte(resume_len) - request.add_header('Range', 'bytes=%d-' % resume_len) - open_mode = 'ab' - else: - resume_len = 0 + ctx.open_mode = 'wb' + ctx.resume_len = 0 + + if self.params.get('continuedl', True): + # Establish possible resume length + if os.path.isfile(encodeFilename(ctx.tmpfilename)): + ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) count = 0 retries = self.params.get('retries', 0) - while count <= retries: + + class SucceedDownload(Exception): + pass + + class RetryDownload(Exception): + def __init__(self, source_error): + self.source_error = source_error + + def establish_connection(): + if ctx.resume_len != 0: + self.report_resuming_byte(ctx.resume_len) + 
request.add_header('Range', 'bytes=%d-' % ctx.resume_len) + ctx.open_mode = 'ab' # Establish connection try: - data = self.ydl.urlopen(request) + ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range # set in response despite of requested Range (see # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) - if resume_len > 0: - content_range = data.headers.get('Content-Range') + if ctx.resume_len > 0: + content_range = ctx.data.headers.get('Content-Range') if content_range: content_range_m = re.search(r'bytes (\d+)-', content_range) # Content-Range is present and matches requested Range, resume is possible - if content_range_m and resume_len == int(content_range_m.group(1)): - break + if content_range_m and ctx.resume_len == int(content_range_m.group(1)): + return # Content-Range is either not present or invalid. 
Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload self.report_unable_to_resume() - resume_len = 0 - open_mode = 'wb' - break + ctx.resume_len = 0 + ctx.open_mode = 'wb' + return except (compat_urllib_error.HTTPError, ) as err: if (err.code < 500 or err.code >= 600) and err.code != 416: # Unexpected HTTP error @@ -86,15 +99,15 @@ class HttpFD(FileDownloader): # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header - data = self.ydl.urlopen(basic_request) - content_length = data.info()['Content-Length'] + ctx.data = self.ydl.urlopen(basic_request) + content_length = ctx.data.info()['Content-Length'] except (compat_urllib_error.HTTPError, ) as err: if err.code < 500 or err.code >= 600: raise else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < int(content_length) < resume_len + 100)): + (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -102,152 +115,184 @@ class HttpFD(FileDownloader): # I decided to implement a suggested change and consider the file # completely downloaded if the file size differs less than 100 bytes from # the one in the hard drive. 
- self.report_file_already_downloaded(filename) - self.try_rename(tmpfilename, filename) + self.report_file_already_downloaded(ctx.filename) + self.try_rename(ctx.tmpfilename, ctx.filename) self._hook_progress({ - 'filename': filename, + 'filename': ctx.filename, 'status': 'finished', - 'downloaded_bytes': resume_len, - 'total_bytes': resume_len, + 'downloaded_bytes': ctx.resume_len, + 'total_bytes': ctx.resume_len, }) - return True + raise SucceedDownload() else: # The length does not match, we start the download over self.report_unable_to_resume() - resume_len = 0 - open_mode = 'wb' - break - except socket.error as e: - if e.errno != errno.ECONNRESET: + ctx.resume_len = 0 + ctx.open_mode = 'wb' + return + raise RetryDownload(err) + except socket.error as err: + if err.errno != errno.ECONNRESET: # Connection reset is no problem, just retry raise + raise RetryDownload(err) + + def download(): + data_len = ctx.data.info().get('Content-length', None) + + # Range HTTP header may be ignored/unsupported by a webserver + # (e.g. extractor/scivee.py, extractor/bambuser.py). + # However, for a test we still would like to download just a piece of a file. + # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control + # block size when downloading a file. + if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): + data_len = self._TEST_FILE_SIZE + + if data_len is not None: + data_len = int(data_len) + ctx.resume_len + min_data_len = self.params.get('min_filesize') + max_data_len = self.params.get('max_filesize') + if min_data_len is not None and data_len < min_data_len: + self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' 
% (data_len, max_data_len)) + return False - # Retry - count += 1 - if count <= retries: - self.report_retry(count, retries) - - if count > retries: - self.report_error('giving up after %s retries' % retries) - return False - - data_len = data.info().get('Content-length', None) - - # Range HTTP header may be ignored/unsupported by a webserver - # (e.g. extractor/scivee.py, extractor/bambuser.py). - # However, for a test we still would like to download just a piece of a file. - # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control - # block size when downloading a file. - if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): - data_len = self._TEST_FILE_SIZE - - if data_len is not None: - data_len = int(data_len) + resume_len - min_data_len = self.params.get('min_filesize') - max_data_len = self.params.get('max_filesize') - if min_data_len is not None and data_len < min_data_len: - self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) - return False - if max_data_len is not None and data_len > max_data_len: - self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' 
% (data_len, max_data_len)) - return False - - byte_counter = 0 + resume_len - block_size = self.params.get('buffersize', 1024) - start = time.time() + byte_counter = 0 + ctx.resume_len + block_size = self.params.get('buffersize', 1024) + start = time.time() - # measure time over whole while-loop, so slow_down() and best_block_size() work together properly - now = None # needed for slow_down() in the first loop run - before = start # start measuring - while True: + # measure time over whole while-loop, so slow_down() and best_block_size() work together properly + now = None # needed for slow_down() in the first loop run + before = start # start measuring - # Download and write - data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - byte_counter += len(data_block) + def retry(e): + if ctx.tmpfilename != '-': + ctx.stream.close() + ctx.stream = None + ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) + raise RetryDownload(e) - # exit loop when download is finished - if len(data_block) == 0: - break + while True: + try: + # Download and write + data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) + # socket.timeout is a subclass of socket.error but may not have + # errno set + except socket.timeout as e: + retry(e) + except socket.error as e: + if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT): + raise + retry(e) + + byte_counter += len(data_block) + + # exit loop when download is finished + if len(data_block) == 0: + break + + # Open destination file just in time + if ctx.stream is None: + try: + ctx.stream, ctx.tmpfilename = sanitize_open( + ctx.tmpfilename, ctx.open_mode) + assert ctx.stream is not None + ctx.filename = self.undo_temp_name(ctx.tmpfilename) + self.report_destination(ctx.filename) + except (OSError, IOError) as err: + self.report_error('unable to open for writing: %s' % str(err)) + return False + + if self.params.get('xattr_set_filesize', 
False) and data_len is not None: + try: + write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) + except (XAttrUnavailableError, XAttrMetadataError) as err: + self.report_error('unable to set filesize xattr: %s' % str(err)) - # Open destination file just in time - if stream is None: try: - (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) - assert stream is not None - filename = self.undo_temp_name(tmpfilename) - self.report_destination(filename) - except (OSError, IOError) as err: - self.report_error('unable to open for writing: %s' % str(err)) + ctx.stream.write(data_block) + except (IOError, OSError) as err: + self.to_stderr('\n') + self.report_error('unable to write data: %s' % str(err)) return False - if self.params.get('xattr_set_filesize', False) and data_len is not None: - try: - write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) - except (XAttrUnavailableError, XAttrMetadataError) as err: - self.report_error('unable to set filesize xattr: %s' % str(err)) - - try: - stream.write(data_block) - except (IOError, OSError) as err: + # Apply rate limit + self.slow_down(start, now, byte_counter - ctx.resume_len) + + # end measuring of one loop run + now = time.time() + after = now + + # Adjust block size + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) + + before = after + + # Progress message + speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) + if data_len is None: + eta = None + else: + eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len) + + self._hook_progress({ + 'status': 'downloading', + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': ctx.tmpfilename, + 'filename': ctx.filename, + 'eta': eta, + 'speed': speed, + 'elapsed': now - start, + }) + + if is_test and byte_counter == data_len: + break + + if ctx.stream is None: 
self.to_stderr('\n') - self.report_error('unable to write data: %s' % str(err)) + self.report_error('Did not get any data blocks') return False + if ctx.tmpfilename != '-': + ctx.stream.close() - # Apply rate limit - self.slow_down(start, now, byte_counter - resume_len) + if data_len is not None and byte_counter != data_len: + err = ContentTooShortError(byte_counter, int(data_len)) + if count <= retries: + retry(err) + raise err - # end measuring of one loop run - now = time.time() - after = now + self.try_rename(ctx.tmpfilename, ctx.filename) - # Adjust block size - if not self.params.get('noresizebuffer', False): - block_size = self.best_block_size(after - before, len(data_block)) - - before = after - - # Progress message - speed = self.calc_speed(start, now, byte_counter - resume_len) - if data_len is None: - eta = None - else: - eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + # Update file modification time + if self.params.get('updatetime', True): + info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) self._hook_progress({ - 'status': 'downloading', 'downloaded_bytes': byte_counter, - 'total_bytes': data_len, - 'tmpfilename': tmpfilename, - 'filename': filename, - 'eta': eta, - 'speed': speed, - 'elapsed': now - start, + 'total_bytes': byte_counter, + 'filename': ctx.filename, + 'status': 'finished', + 'elapsed': time.time() - start, }) - if is_test and byte_counter == data_len: - break - - if stream is None: - self.to_stderr('\n') - self.report_error('Did not get any data blocks') - return False - if tmpfilename != '-': - stream.close() - - if data_len is not None and byte_counter != data_len: - raise ContentTooShortError(byte_counter, int(data_len)) - self.try_rename(tmpfilename, filename) - - # Update file modification time - if self.params.get('updatetime', True): - info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) - - 
self._hook_progress({ - 'downloaded_bytes': byte_counter, - 'total_bytes': byte_counter, - 'filename': filename, - 'status': 'finished', - 'elapsed': time.time() - start, - }) - - return True + return True + + while count <= retries: + try: + establish_connection() + download() + return True + except RetryDownload as e: + count += 1 + if count <= retries: + self.report_retry(e.source_error, count, retries) + continue + except SucceedDownload: + return True + + self.report_error('giving up after %s retries' % retries) + return False -- cgit v1.1 From dd121cc1cab2f077cbf68ee432e83d4d094f0f9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 03:12:56 +0700 Subject: [extractor/common] Extract height from res attribute of source tag for HTML5 videos (closes #14034) --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1804c4d..b4af3f9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2184,6 +2184,9 @@ class InfoExtractor(object): f = parse_content_type(source_attributes.get('type')) is_plain_url, formats = _media_formats(src, media_type, f) if is_plain_url: + # res attribute is not standard but seen several times + # in the wild + f['height'] = int_or_none(source_attributes.get('res')) f.update(formats[0]) media_info['formats'].append(f) else: -- cgit v1.1 From 1ed4549942c34cce52b9c641cf9f532c38866149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 03:27:05 +0700 Subject: [extractor/common] Extract format id from label attribute of source tag for HTML5 videos (#14034) --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b4af3f9..74d30ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2186,7 +2186,10 @@ 
class InfoExtractor(object): if is_plain_url: # res attribute is not standard but seen several times # in the wild - f['height'] = int_or_none(source_attributes.get('res')) + f.update({ + 'height': int_or_none(source_attributes.get('res')), + 'format_id': source_attributes.get('label'), + }) f.update(formats[0]) media_info['formats'].append(f) else: -- cgit v1.1 From ff17be3ac921910e5cab6f54a579f379dacae068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 03:27:20 +0700 Subject: [extractor/generic] Extract from LD-JSON last of all Previous sources may contain several formats, e.g. http://tamasha.com/v/PgGZ --- youtube_dl/extractor/generic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 49b00b8..c81efdc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2871,12 +2871,6 @@ class GenericIE(InfoExtractor): merged[k] = v return merged - # Looking for http://schema.org/VideoObject - json_ld = self._search_json_ld( - webpage, video_id, default={}, expected_type='VideoObject') - if json_ld.get('url'): - return merge_dicts(json_ld, info_dict) - # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: @@ -2895,6 +2889,12 @@ class GenericIE(InfoExtractor): jwplayer_data, video_id, require_title=False, base_url=url) return merge_dicts(info, info_dict) + # Looking for http://schema.org/VideoObject + json_ld = self._search_json_ld( + webpage, video_id, default={}, expected_type='VideoObject') + if json_ld.get('url'): + return merge_dicts(json_ld, info_dict) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True -- cgit v1.1 From 62c06c593d19c1314c84639128bb3354665399bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 04:24:41 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 20 
++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index a60bd5f..46d76ff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Core ++ [extractor/common] Extract height and format id for HTML5 videos (#14034) +* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023, + #8625, #9483) + * Simplify code and split into separate routines to facilitate maintaining + * Make retry mechanism work on errors during actual download not only + during connection establishment phase + * Retry on ECONNRESET and ETIMEDOUT during reading data from network + * Retry on content too short + * Show error description on retry + +Extractors +* [generic] Lower preference for extraction from LD-JSON +* [rai] Fix audio formats extraction (#14024) +* [youtube] Fix controversy videos extraction (#14027, #14029) +* [mixcloud] Fix extraction (#14015, #14020) + + version 2017.08.23 Core -- cgit v1.1 From f031b760650f57672deecacb3f1991bd6fcd778e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 04:28:04 +0700 Subject: release 2017.08.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3e1ff15..777932f 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.23 +[debug] youtube-dl version 2017.08.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 46d76ff..a997759 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.27 Core + [extractor/common] Extract height and format id for HTML5 videos (#14034) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 94d35a6..6a24b70 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.23' +__version__ = '2017.08.27' -- cgit v1.1 From 1c9c8de29e83bbfc38027dabcd9642f1c41a64b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 05:59:08 +0700 Subject: [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (closes #14037) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3d0f5a5..5a6b735 100644 --- 
a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1621,7 +1621,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) if view_count is None: - view_count = extract_view_count(get_video_info) + view_count = extract_view_count(video_info) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: -- cgit v1.1 From cc0412ef915047233391c978fa1f8d4d1479ddfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 06:06:49 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index a997759..c38a836 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version + +Extractors + +* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037) + + version 2017.08.27 Core -- cgit v1.1 From 2cfa7cbdd09353c8f8b135ba1c0b3a325bb03a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 06:09:29 +0700 Subject: release 2017.08.27.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 777932f..7959d91 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27.1*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.27 +[debug] youtube-dl version 2017.08.27.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c38a836..ef9ac46 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.27.1 Extractors diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6a24b70..7504ef0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.27' +__version__ = '2017.08.27.1' -- cgit v1.1 From 3902cdd0e3309364809407b895e4060199b89d11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 22:36:57 +0700 Subject: [pornhd] Fix extraction (closes #14005) --- youtube_dl/extractor/pornhd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 3676178..b52879c 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -54,7 +54,7 @@ class 
PornHdIE(InfoExtractor): r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') sources = self._parse_json(js_to_json(self._search_regex( - r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]", + r"(?s)sources'?\s*[:=]\s*(\{.+?\})", webpage, 'sources', default='{}')), video_id) if not sources: @@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor): view_count = int_or_none(self._html_search_regex( r'(\d+) views\s*<', webpage, 'view count', fatal=False)) thumbnail = self._search_regex( - r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) + r"poster'?\s*:\s*([\"'])(?P(?:(?!\1).)+)\1", webpage, + 'thumbnail', fatal=False, group='url') return { 'id': video_id, -- cgit v1.1 From fea82c1780cc751267fd2b9b4145996bfc0c1994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 28 Aug 2017 00:39:22 +0700 Subject: [googledrive] Add support for source format (closes #14046) --- youtube_dl/extractor/googledrive.py | 115 +++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 97ff282..37d3739 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, int_or_none, lowercase_escape, @@ -15,7 +16,7 @@ class GoogleDriveIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P[a-zA-Z0-9_-]{28,})' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', - 'md5': 'd109872761f7e7ecf353fa108c0dbe1e', + 'md5': '5c602afbbf2c1db91831f5d82f678554', 'info_dict': { 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', 'ext': 'mp4', @@ -23,9 +24,18 @@ class GoogleDriveIE(InfoExtractor): 'duration': 45, } }, { + # video can't be watched anonymously due to view count limit reached, + # but can be downloaded (see 
https://github.com/rg3/youtube-dl/issues/14046) + 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view', + 'md5': 'bfbd670d03a470bb1e6d4a257adec12e', + 'info_dict': { + 'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ', + 'ext': 'mp4', + 'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4', + } + }, { # video id is longer than 28 characters 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', - 'md5': 'c230c67252874fddd8170e3fd1a45886', 'info_dict': { 'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ', 'ext': 'mp4', @@ -147,47 +157,84 @@ class GoogleDriveIE(InfoExtractor): webpage = self._download_webpage( 'http://docs.google.com/file/d/%s' % video_id, video_id) - reason = self._search_regex( - r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) - if reason: - raise ExtractorError(reason) - - title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title') + title = self._search_regex( + r'"title"\s*,\s*"([^"]+)', webpage, 'title', + default=None) or self._og_search_title(webpage) duration = int_or_none(self._search_regex( r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None)) + + formats = [] fmt_stream_map = self._search_regex( r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, - 'fmt stream map').split(',') + 'fmt stream map', default='').split(',') fmt_list = self._search_regex( - r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') + r'"fmt_list"\s*,\s*"([^"]+)', webpage, + 'fmt_list', default='').split(',') + if fmt_stream_map and fmt_list: + resolutions = {} + for fmt in fmt_list: + mobj = re.search( + r'^(?P\d+)/(?P\d+)[xX](?P\d+)', fmt) + if mobj: + resolutions[mobj.group('format_id')] = ( + int(mobj.group('width')), int(mobj.group('height'))) - resolutions = {} - for fmt in fmt_list: - mobj = re.search( - r'^(?P\d+)/(?P\d+)[xX](?P\d+)', fmt) - if mobj: - resolutions[mobj.group('format_id')] = ( - int(mobj.group('width')), int(mobj.group('height'))) + for fmt_stream in fmt_stream_map: 
+ fmt_stream_split = fmt_stream.split('|') + if len(fmt_stream_split) < 2: + continue + format_id, format_url = fmt_stream_split[:2] + f = { + 'url': lowercase_escape(format_url), + 'format_id': format_id, + 'ext': self._FORMATS_EXT[format_id], + } + resolution = resolutions.get(format_id) + if resolution: + f.update({ + 'width': resolution[0], + 'height': resolution[1], + }) + formats.append(f) - formats = [] - for fmt_stream in fmt_stream_map: - fmt_stream_split = fmt_stream.split('|') - if len(fmt_stream_split) < 2: - continue - format_id, format_url = fmt_stream_split[:2] - f = { - 'url': lowercase_escape(format_url), - 'format_id': format_id, - 'ext': self._FORMATS_EXT[format_id], - } - resolution = resolutions.get(format_id) - if resolution: - f.update({ - 'width': resolution[0], - 'height': resolution[1], + source_url = update_url_query( + 'https://drive.google.com/uc', { + 'id': video_id, + 'export': 'download', + }) + urlh = self._request_webpage( + source_url, video_id, note='Requesting source file', + errnote='Unable to request source file', fatal=False) + if urlh: + def add_source_format(src_url): + formats.append({ + 'url': src_url, + 'ext': determine_ext(title, 'mp4').lower(), + 'format_id': 'source', + 'quality': 1, }) - formats.append(f) + if urlh.headers.get('Content-Disposition'): + add_source_format(source_url) + else: + confirmation_webpage = self._webpage_read_content( + urlh, url, video_id, note='Downloading confirmation page', + errnote='Unable to confirm download', fatal=False) + if confirmation_webpage: + confirm = self._search_regex( + r'confirm=([^&"\']+)', confirmation_webpage, + 'confirmation code', fatal=False) + if confirm: + add_source_format(update_url_query(source_url, { + 'confirm': confirm, + })) + + if not formats: + reason = self._search_regex( + r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) + if reason: + raise ExtractorError(reason, expected=True) + self._sort_formats(formats) hl = self._search_regex( -- cgit 
v1.1 From 1b41da488df64bd41463a79691330e555f79ac2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 28 Aug 2017 00:50:41 +0700 Subject: [googledrive] Extend _VALID_URL (closes #9785) --- youtube_dl/extractor/googledrive.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 37d3739..3bf462d 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -13,7 +13,18 @@ from ..utils import ( class GoogleDriveIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P[a-zA-Z0-9_-]{28,})' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:docs|drive)\.google\.com/ + (?: + (?:uc|open)\?.*?id=| + file/d/ + )| + video\.google\.com/get_player\?.*?docid= + ) + (?P[a-zA-Z0-9_-]{28,}) + ''' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', 'md5': '5c602afbbf2c1db91831f5d82f678554', @@ -42,7 +53,13 @@ class GoogleDriveIE(InfoExtractor): 'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4', 'duration': 189, }, - 'only_matching': True + 'only_matching': True, + }, { + 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28', + 'only_matching': True, + }, { + 'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28', + 'only_matching': True, }] _FORMATS_EXT = { '5': 'flv', -- cgit v1.1 From c75c384fb6d64f8d44c10d798cb64c8c00c61175 Mon Sep 17 00:00:00 2001 From: Ryan Schmidt Date: Sun, 27 Aug 2017 18:07:09 -0500 Subject: Fix build failures with old cp and zip --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 41e3a68..c74eea7 100644 --- a/Makefile +++ b/Makefile @@ -49,11 +49,11 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py mkdir -p zip for d in youtube_dl youtube_dl/downloader 
youtube_dl/extractor youtube_dl/postprocessor ; do \ mkdir -p zip/$$d ;\ - cp -a $$d/*.py zip/$$d/ ;\ + cp -pPR $$d/*.py zip/$$d/ ;\ done touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py mv zip/youtube_dl/__main__.py zip/ - cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py + cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py rm -rf zip echo '#!$(PYTHON)' > youtube-dl cat youtube-dl.zip >> youtube-dl -- cgit v1.1 From 53647dfd0ae5f3d369e01f04acd5eff9713cc91d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Aug 2017 05:27:56 +0700 Subject: [bbccouk] Add support for w-prefixed ids (closes #14056) --- youtube_dl/extractor/bbc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 911ae67..8b20c03 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -29,7 +29,7 @@ from ..compat import ( class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' - _ID_REGEX = r'[pb][\da-z]{7}' + _ID_REGEX = r'[pbw][\da-z]{7}' _VALID_URL = r'''(?x) https?:// (?:www\.)?bbc\.co\.uk/ @@ -233,6 +233,9 @@ class BBCCoUkIE(InfoExtractor): }, { 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55', 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9', + 'only_matching': True, }] _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' -- cgit v1.1 From 5b4bfbfc3bba2174234834a08a97e261e492a2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Aug 2017 23:50:33 +0700 Subject: [charlierose] Add support for episodes (closes #14062) --- youtube_dl/extractor/charlierose.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py index 2d517f2..42c9af2 100644 --- a/youtube_dl/extractor/charlierose.py +++ 
b/youtube_dl/extractor/charlierose.py @@ -5,7 +5,7 @@ from ..utils import remove_end class CharlieRoseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P\d+)' _TESTS = [{ 'url': 'https://charlierose.com/videos/27996', 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', @@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor): }, { 'url': 'https://charlierose.com/videos/27996', 'only_matching': True, + }, { + 'url': 'https://charlierose.com/episodes/30887?autoplay=true', + 'only_matching': True, }] _PLAYER_BASE = 'https://charlierose.com/video/player/%s' -- cgit v1.1 From 7998520933db70625ff3859c464fa4bbf2ff7fac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Aug 2017 00:47:58 +0700 Subject: [youtube] Fix upload date extraction (closes #14065) --- youtube_dl/extractor/youtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5a6b735..563cf62 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1665,10 +1665,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not upload_date: upload_date = self._search_regex( [r'(?s)id="eow-date.*?>(.*?)', - r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)'], + r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'], video_webpage, 'upload date', default=None) - if upload_date: - upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) upload_date = unified_strdate(upload_date) video_license = self._html_search_regex( -- cgit v1.1 From 8d81f3e36d3caaeabcdff99d3340d4075d30741e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Sep 2017 00:57:14 +0700 Subject: [youtube] Force old layout for each webpage (closes 
#14083) --- youtube_dl/extractor/youtube.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 563cf62..953e382 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, + compat_kwargs, compat_parse_qs, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, @@ -245,6 +246,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return True + def _download_webpage(self, *args, **kwargs): + kwargs.setdefault('query', {})['disable_polymer'] = 'true' + return super(YoutubeBaseInfoExtractor, self)._download_webpage( + *args, **compat_kwargs(kwargs)) + def _real_initialize(self): if self._downloader is None: return @@ -2052,7 +2058,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | (%(playlist_id)s) )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} - _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' + _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})&[^"]*?index=(?P\d+)(?:[^>]+>(?P[^<]+))?' 
IE_NAME = 'youtube:playlist' _TESTS = [{ -- cgit v1.1 From 8681ed7fc80a6b4b99e3a57adf5ec8d4177a601c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Sep 2017 01:04:22 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index ef9ac46..d89fb3d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version <unreleased> + +Extractors +* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076, + #14077, #14079, #14082, #14083, #14094, #14095, #14096) +* [youtube] Fix upload date extraction (#14065) ++ [charlierose] Add support for episodes (#14062) ++ [bbccouk] Add support for w-prefixed ids (#14056) +* [googledrive] Extend URL regular expression (#9785) ++ [googledrive] Add support for source format (#14046) +* [pornhd] Fix extraction (#14005) + + version 2017.08.27.1 Extractors -- cgit v1.1 From a2022b0c406286aa3d5101f1c85e7d11453e89d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Sep 2017 01:08:32 +0700 Subject: release 2017.09.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7959d91..bd9e219 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.27.1 +[debug] youtube-dl version 2017.09.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d89fb3d..c439c8e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.09.02 Extractors * [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076, diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7504ef0..60ed35d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.27.1' +__version__ = '2017.09.02' -- cgit v1.1 From a3431e12249530aa7a6962e54bcdbfce190c4c4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Sep 2017 15:33:54 +0700 Subject: [radiocanada] Skip unsupported platforms (closes #14100) --- youtube_dl/extractor/radiocanada.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py 
b/youtube_dl/extractor/radiocanada.py index 3b40002..6bbc278 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -59,6 +59,7 @@ class RadioCanadaIE(InfoExtractor): device_types.append('android') formats = [] + error = None # TODO: extract f4m formats # f4m formats can be extracted using flashhd device_type but they produce unplayable file for device_type in device_types: @@ -84,8 +85,8 @@ class RadioCanadaIE(InfoExtractor): if not v_url: continue if v_url == 'null': - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, xpath_text(v_data, 'message')), expected=True) + error = xpath_text(v_data, 'message') + continue ext = determine_ext(v_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -129,6 +130,9 @@ class RadioCanadaIE(InfoExtractor): formats.extend(self._extract_f4m_formats( base_url + '/manifest.f4m', video_id, f4m_id='hds', fatal=False)) + if not formats and error: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) self._sort_formats(formats) subtitles = {} -- cgit v1.1 From 64f0e30b93db804323a6db382d4ccdf1dac4e38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Sep 2017 15:44:49 +0700 Subject: [viidea] Capture and output lecture error message (#14099) --- youtube_dl/extractor/viidea.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index 4adcd18..a0abbae 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -4,12 +4,14 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urlparse, + compat_HTTPError, compat_str, + compat_urlparse, ) from ..utils import ( - parse_duration, + ExtractorError, js_to_json, + parse_duration, parse_iso8601, ) @@ -128,9 +130,16 @@ class ViideaIE(InfoExtractor): base_url = self._proto_relative_url(cfg['livepipe'], 'http:') - lecture_data = 
self._download_json( - '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id), - lecture_id)['lecture'][0] + try: + lecture_data = self._download_json( + '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id), + lecture_id)['lecture'][0] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + msg = self._parse_json( + e.cause.read().decode('utf-8'), lecture_id) + raise ExtractorError(msg['detail'], expected=True) + raise lecture_info = { 'id': lecture_id, -- cgit v1.1 From 503115540d8f135dc944ae48e40ba78f36238867 Mon Sep 17 00:00:00 2001 From: dubber0 <rexa.mose@gmail.com> Date: Sat, 22 Jul 2017 21:32:51 +0200 Subject: [aliexpress:live] Add extractor --- youtube_dl/extractor/aliexpress.py | 40 ++++++++++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 41 insertions(+) create mode 100644 youtube_dl/extractor/aliexpress.py diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py new file mode 100644 index 0000000..3997213 --- /dev/null +++ b/youtube_dl/extractor/aliexpress.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +import re + +from .common import InfoExtractor +from ..utils import try_get, float_or_none +from ..compat import compat_str + + +class AliExpressLiveIE(InfoExtractor): + + _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})' + _TEST = { + 'url': 'https://live.aliexpress.com/live/2800002704436634', + 'md5': '7ac2bc46afdd18f0b45a0a340fc47ffe', + 'info_dict': { + 'id': '2800002704436634', + 'ext': 'm3u8', + 'title': 'CASIMA7.22', + 'uploader': 'CASIMA Official Store', + 'upload_date': '20170714', + 'timestamp': 1500027138, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + page = self._download_webpage(url, video_id) + run_params_json = self._search_regex(r'runParams = (.+)[\s+]var myCtl', page, 'runParams', flags=re.DOTALL) + run_params = 
self._parse_json(run_params_json, video_id) + + return { + 'id': video_id, + 'title': run_params['title'], + 'url': run_params['replyStreamUrl'], + 'uploader': try_get(run_params, lambda x: x['followBar']['name'], compat_str), + 'timestamp': float_or_none(try_get(run_params, lambda x: x['followBar']['createTime']) / 1000), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 17048fd..d335f9f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -45,6 +45,7 @@ from .anvato import AnvatoIE from .anysex import AnySexIE from .aol import AolIE from .allocine import AllocineIE +from .aliexpress import AliExpressLiveIE from .aparat import AparatIE from .appleconnect import AppleConnectIE from .appletrailers import ( -- cgit v1.1 From 23b2df82c70a832e485aaf52befa26e27a904995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Sep 2017 16:04:36 +0700 Subject: [aliexpress:live] Fix issues (closes #13698, closes #13707) --- youtube_dl/extractor/aliexpress.py | 47 ++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py index 3997213..6f241e6 100644 --- a/youtube_dl/extractor/aliexpress.py +++ b/youtube_dl/extractor/aliexpress.py @@ -1,40 +1,53 @@ # coding: utf-8 from __future__ import unicode_literals - -import re - from .common import InfoExtractor -from ..utils import try_get, float_or_none from ..compat import compat_str +from ..utils import ( + float_or_none, + try_get, +) class AliExpressLiveIE(InfoExtractor): - - _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})' + _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)' _TEST = { 'url': 'https://live.aliexpress.com/live/2800002704436634', - 'md5': '7ac2bc46afdd18f0b45a0a340fc47ffe', + 'md5': 'e729e25d47c5e557f2630eaf99b740a5', 'info_dict': { 'id': 
'2800002704436634', - 'ext': 'm3u8', + 'ext': 'mp4', 'title': 'CASIMA7.22', + 'thumbnail': r're:http://.*\.jpg', 'uploader': 'CASIMA Official Store', - 'upload_date': '20170714', - 'timestamp': 1500027138, + 'timestamp': 1500717600, + 'upload_date': '20170722', }, } def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - run_params_json = self._search_regex(r'runParams = (.+)[\s+]var myCtl', page, 'runParams', flags=re.DOTALL) - run_params = self._parse_json(run_params_json, video_id) + + webpage = self._download_webpage(url, video_id) + + data = self._parse_json( + self._search_regex( + r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var', + webpage, 'runParams'), + video_id) + + title = data['title'] + + formats = self._extract_m3u8_formats( + data['replyStreamUrl'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') return { 'id': video_id, - 'title': run_params['title'], - 'url': run_params['replyStreamUrl'], - 'uploader': try_get(run_params, lambda x: x['followBar']['name'], compat_str), - 'timestamp': float_or_none(try_get(run_params, lambda x: x['followBar']['createTime']) / 1000), + 'title': title, + 'thumbnail': data.get('coverUrl'), + 'uploader': try_get( + data, lambda x: x['followBar']['name'], compat_str), + 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), + 'formats': formats, } -- cgit v1.1 From 73602bcd0c254b735cc93ce5ffeca9e98228190e Mon Sep 17 00:00:00 2001 From: Tatsuyuki Ishi <ishitatsuyuki@gmail.com> Date: Fri, 1 Sep 2017 17:08:24 +0900 Subject: [soundcloud] Fix download URL with private tracks --- youtube_dl/extractor/soundcloud.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2e52e09..23dcac8 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,8 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals 
-import re import itertools +import re from .common import ( InfoExtractor, @@ -17,7 +17,7 @@ from ..utils import ( ExtractorError, int_or_none, unified_strdate, -) + update_url_query) class SoundcloudIE(InfoExtractor): @@ -160,11 +160,13 @@ class SoundcloudIE(InfoExtractor): 'license': info.get('license'), } formats = [] + query = {'client_id': self._CLIENT_ID} + if secret_token is not None: + query['secret_token'] = secret_token if info.get('downloadable', False): # We can build a direct link to the song - format_url = ( - 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format( - track_id, self._CLIENT_ID)) + format_url = update_url_query( + 'https://api.soundcloud.com/tracks/{0}/download'.format(track_id), query) formats.append({ 'format_id': 'download', 'ext': info.get('original_format', 'mp3'), @@ -176,10 +178,7 @@ class SoundcloudIE(InfoExtractor): # We have to retrieve the url format_dict = self._download_json( 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id, - track_id, 'Downloading track url', query={ - 'client_id': self._CLIENT_ID, - 'secret_token': secret_token, - }) + track_id, 'Downloading track url', query=query) for key, stream_url in format_dict.items(): abr = int_or_none(self._search_regex( @@ -216,7 +215,7 @@ class SoundcloudIE(InfoExtractor): # cannot be always used, sometimes it can give an HTTP 404 error formats.append({ 'format_id': 'fallback', - 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, + 'url': update_url_query(info['stream_url'], query), 'ext': ext, }) -- cgit v1.1 From d7c7100e3d920512a11bf7c6fee21e26da7ffa73 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 3 Sep 2017 16:18:24 +0700 Subject: [soundcloud] Simplify and add test (closes #14093) --- youtube_dl/extractor/soundcloud.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 23dcac8..1c6799d 100644 --- 
a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -17,7 +17,8 @@ from ..utils import ( ExtractorError, int_or_none, unified_strdate, - update_url_query) + update_url_query, +) class SoundcloudIE(InfoExtractor): @@ -120,6 +121,21 @@ class SoundcloudIE(InfoExtractor): 'license': 'cc-by-sa', }, }, + # private link, downloadable format + { + 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', + 'md5': '64a60b16e617d41d0bef032b7f55441e', + 'info_dict': { + 'id': '340344461', + 'ext': 'wav', + 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', + 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', + 'uploader': 'Ori Uplift Music', + 'upload_date': '20170831', + 'duration': 7449, + 'license': 'all-rights-reserved', + }, + }, ] _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg' @@ -166,7 +182,7 @@ class SoundcloudIE(InfoExtractor): if info.get('downloadable', False): # We can build a direct link to the song format_url = update_url_query( - 'https://api.soundcloud.com/tracks/{0}/download'.format(track_id), query) + 'https://api.soundcloud.com/tracks/%s/download' % track_id, query) formats.append({ 'format_id': 'download', 'ext': info.get('original_format', 'mp3'), -- cgit v1.1 From 0cbb841ba94c8d813ff81e817154c5491a796f20 Mon Sep 17 00:00:00 2001 From: Timendum <timedum@gmail.com> Date: Thu, 31 Aug 2017 12:56:37 +0200 Subject: [bpb] Fix extraction (closes #14043) --- youtube_dl/extractor/bpb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 9661ade..14bc0f7 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -33,13 +33,13 @@ class BpbIE(InfoExtractor): title = self._html_search_regex( r'<h2 class="white">(.*?)</h2>', webpage, 'title') video_info_dicts = re.findall( - r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage) + 
r"({\s*src\s*:\s*'https://film\.bpb\.de/[^}]+})", webpage) formats = [] for video_info in video_info_dicts: video_info = self._parse_json(video_info, video_id, transform_source=js_to_json) - quality = video_info['quality'] video_url = video_info['src'] + quality = 'high' if re.search(r'_high\.', video_url) else 'low' formats.append({ 'url': video_url, 'preference': 10 if quality == 'high' else 0, -- cgit v1.1 From c1c1585b316995ca47b59e8dc1e3b463beb1c54a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Sep 2017 16:38:43 +0700 Subject: [bpb] Improve (closes #14086) --- youtube_dl/extractor/bpb.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 14bc0f7..0783353 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -33,13 +33,18 @@ class BpbIE(InfoExtractor): title = self._html_search_regex( r'<h2 class="white">(.*?)</h2>', webpage, 'title') video_info_dicts = re.findall( - r"({\s*src\s*:\s*'https://film\.bpb\.de/[^}]+})", webpage) + r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage) formats = [] for video_info in video_info_dicts: - video_info = self._parse_json(video_info, video_id, transform_source=js_to_json) - video_url = video_info['src'] - quality = 'high' if re.search(r'_high\.', video_url) else 'low' + video_info = self._parse_json( + video_info, video_id, transform_source=js_to_json, fatal=False) + if not video_info: + continue + video_url = video_info.get('src') + if not video_url: + continue + quality = 'high' if '_high' in video_url else 'low' formats.append({ 'url': video_url, 'preference': 10 if quality == 'high' else 0, -- cgit v1.1 From 0b4a8eb3ac823c26b037eb368c114ce6d976c5c3 Mon Sep 17 00:00:00 2001 From: theychx <boomingzooming@gmail.com> Date: Mon, 28 Aug 2017 21:35:57 +0200 Subject: [vidme:user] Relax _VALID_URLs --- youtube_dl/extractor/vidme.py | 34 
++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index a7971d7..39b65ed 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -263,29 +263,43 @@ class VidmeListBaseIE(InfoExtractor): class VidmeUserIE(VidmeListBaseIE): IE_NAME = 'vidme:user' - _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)' + _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)' _API_ITEM = 'list' _TITLE = 'Videos' - _TEST = { - 'url': 'https://vid.me/EFARCHIVE', + _TESTS = [{ + 'url': 'https://vid.me/MasakoX', 'info_dict': { - 'id': '3834632', - 'title': 'EFARCHIVE - %s' % _TITLE, + 'id': '16112341', + 'title': 'MasakoX - %s' % _TITLE, }, - 'playlist_mincount': 238, - } + 'playlist_mincount': 191, + }, { + 'url': 'https://vid.me/unsQuare_netWork', + 'info_dict': { + 'id': '16148757', + 'title': 'unsQuare_netWork - %s' % _TITLE, + }, + 'playlist_mincount': 73, + }] class VidmeUserLikesIE(VidmeListBaseIE): IE_NAME = 'vidme:user:likes' - _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes' + _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes' _API_ITEM = 'likes' _TITLE = 'Likes' - _TEST = { + _TESTS = [{ 'url': 'https://vid.me/ErinAlexis/likes', 'info_dict': { 'id': '6483530', 'title': 'ErinAlexis - %s' % _TITLE, }, 'playlist_mincount': 415, - } + }, { + 'url': 'https://vid.me/Kaleidoscope-Ish/likes', + 'info_dict': { + 'id': '16908594', + 'title': 'Kaleidoscope-Ish - %s' % _TITLE, + }, + 'playlist_mincount': 43, + }] -- cgit v1.1 From bc35f075370ed1e67fe71c544e6243a2fc4fa430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Sep 2017 17:02:11 +0700 Subject: [vidme:user] Make tests only matching (closes #14054) --- youtube_dl/extractor/vidme.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 
deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index 39b65ed..59adb23 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -275,11 +275,7 @@ class VidmeUserIE(VidmeListBaseIE): 'playlist_mincount': 191, }, { 'url': 'https://vid.me/unsQuare_netWork', - 'info_dict': { - 'id': '16148757', - 'title': 'unsQuare_netWork - %s' % _TITLE, - }, - 'playlist_mincount': 73, + 'only_matching': True, }] @@ -297,9 +293,5 @@ class VidmeUserLikesIE(VidmeListBaseIE): 'playlist_mincount': 415, }, { 'url': 'https://vid.me/Kaleidoscope-Ish/likes', - 'info_dict': { - 'id': '16908594', - 'title': 'Kaleidoscope-Ish - %s' % _TITLE, - }, - 'playlist_mincount': 43, + 'only_matching': True, }] -- cgit v1.1 From e9b865267aaa90e3b9e1b0468d20a4df31e13393 Mon Sep 17 00:00:00 2001 From: John D <jdong1992@gmail.com> Date: Wed, 30 Aug 2017 00:14:43 -0700 Subject: [manyvids] Add support for preview videos (closes #14053) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/manyvids.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/manyvids.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d335f9f..46a11f3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -564,6 +564,7 @@ from .mangomolo import ( MangomoloVideoIE, MangomoloLiveIE, ) +from .manyvids import ManyVidsIE from .matchtv import MatchTVIE from .mdr import MDRIE from .mediaset import MediasetIE diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py new file mode 100644 index 0000000..ea739ce --- /dev/null +++ b/youtube_dl/extractor/manyvids.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class ManyVidsIE(InfoExtractor): + _VALID_URL = 
r'https?://www.manyvids\.com/Video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', + 'md5': '03f11bb21c52dd12a05be21a5c7dcc97', + 'info_dict': { + 'id': '133957', + 'ext': 'mp4', + 'title': 'everthing about me', + + } + } + + def _real_extract(self, url): + formats = [] + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_url = compat_urllib_parse_unquote(self._search_regex( + r'data-video-filepath=\"(.+?)\"', webpage, 'video URL', default='')) + + title = self._html_search_regex(r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title') + formats.append({ + 'url': video_url + }) + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } -- cgit v1.1 From efc57145c10bdf22da9d8571c35ccd0404e3b7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Sep 2017 17:30:02 +0700 Subject: [manyvids] Improve (closes #14059) --- youtube_dl/extractor/manyvids.py | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py index ea739ce..b94b3c2 100644 --- a/youtube_dl/extractor/manyvids.py +++ b/youtube_dl/extractor/manyvids.py @@ -2,35 +2,47 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..utils import int_or_none class ManyVidsIE(InfoExtractor): - _VALID_URL = r'https?://www.manyvids\.com/Video/(?P<id>[0-9]+)' + _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)' _TEST = { 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', 'md5': '03f11bb21c52dd12a05be21a5c7dcc97', 'info_dict': { 'id': '133957', 'ext': 'mp4', - 'title': 'everthing about me', - - } + 'title': 'everthing about me (Preview)', + 'view_count': int, + 'like_count': int, + }, } def _real_extract(self, url): - formats = [] video_id = 
self._match_id(url) + webpage = self._download_webpage(url, video_id) - video_url = compat_urllib_parse_unquote(self._search_regex( - r'data-video-filepath=\"(.+?)\"', webpage, 'video URL', default='')) - title = self._html_search_regex(r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title') - formats.append({ - 'url': video_url - }) + video_url = self._search_regex( + r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video URL', group='url') + + title = '%s (Preview)' % self._html_search_regex( + r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title') + + like_count = int_or_none(self._search_regex( + r'data-likes=["\'](\d+)', webpage, 'like count', default=None)) + view_count = int_or_none(self._html_search_regex( + r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage, + 'view count', default=None)) + return { 'id': video_id, 'title': title, - 'formats': formats, + 'view_count': view_count, + 'like_count': like_count, + 'formats': [{ + 'url': video_url, + }], } -- cgit v1.1 From 6348671c4a4e9f45cebc107ff4c148ef4970bb39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Sep 2017 23:08:07 +0700 Subject: [arte] Relax unavailability check (closes #14112) --- youtube_dl/extractor/arte.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 02613cf..5cde90c 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -82,7 +82,7 @@ class ArteTVBaseIE(InfoExtractor): vsr = player_info['VSR'] - if not vsr and not player_info.get('VRU'): + if not vsr: raise ExtractorError( 'Video %s is not available' % player_info.get('VID') or video_id, expected=True) -- cgit v1.1 From 880fa66f4ffa9afcfce91b5ce39f05909050da67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Sep 2017 22:45:07 +0700 Subject: [redtube] Fix formats extraction (closes 
#14122) --- youtube_dl/extractor/redtube.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index c367a6a..f70a752 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -62,7 +63,23 @@ class RedTubeIE(InfoExtractor): 'format_id': format_id, 'height': int_or_none(format_id), }) - else: + medias = self._parse_json( + self._search_regex( + r'mediaDefinition\s*:\s*(\[.+?\])', webpage, + 'media definitions', default='{}'), + video_id, fatal=False) + if medias and isinstance(medias, list): + for media in medias: + format_url = media.get('videoUrl') + if not format_url or not isinstance(format_url, compat_str): + continue + format_id = media.get('quality') + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': int_or_none(format_id), + }) + if not formats: video_url = self._html_search_regex( r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') formats.append({'url': video_url}) @@ -73,7 +90,7 @@ class RedTubeIE(InfoExtractor): r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<', webpage, 'upload date', fatal=False)) duration = int_or_none(self._search_regex( - r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) + r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)', webpage, 'view count', fatal=False)) -- cgit v1.1 From c5c9bf0c120d2c481124a0c3913b981cf061fb95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Sep 2017 23:31:34 +0700 Subject: [YoutubeDL] Ensure dir existence for each requested format (closes #14116) --- youtube_dl/YoutubeDL.py | 24 
++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5f4c93e..4f208f1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1710,12 +1710,17 @@ class YoutubeDL(object): if filename is None: return - try: - dn = os.path.dirname(sanitize_path(encodeFilename(filename))) - if dn and not os.path.exists(dn): - os.makedirs(dn) - except (OSError, IOError) as err: - self.report_error('unable to create directory ' + error_to_compat_str(err)) + def ensure_dir_exists(path): + try: + dn = os.path.dirname(path) + if dn and not os.path.exists(dn): + os.makedirs(dn) + return True + except (OSError, IOError) as err: + self.report_error('unable to create directory ' + error_to_compat_str(err)) + return False + + if not ensure_dir_exists(sanitize_path(encodeFilename(filename))): return if self.params.get('writedescription', False): @@ -1853,8 +1858,11 @@ class YoutubeDL(object): for f in requested_formats: new_info = dict(info_dict) new_info.update(f) - fname = self.prepare_filename(new_info) - fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext']) + fname = prepend_extension( + self.prepare_filename(new_info), + 'f%s' % f['format_id'], new_info['ext']) + if not ensure_dir_exists(fname): + return downloaded.append(fname) partial_success = dl(fname, new_info) success = success and partial_success -- cgit v1.1 From 66c9fa36c10860b380806b9de48f38d628289e03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 Sep 2017 00:48:37 +0700 Subject: [youtube] Separate methods for embeds extraction --- youtube_dl/extractor/generic.py | 33 ++++----------------------------- youtube_dl/extractor/youtube.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c81efdc..b83c183 100644 --- 
a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor): if vid_me_embed_url is not None: return self.url_result(vid_me_embed_url, 'Vidme') - # Look for embedded YouTube player - matches = re.findall(r'''(?x) - (?: - <iframe[^>]+?src=| - data-video-url=| - <embed[^>]+?src=| - embedSWF\(?:\s*| - <object[^>]+data=| - new\s+SWFObject\( - ) - (["\']) - (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ - (?:embed|v|p)/.+?) - \1''', webpage) - if matches: + # Look for YouTube embeds + youtube_urls = YoutubeIE._extract_urls(webpage) + if youtube_urls: return self.playlist_from_matches( - matches, video_id, video_title, lambda m: unescapeHTML(m[1])) - - # Look for lazyYT YouTube embed - matches = re.findall( - r'class="lazyYT" data-youtube-id="([^"]+)"', webpage) - if matches: - return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m)) - - # Look for Wordpress "YouTube Video Importer" plugin - matches = re.findall(r'''(?x)<div[^>]+ - class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ - data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) - if matches: - return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1]) + youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key()) matches = DailymotionIE._extract_urls(webpage) if matches: diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 953e382..ad2e933 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor): playback_url, video_id, 'Marking watched', 'Unable to mark watched', fatal=False) + @staticmethod + def _extract_urls(webpage): + # Embedded YouTube player + entries = [ + unescapeHTML(mobj.group('url')) + for mobj in re.finditer(r'''(?x) + (?: + <iframe[^>]+?src=| + data-video-url=| + <embed[^>]+?src=| + embedSWF\(?:\s*| + 
<object[^>]+data=| + new\s+SWFObject\( + ) + (["\']) + (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ + (?:embed|v|p)/.+?) + \1''', webpage)] + + # lazyYT YouTube embed + entries.extend(list(map( + unescapeHTML, + re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))) + + # Wordpress "YouTube Video Importer" plugin + matches = re.findall(r'''(?x)<div[^>]+ + class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ + data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) + entries.extend(m[-1] for m in matches) + + return entries + + @staticmethod + def _extract_url(webpage): + urls = YoutubeIE._extract_urls(webpage) + return urls[0] if urls else None + @classmethod def extract_id(cls, url): mobj = re.match(cls._VALID_URL, url, re.VERBOSE) -- cgit v1.1 From 5113b6912467619bd463c5ebefe759d07078bea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 Sep 2017 00:50:25 +0700 Subject: [abcnews,chilloutsoze,cracked,vice,vk] Use dedicated YouTube embeds extraction routines --- youtube_dl/extractor/abcnews.py | 7 +++---- youtube_dl/extractor/chilloutzone.py | 9 ++++----- youtube_dl/extractor/cracked.py | 7 +++---- youtube_dl/extractor/vice.py | 7 +++---- youtube_dl/extractor/vk.py | 7 +++---- 5 files changed, 16 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index 74d5456..f770fe9 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -7,6 +7,7 @@ import time from .amp import AMPIE from .common import InfoExtractor +from .youtube import YoutubeIE from ..compat import compat_urlparse @@ -108,9 +109,7 @@ class AbcNewsIE(InfoExtractor): r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL') full_video_url = compat_urlparse.urljoin(url, video_url) - youtube_url = self._html_search_regex( - r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"', - webpage, 'YouTube URL', default=None) 
+ youtube_url = YoutubeIE._extract_url(webpage) timestamp = None date_str = self._html_search_regex( @@ -140,7 +139,7 @@ class AbcNewsIE(InfoExtractor): } if youtube_url: - entries = [entry, self.url_result(youtube_url, 'Youtube')] + entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())] return self.playlist_result(entries) return entry diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py index 0206d96..d4769da 100644 --- a/youtube_dl/extractor/chilloutzone.py +++ b/youtube_dl/extractor/chilloutzone.py @@ -5,6 +5,7 @@ import base64 import json from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( clean_html, ExtractorError @@ -70,11 +71,9 @@ class ChilloutzoneIE(InfoExtractor): # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed) if native_platform is None: - youtube_url = self._html_search_regex( - r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', - webpage, 'fallback video URL', default=None) - if youtube_url is not None: - return self.url_result(youtube_url, ie='Youtube') + youtube_url = YoutubeIE._extract_url(webpage) + if youtube_url: + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) # Non Fallback: Decide to use native source (e.g. 
youtube or vimeo) or # the own CDN diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index 94d03ce..f77a68e 100644 --- a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( parse_iso8601, str_to_int, @@ -41,11 +42,9 @@ class CrackedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - youtube_url = self._search_regex( - r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', - webpage, 'youtube url', default=None) + youtube_url = YoutubeIE._extract_url(webpage) if youtube_url: - return self.url_result(youtube_url, 'Youtube') + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) video_url = self._html_search_regex( [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 54e207b..b8b8bf9 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -7,6 +7,7 @@ import hashlib import json from .adobepass import AdobePassIE +from .youtube import YoutubeIE from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( @@ -261,11 +262,9 @@ class ViceArticleIE(InfoExtractor): if embed_code: return _url_res('ooyala:%s' % embed_code, 'Ooyala') - youtube_url = self._html_search_regex( - r'<iframe[^>]+src="(.*youtube\.com/.*)"', - body, 'YouTube URL', default=None) + youtube_url = YoutubeIE._extract_url(body) if youtube_url: - return _url_res(youtube_url, 'Youtube') + return _url_res(youtube_url, YoutubeIE.ie_key()) video_url = self._html_search_regex( r'data-video-url="([^"]+)"', diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index dc2719c..105e172 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -25,6 +25,7 @@ from ..utils import ( from .dailymotion import 
DailymotionIE from .pladform import PladformIE from .vimeo import VimeoIE +from .youtube import YoutubeIE class VKBaseIE(InfoExtractor): @@ -345,11 +346,9 @@ class VKIE(VKBaseIE): if re.search(error_re, info_page): raise ExtractorError(error_msg % video_id, expected=True) - youtube_url = self._search_regex( - r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', - info_page, 'youtube iframe', default=None) + youtube_url = YoutubeIE._extract_url(info_page) if youtube_url: - return self.url_result(youtube_url, 'Youtube') + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: -- cgit v1.1 From 931edb2ada89db2bf3596ce1ad5c4d808914c7ab Mon Sep 17 00:00:00 2001 From: Olivier Bilodeau <olivier@bottomlesspit.org> Date: Fri, 8 Sep 2017 10:53:24 -0400 Subject: [radiocanada] Add fallback for title extraction --- youtube_dl/extractor/radiocanada.py | 45 +++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 6bbc278..b952e59 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -20,20 +20,37 @@ from ..utils import ( class RadioCanadaIE(InfoExtractor): IE_NAME = 'radiocanada' _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' - _TEST = { - 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', - 'info_dict': { - 'id': '7184272', - 'ext': 'mp4', - 'title': 'Le parcours du tireur capté sur vidéo', - 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', - 'upload_date': '20141023', - }, - 'params': { - # m3u8 download - 'skip_download': True, + _TESTS = [ + { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 
'ext': 'mp4', + 'title': 'Le parcours du tireur capté sur vidéo', + 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, - } + { + # empty Title + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', + 'info_dict': { + 'id': '7754998', + 'ext': 'mp4', + 'title': 'letelejournal22h', + 'description': 'INTEGRALE WEB 22H-TJ', + 'upload_date': '20170720', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + ] def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -145,7 +162,7 @@ class RadioCanadaIE(InfoExtractor): return { 'id': video_id, - 'title': get_meta('Title'), + 'title': get_meta('Title') or get_meta('AV-nomEmission'), 'description': get_meta('Description') or get_meta('ShortDescription'), 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), 'duration': int_or_none(get_meta('length')), -- cgit v1.1 From 51aee72d16eb844377a44c12e50dbb95cd4ced27 Mon Sep 17 00:00:00 2001 From: kayb94 <30302445+kayb94@users.noreply.github.com> Date: Fri, 8 Sep 2017 15:13:17 +0000 Subject: [README.md] Clarify how to run extractor specific test cases --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6f5d00d..28ee63f 100644 --- a/README.md +++ b/README.md @@ -936,6 +936,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file python test/test_download.py nosetests +See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. + If you want to create a build of youtube-dl yourself, you'll need * python @@ -1003,7 +1005,7 @@ After you have ensured this site is distributing its content legally, you can fo } ``` 5. 
Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). -6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. +6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted in this numbering. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 9.
When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: -- cgit v1.1 From debed8d759e74507371758d2344ce5afe5e237c2 Mon Sep 17 00:00:00 2001 From: luceatnobis <wehrmeyer.martin@web.de> Date: Tue, 4 Jul 2017 11:26:02 +0200 Subject: [rutube:playlist] Add extractor (closes #13534) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rutube.py | 84 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 46a11f3..aefadc5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -899,6 +899,7 @@ from .rutube import ( RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, + RutubePlaylistIE, ) from .rutv import RUTVIE from .ruutu import RuutuIE diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 889fa76..a6b17c0 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -7,10 +7,14 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_str, + compat_parse_qs, + compat_urllib_parse_urlparse, ) from ..utils import ( determine_ext, unified_strdate, + try_get, + int_or_none, ) @@ -42,8 +46,24 @@ class RutubeIE(InfoExtractor): }, { 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, + }, { + 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + parts = compat_urllib_parse_urlparse(url) + params = compat_parse_qs(parts.query) + + # see if URL without parameters is OK + res = super(RutubeIE, cls).suitable(url) + + if params: # we only allow pl_id parameter in the url + res = res and 'pl_id' in params and len(params) == 1 + + return res + @staticmethod def _extract_urls(webpage): 
return [mobj.group('url') for mobj in re.finditer( @@ -193,3 +213,67 @@ class RutubePersonIE(RutubeChannelIE): }] _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + + +class RutubePlaylistIE(InfoExtractor): + IE_NAME = 'rutube:playlist' + IE_DESC = 'Rutube playlists' + _TESTS = [{ + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', + 'info_dict': { + 'id': '4252', + }, + 'playlist_count': 25, + }] + + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+)' + _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s' + + @staticmethod + def suitable(url): + params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + return params.get('pl_id') and int_or_none(params['pl_id'][0]) \ + and params.get('pl_type') + + def _real_extract(self, url): + playlist_id = self._match_id(url) + return self._extract_playlist(playlist_id) + + def _extract_playlist(self, playlist_id): + entries = [] + for pagenum in itertools.count(1): + page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum) + + # download_json will sent an accept: application/xml header + page = self._download_json(page_url, playlist_id, + "Downloading metadata for page %s" % pagenum, + headers={'Accept': 'application/json'}) + + if not page['results']: + break + + results = page['results'] + for result in results: + entry = self.url_result(result.get('video_url'), 'Rutube') + category = try_get(result, lambda x: x['category']['name']) + entry.update({ + 'id': result.get('id'), + 'uploader': try_get(result, lambda x: x['author']['name']), + 'uploader_id': try_get(result, lambda x: x['author']['id']), + 'upload_date': unified_strdate(result.get('created_ts')), + 'title': result.get('title'), + 'description': result.get('description'), + 'thumbnail': result.get('thumbnail_url'), + 'duration': int_or_none(result.get('duration')), + 'category': [category] if category else None, + 
'age_limit': 18 if result.get('is_adult') else 0, + 'view_count': int_or_none(result.get('hits')), + 'is_live': result.get('is_livestream'), + 'webpage_url': result.get('video_url'), + }) + entries.append(entry) + + if page['has_next'] is False: + break + + return self.playlist_result(entries, playlist_id, page['name']) -- cgit v1.1 From 48b813748d91acc7e9efc15075079a03faea18ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 18:39:13 +0700 Subject: [rutube] Rework and generalize playlist extractors (closes #13565) --- youtube_dl/extractor/rutube.py | 216 ++++++++++++++++++++--------------------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index a6b17c0..5a18487 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -12,34 +12,60 @@ from ..compat import ( ) from ..utils import ( determine_ext, - unified_strdate, + unified_timestamp, try_get, int_or_none, ) -class RutubeIE(InfoExtractor): +class RutubeBaseIE(InfoExtractor): + def _extract_video(self, video, video_id=None, require_title=True): + title = video['title'] if require_title else video.get('title') + + age_limit = video.get('is_adult') + if age_limit is not None: + age_limit = 18 if age_limit is True else 0 + + uploader_id = try_get(video, lambda x: x['author']['id']) + category = try_get(video, lambda x: x['category']['name']) + + return { + 'id': video.get('id') or video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': video.get('thumbnail_url'), + 'duration': int_or_none(video.get('duration')), + 'uploader': try_get(video, lambda x: x['author']['name']), + 'uploader_id': compat_str(uploader_id) if uploader_id else None, + 'timestamp': unified_timestamp(video.get('created_ts')), + 'category': [category] if category else None, + 'age_limit': age_limit, + 'view_count': int_or_none(video.get('hits')), + 
'comment_count': int_or_none(video.get('comments_count')), + 'is_live': video.get('is_livestream'), + } + + +class RutubeIE(RutubeBaseIE): IE_NAME = 'rutube' IE_DESC = 'Rutube videos' _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})' _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + 'md5': '79938ade01294ef7e27574890d0d3769', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Раненный кенгуру забежал в аптеку', 'description': 'http://www.ntdtv.ru ', 'duration': 80, 'uploader': 'NTDRussian', 'uploader_id': '29790', + 'timestamp': 1381943602, 'upload_date': '20131016', 'age_limit': 0, }, - 'params': { - # It requires ffmpeg (m3u8 download) - 'skip_download': True, - }, }, { 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, @@ -49,20 +75,14 @@ class RutubeIE(InfoExtractor): }, { 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', 'only_matching': True, + }, { + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', + 'only_matching': True, }] @classmethod def suitable(cls, url): - parts = compat_urllib_parse_urlparse(url) - params = compat_parse_qs(parts.query) - - # see if URL without parameters is OK - res = super(RutubeIE, cls).suitable(url) - - if params: # we only allow pl_id parameter in the url - res = res and 'pl_id' in params and len(params) == 1 - - return res + return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url) @staticmethod def _extract_urls(webpage): @@ -72,12 +92,12 @@ class RutubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + video = self._download_json( 'http://rutube.ru/api/video/%s/?format=json' % video_id, video_id, 'Downloading video JSON') - # Some videos don't have the author field - author = video.get('author') or {} + info = self._extract_video(video, video_id) options = 
self._download_json( 'http://rutube.ru/api/play/options/%s/?format=json' % video_id, @@ -99,19 +119,8 @@ class RutubeIE(InfoExtractor): }) self._sort_formats(formats) - return { - 'id': video['id'], - 'title': video['title'], - 'description': video['description'], - 'duration': video['duration'], - 'view_count': video['hits'], - 'formats': formats, - 'thumbnail': video['thumbnail_url'], - 'uploader': author.get('name'), - 'uploader_id': compat_str(author['id']) if author else None, - 'upload_date': unified_strdate(video['created_ts']), - 'age_limit': 18 if video['is_adult'] else 0, - } + info['formats'] = formats + return info class RutubeEmbedIE(InfoExtractor): @@ -123,7 +132,8 @@ class RutubeEmbedIE(InfoExtractor): 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'info_dict': { 'id': 'a10e53b86e8f349080f718582ce4c661', - 'ext': 'mp4', + 'ext': 'flv', + 'timestamp': 1387830582, 'upload_date': '20131223', 'uploader_id': '297833', 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', @@ -131,7 +141,7 @@ class RutubeEmbedIE(InfoExtractor): 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', }, 'params': { - 'skip_download': 'Requires ffmpeg', + 'skip_download': True, }, }, { 'url': 'http://rutube.ru/play/embed/8083783', @@ -145,10 +155,51 @@ class RutubeEmbedIE(InfoExtractor): canonical_url = self._html_search_regex( r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, 'Canonical URL') - return self.url_result(canonical_url, 'Rutube') + return self.url_result(canonical_url, RutubeIE.ie_key()) -class RutubeChannelIE(InfoExtractor): +class RutubePlaylistBaseIE(RutubeBaseIE): + def _next_page_url(self, page_num, playlist_id, *args, **kwargs): + return self._PAGE_TEMPLATE % (playlist_id, page_num) + + def _entries(self, playlist_id, *args, **kwargs): + next_page_url = None + for pagenum in itertools.count(1): + page = 
self._download_json( + next_page_url or self._next_page_url( + pagenum, playlist_id, *args, **kwargs), + playlist_id, 'Downloading page %s' % pagenum) + + results = page.get('results') + if not results or not isinstance(results, list): + break + + for result in results: + video_url = result.get('video_url') + if not video_url or not isinstance(video_url, compat_str): + continue + entry = self._extract_video(result, require_title=False) + entry.update({ + '_type': 'url', + 'url': video_url, + 'ie_key': RutubeIE.ie_key(), + }) + yield entry + + next_page_url = page.get('next') + if not next_page_url or not page.get('has_next'): + break + + def _extract_playlist(self, playlist_id, *args, **kwargs): + return self.playlist_result( + self._entries(playlist_id, *args, **kwargs), + playlist_id, kwargs.get('playlist_name')) + + def _real_extract(self, url): + return self._extract_playlist(self._match_id(url)) + + +class RutubeChannelIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channels' _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' @@ -162,27 +213,8 @@ class RutubeChannelIE(InfoExtractor): _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' - def _extract_videos(self, channel_id, channel_title=None): - entries = [] - for pagenum in itertools.count(1): - page = self._download_json( - self._PAGE_TEMPLATE % (channel_id, pagenum), - channel_id, 'Downloading page %s' % pagenum) - results = page['results'] - if not results: - break - entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) - if not page['has_next']: - break - return self.playlist_result(entries, channel_id, channel_title) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - return self._extract_videos(channel_id) - -class RutubeMovieIE(RutubeChannelIE): +class RutubeMovieIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:movie' IE_DESC = 'Rutube movies' _VALID_URL = 
r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' @@ -196,11 +228,11 @@ class RutubeMovieIE(RutubeChannelIE): movie = self._download_json( self._MOVIE_TEMPLATE % movie_id, movie_id, 'Downloading movie JSON') - movie_name = movie['name'] - return self._extract_videos(movie_id, movie_name) + return self._extract_playlist( + movie_id, playlist_name=movie.get('name')) -class RutubePersonIE(RutubeChannelIE): +class RutubePersonIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:person' IE_DESC = 'Rutube person videos' _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' @@ -215,65 +247,33 @@ class RutubePersonIE(RutubeChannelIE): _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' -class RutubePlaylistIE(InfoExtractor): +class RutubePlaylistIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:playlist' IE_DESC = 'Rutube playlists' + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)' _TESTS = [{ - 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', + 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', 'info_dict': { - 'id': '4252', + 'id': '3097', }, - 'playlist_count': 25, + 'playlist_count': 27, + }, { + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', + 'only_matching': True, }] - _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+)' - _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s' + _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' @staticmethod def suitable(url): params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - return params.get('pl_id') and int_or_none(params['pl_id'][0]) \ - and params.get('pl_type') - - def _real_extract(self, url): - playlist_id = self._match_id(url) - return self._extract_playlist(playlist_id) - - def _extract_playlist(self, playlist_id): - entries = 
[] - for pagenum in itertools.count(1): - page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum) - - # download_json will sent an accept: application/xml header - page = self._download_json(page_url, playlist_id, - "Downloading metadata for page %s" % pagenum, - headers={'Accept': 'application/json'}) - - if not page['results']: - break + return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) - results = page['results'] - for result in results: - entry = self.url_result(result.get('video_url'), 'Rutube') - category = try_get(result, lambda x: x['category']['name']) - entry.update({ - 'id': result.get('id'), - 'uploader': try_get(result, lambda x: x['author']['name']), - 'uploader_id': try_get(result, lambda x: x['author']['id']), - 'upload_date': unified_strdate(result.get('created_ts')), - 'title': result.get('title'), - 'description': result.get('description'), - 'thumbnail': result.get('thumbnail_url'), - 'duration': int_or_none(result.get('duration')), - 'category': [category] if category else None, - 'age_limit': 18 if result.get('is_adult') else 0, - 'view_count': int_or_none(result.get('hits')), - 'is_live': result.get('is_livestream'), - 'webpage_url': result.get('video_url'), - }) - entries.append(entry) + def _next_page_url(self, page_num, playlist_id, item_kind): + return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) - if page['has_next'] is False: - break - - return self.playlist_result(entries, playlist_id, page['name']) + def _real_extract(self, url): + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + playlist_kind = qs['pl_type'][0] + playlist_id = qs['pl_id'][0] + return self._extract_playlist(playlist_id, item_kind=playlist_kind) -- cgit v1.1 From c7e327c4d46a9b72f3f707710194dccf6eee50d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 19:08:39 +0700 Subject: [utils] Introduce bool_or_none --- youtube_dl/utils.py | 4 ++++ 1 file 
changed, 4 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2554a2a..c42dd4c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1815,6 +1815,10 @@ def float_or_none(v, scale=1, invscale=1, default=None): return default +def bool_or_none(v, default=None): + return v if isinstance(v, bool) else default + + def strip_or_none(v): return None if v is None else v.strip() -- cgit v1.1 From c3dd44e08577c2ae0d08951037db5d1db7a321c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 19:09:27 +0700 Subject: [rutube] Use bool_or_none --- youtube_dl/extractor/rutube.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 5a18487..828c03b 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -12,9 +12,10 @@ from ..compat import ( ) from ..utils import ( determine_ext, - unified_timestamp, - try_get, + bool_or_none, int_or_none, + try_get, + unified_timestamp, ) @@ -42,7 +43,7 @@ class RutubeBaseIE(InfoExtractor): 'age_limit': age_limit, 'view_count': int_or_none(video.get('hits')), 'comment_count': int_or_none(video.get('comments_count')), - 'is_live': video.get('is_livestream'), + 'is_live': bool_or_none(video.get('is_livestream')), } -- cgit v1.1 From bf6ec2fea9087235c14df2a079620fcc2c17b5eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 22:08:32 +0700 Subject: [fox] Fix extraction (#14147) --- youtube_dl/extractor/fox.py | 125 +++++++++++++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 159fdf9..facc665 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -3,56 +3,99 @@ from __future__ import unicode_literals from .adobepass import AdobePassIE from ..utils import ( - 
smuggle_url, - update_url_query, + int_or_none, + parse_age_limit, + parse_duration, + try_get, + unified_timestamp, ) class FOXIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.fox.com/watch/255180355939/7684182528', + _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)' + _TESTS = [{ + # clip + 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', 'md5': 'ebd296fcc41dd4b19f8115d8461a3165', 'info_dict': { - 'id': '255180355939', + 'id': '4b765a60490325103ea69888fb2bd4e8', 'ext': 'mp4', - 'title': 'Official Trailer: Gotham', - 'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.', - 'duration': 129, - 'timestamp': 1400020798, - 'upload_date': '20140513', - 'uploader': 'NEWA-FNG-FOXCOM', + 'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight', + 'description': 'md5:549cd9c70d413adb32ce2a779b53b486', + 'duration': 102, + 'timestamp': 1504291893, + 'upload_date': '20170901', + 'creator': 'FOX', + 'series': 'Gotham', }, - 'add_ie': ['ThePlatform'], - } + 'params': { + 'skip_download': True, + }, + }, { + # episode, geo-restricted + 'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/', + 'only_matching': True, + }, { + # episode, geo-restricted, tv provided required + 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - settings = self._parse_json(self._search_regex( - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', - webpage, 'drupal settings'), video_id) - fox_pdk_player = settings['fox_pdk_player'] - release_url = fox_pdk_player['release_url'] - query = { - 'mbr': 'true', - 'switch': 'http' - } - if fox_pdk_player.get('access') == 'locked': - ap_p = settings['foxAdobePassProvider'] 
- rating = ap_p.get('videoRating') - if rating == 'n/a': - rating = None - resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating) - query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource) - - info = self._search_json_ld(webpage, video_id, fatal=False) - info.update({ - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), - 'id': video_id, - }) - return info + video = self._download_json( + 'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id, + video_id, headers={ + 'apikey': 'abdcbed02c124d393b39e818a4312055', + 'Content-Type': 'application/json', + 'Referer': url, + }) + + title = video['name'] + + m3u8_url = self._download_json( + video['videoRelease']['url'], video_id)['playURL'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + description = video.get('description') + duration = int_or_none(video.get('durationInSeconds')) or int_or_none( + video.get('duration')) or parse_duration(video.get('duration')) + timestamp = unified_timestamp(video.get('datePublished')) + age_limit = parse_age_limit(video.get('contentRating')) + + data = try_get( + video, lambda x: x['trackingData']['properties'], dict) or {} + + creator = data.get('brand') or data.get('network') or video.get('network') + + series = video.get('seriesName') or data.get( + 'seriesName') or data.get('show') + season_number = int_or_none(video.get('seasonNumber')) + episode = video.get('name') + episode_number = int_or_none(video.get('episodeNumber')) + release_year = int_or_none(video.get('releaseYear')) + + if data.get('authRequired'): + # TODO: AP + pass + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'age_limit': age_limit, + 'creator': creator, + 'series': series, + 'season_number': 
season_number, + 'episode': episode, + 'episode_number': episode_number, + 'release_year': release_year, + 'formats': formats, + } -- cgit v1.1 From b98339b54b1de517def970a955cbbdda3e1d4874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 22:15:55 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index c439c8e..86b36c3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version <unreleased> + +Core ++ [utils] Introduce bool_or_none +* [YoutubeDL] Ensure dir existence for each requested format (#14116) + +Extractors +* [fox] Fix extraction (#14147) +* [rutube] Use bool_or_none +* [rutube] Rework and generalize playlist extractors (#13565) ++ [rutube:playlist] Add support for playlists (#13534, #13565) ++ [radiocanada] Add fallback for title extraction (#14145) +* [vk] Use dedicated YouTube embeds extraction routine +* [vice] Use dedicated YouTube embeds extraction routine +* [cracked] Use dedicated YouTube embeds extraction routine +* [chilloutzone] Use dedicated YouTube embeds extraction routine +* [abcnews] Use dedicated YouTube embeds extraction routine +* [youtube] Separate methods for embeds extraction +* [redtube] Fix formats extraction (#14122) +* [arte] Relax unavailability check (#14112) ++ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059) +* [vidme:user] Relax URL regular expression (#14054) +* [bpb] Fix extraction (#14043, #14086) +* [soundcloud] Fix download URL with private tracks (#14093) +* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707) +* [viidea] Capture and output lecture error message (#14099) +* [radiocanada] Skip unsupported platforms (#14100) + + version 2017.09.02 Extractors -- cgit v1.1 From 806498cf2f35cc98cf0e6c5b46f58ca357a842de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 
2017 22:16:55 +0700 Subject: release 2017.09.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 4 +++- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bd9e219..fb934d4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.09.02 +[debug] youtube-dl version 2017.09.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a8091e7..333acee 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file python test/test_download.py nosetests +See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. + If you want to create a build of youtube-dl yourself, you'll need * python @@ -149,7 +151,7 @@ After you have ensured this site is distributing its content legally, you can fo } ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). -6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. 
The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. +6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 9. 
When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: diff --git a/ChangeLog b/ChangeLog index 86b36c3..99667e5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.09.10 Core + [utils] Introduce bool_or_none diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dbec6c8..798a81d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -38,6 +38,7 @@ - **afreecatv**: afreecatv.com - **afreecatv:global**: afreecatv.com - **AirMozilla** + - **AliExpressLive** - **AlJazeera** - **Allocine** - **AlphaPorno** @@ -437,6 +438,7 @@ - **MakerTV** - **mangomolo:live** - **mangomolo:video** + - **ManyVids** - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** @@ -701,6 +703,7 @@ - **rutube:embed**: Rutube embedded videos - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos + - **rutube:playlist**: Rutube playlists - **RUTV**: RUTV.RU - **Ruutu** - **Ruv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 60ed35d..736f753 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.09.02' +__version__ = '2017.09.10' -- cgit v1.1 From f12a6e88b2c2632b10c156eb94d91675327485f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 03:22:27 +0700 Subject: [rutube:playlist] Fix suitable (closes #14166) --- youtube_dl/extractor/rutube.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 828c03b..89d89b6 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -265,8 +265,10 @@ class RutubePlaylistIE(RutubePlaylistBaseIE): _PAGE_TEMPLATE = 
'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' - @staticmethod - def suitable(url): + @classmethod + def suitable(cls, url): + if not super(RutubePlaylistIE, cls).suitable(url): + return False params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) -- cgit v1.1 From 43df248f10548e3c43f0f02584a360136f1129d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 03:27:43 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 99667e5..1892764 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [rutube:playlist] Fix suitable (#14166) + + version 2017.09.10 Core -- cgit v1.1 From 7dacceae75d3c513f442cfd20d778a31bb35d3d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 03:30:33 +0700 Subject: release 2017.09.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fb934d4..f40cb2c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.11*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.09.10 +[debug] youtube-dl version 2017.09.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1892764..c286da6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.09.11 Extractors * [rutube:playlist] Fix suitable (#14166) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 736f753..cdcb32e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.09.10' +__version__ = '2017.09.11' -- cgit v1.1 From 2709d9fa28155f7abc84d3b57ce4491391d185ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 04:14:54 +0700 Subject: [animeondemand] Add support for flash videos (closes #9944) --- youtube_dl/extractor/animeondemand.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 
9e28f25..c225307 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -46,6 +46,10 @@ class AnimeOnDemandIE(InfoExtractor): # Full length film, non-series, ger/jap, Dub/OmU, account required 'url': 'https://www.anime-on-demand.de/anime/185', 'only_matching': True, + }, { + # Flash videos + 'url': 'https://www.anime-on-demand.de/anime/12', + 'only_matching': True, }] def _login(self): @@ -120,10 +124,11 @@ class AnimeOnDemandIE(InfoExtractor): formats = [] for input_ in re.findall( - r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html): + r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html): attributes = extract_attributes(input_) + title = attributes.get('data-dialog-header') playlist_urls = [] - for playlist_key in ('data-playlist', 'data-otherplaylist'): + for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'): playlist_url = attributes.get(playlist_key) if isinstance(playlist_url, compat_str) and re.match( r'/?[\da-zA-Z]+', playlist_url): @@ -160,6 +165,23 @@ class AnimeOnDemandIE(InfoExtractor): fatal=False) if not playlist: continue + stream_url = playlist.get('streamurl') + if stream_url: + rtmp = re.search( + r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)', + stream_url) + if rtmp: + formats.append({ + 'url': rtmp.group('url'), + 'app': rtmp.group('app'), + 'play_path': rtmp.group('playpath'), + 'page_url': url, + 'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf', + 'rtmp_real_time': True, + 'format_id': 'rtmp', + 'ext': 'flv', + }) + continue start_video = playlist.get('startvideo', 0) playlist = playlist.get('playlist') if not playlist or not isinstance(playlist, list): -- cgit v1.1 From 018cc61549f417cf1e88af46ff68a17b75e62630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 04:22:55 +0700 Subject: [animeondemand] Bypass geo restriction --- 
youtube_dl/extractor/animeondemand.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index c225307..2a1cd65 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -21,6 +21,8 @@ class AnimeOnDemandIE(InfoExtractor): _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in' _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply' _NETRC_MACHINE = 'animeondemand' + # German-speaking countries of Europe + _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU'] _TESTS = [{ # jap, OmU 'url': 'https://www.anime-on-demand.de/anime/161', -- cgit v1.1 From 2f483758bc6a6661f1215c38161ee626d90ab655 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 04:32:35 +0700 Subject: [animeondemand] Improve and modernize --- youtube_dl/extractor/animeondemand.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 2a1cd65..69d3633 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -3,16 +3,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_str, -) +from ..compat import compat_str from ..utils import ( determine_ext, extract_attributes, ExtractorError, - sanitized_Request, urlencode_postdata, + urljoin, ) @@ -78,14 +75,13 @@ class AnimeOnDemandIE(InfoExtractor): 'post url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) - - request = sanitized_Request( - post_url, urlencode_postdata(login_form)) - request.add_header('Referer', self._LOGIN_URL) + post_url = urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( - request, None, 'Logging in as 
%s' % username) + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), headers={ + 'Referer': self._LOGIN_URL, + }) if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')): error = self._search_regex( @@ -154,17 +150,19 @@ class AnimeOnDemandIE(InfoExtractor): format_id_list.append(compat_str(num)) format_id = '-'.join(format_id_list) format_note = ', '.join(filter(None, (kind, lang_note))) - request = sanitized_Request( - compat_urlparse.urljoin(url, playlist_url), + item_id_list = [] + if format_id: + item_id_list.append(format_id) + item_id_list.append('videomaterial') + playlist = self._download_json( + urljoin(url, playlist_url), video_id, + 'Downloading %s JSON' % ' '.join(item_id_list), headers={ 'X-Requested-With': 'XMLHttpRequest', 'X-CSRF-Token': csrf_token, 'Referer': url, 'Accept': 'application/json, text/javascript, */*; q=0.01', - }) - playlist = self._download_json( - request, video_id, 'Downloading %s playlist JSON' % format_id, - fatal=False) + }, fatal=False) if not playlist: continue stream_url = playlist.get('streamurl') @@ -246,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor): f.update({ 'id': '%s-%s' % (f['id'], m.group('kind').lower()), 'title': m.group('title'), - 'url': compat_urlparse.urljoin(url, m.group('href')), + 'url': urljoin(url, m.group('href')), }) entries.append(f) -- cgit v1.1