]+data-container="([0-9]+)"', webpage, 'container ID') @@ -47,12 +74,6 @@ class HeiseIE(InfoExtractor): r'

]+data-sequenz="([0-9]+)"', webpage, 'sequenz ID') - title = self._html_search_meta('fulltitle', webpage, default=None) - if not title or title == "c't": - title = self._search_regex( - r']+class="videoplayerjw"[^>]+data-title="([^"]+)"', - webpage, 'title') - doc = self._download_xml( 'http://www.heise.de/videout/feed', video_id, query={ 'container': container_id, -- cgit v1.1 From 8a1a60d17397721620e75d83f2aad3a353286f15 Mon Sep 17 00:00:00 2001 From: Kareem Moussa Date: Tue, 19 Sep 2017 08:51:20 -0700 Subject: [devscripts/check-porn] Fix gettestcases import --- devscripts/check-porn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py index 7a219eb..72b2ee4 100644 --- a/devscripts/check-porn.py +++ b/devscripts/check-porn.py @@ -14,7 +14,7 @@ import os import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import get_testcases +from test.helper import gettestcases from youtube_dl.utils import compat_urllib_parse_urlparse from youtube_dl.utils import compat_urllib_request @@ -24,7 +24,7 @@ if len(sys.argv) > 1: else: METHOD = 'EURISTIC' -for test in get_testcases(): +for test in gettestcases(): if METHOD == 'EURISTIC': try: webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() -- cgit v1.1 From dc76eef092dc10f7e3f599fa7d85a04de8d84b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 19 Sep 2017 23:59:36 +0700 Subject: [tvplay] Bypass geo restriction --- youtube_dl/extractor/tvplay.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 99ff82a..46132ed 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -15,7 +15,9 @@ from ..utils import ( int_or_none, parse_iso8601, qualities, + smuggle_url, try_get, + unsmuggle_url, update_url_query, ) @@ -224,6 +226,9 @@ class 
TVPlayIE(InfoExtractor): ] def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + video_id = self._match_id(url) geo_country = self._search_regex( r'https?://[^/]+\.([a-z]{2})', url, @@ -426,4 +431,9 @@ class ViafreeIE(InfoExtractor): r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})', webpage, 'video id') - return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key()) + return self.url_result( + smuggle_url( + 'mtg:%s' % video_id, + {'geo_countries': [ + compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}), + ie=TVPlayIE.ie_key(), video_id=video_id) -- cgit v1.1 From 3b65a6fbf31256030ff35210a7be2b50369a6c4f Mon Sep 17 00:00:00 2001 From: capital-G Date: Tue, 19 Sep 2017 22:58:06 +0200 Subject: [twitter] Fix duration extraction --- youtube_dl/extractor/twitter.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 6eaf360..7399cf5 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -229,7 +229,7 @@ class TwitterCardIE(TwitterBaseIE): title = self._search_regex(r'([^<]+)', webpage, 'title') thumbnail = config.get('posterImageUrl') or config.get('image_src') - duration = float_or_none(config.get('duration')) or duration + duration = float_or_none(config.get('duration'), scale=1000) or duration return { 'id': video_id, @@ -255,6 +255,7 @@ class TwitterIE(InfoExtractor): 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! 
http://t.co/c7jHH749xJ"', 'uploader': 'FREE THE NIPPLE', 'uploader_id': 'freethenipple', + 'duration': 12.922, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -305,11 +306,12 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'Donte - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'Donte on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'Donte', + 'uploader': 'あかさ', 'uploader_id': 'jaydingeer', + 'duration': 30.0, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -337,6 +339,7 @@ class TwitterIE(InfoExtractor): 'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"', 'uploader_id': 'captainamerica', 'uploader': 'Captain America', + 'duration': 3.17, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -364,6 +367,7 @@ class TwitterIE(InfoExtractor): 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. 
الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"', 'uploader': 'عالم الأخبار', 'uploader_id': 'news_al3alm', + 'duration': 277.4, }, 'params': { 'format': 'best[format_id^=http-]', -- cgit v1.1 From 12ea5c79fb0bfa878d62d130cf67057fc230dfa7 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Wed, 20 Sep 2017 14:53:06 -0500 Subject: [nbcsports:vplayer] Correct theplatform URL (closes #13873) --- youtube_dl/extractor/nbc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 62db70b..836a41f 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -109,10 +109,10 @@ class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P[0-9a-zA-Z_]+)' _TESTS = [{ - 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', + 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI', 'info_dict': { 'id': '9CsDKds0kvHI', - 'ext': 'flv', + 'ext': 'mp4', 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', 'timestamp': 1426270238, @@ -120,7 +120,7 @@ class NBCSportsVPlayerIE(InfoExtractor): 'uploader': 'NBCU-SPORTS', } }, { - 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', + 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z', 'only_matching': True, }] @@ -134,7 +134,8 @@ class NBCSportsVPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - theplatform_url = self._og_search_video_url(webpage) + theplatform_url = self._og_search_video_url(webpage).replace( + 'vplayer.nbcsports.com', 'player.theplatform.com') return self.url_result(theplatform_url, 'ThePlatform') -- cgit v1.1 From f6ff52b473c9ed969fadb3e3d50852c4a27ba17e Mon Sep 17 00:00:00 2001 From: Giuseppe Fabiano 
Date: Wed, 20 Sep 2017 23:05:33 +0200 Subject: [beeg] Fix extraction (closes #14275) --- youtube_dl/extractor/beeg.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index d5c5822..bbeae4b 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -9,6 +9,7 @@ from ..compat import ( from ..utils import ( int_or_none, parse_iso8601, + urljoin, ) @@ -36,9 +37,11 @@ class BeegIE(InfoExtractor): webpage = self._download_webpage(url, video_id) cpl_url = self._search_regex( - r']+src=(["\'])(?P(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1', + r']+src=(["\'])(?P(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1', webpage, 'cpl', default=None, group='url') + cpl_url = urljoin(url, cpl_url) + beeg_version, beeg_salt = [None] * 2 if cpl_url: @@ -54,7 +57,7 @@ class BeegIE(InfoExtractor): r'beeg_salt\s*=\s*(["\'])(?P.+?)\1', cpl, 'beeg salt', default=None, group='beeg_salt') - beeg_version = beeg_version or '2000' + beeg_version = beeg_version or '2185' beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H' video = self._download_json( -- cgit v1.1 From 8c6919e4331e1cd44f50e700e8fc4e630d913a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Sep 2017 23:00:35 +0700 Subject: [lynda] Add support for educourse.ga (closes #14286) --- youtube_dl/extractor/lynda.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index d2f7529..1b6f509 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor): class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P\d+)|player/embed)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P\d+)|player/embed)/(?P\d+)' 
_TIMECODE_REGEX = r'\[(?P\d+:\d+:\d+[\.,]\d+)\]' @@ -110,6 +110,9 @@ class LyndaIE(LyndaBaseIE): }, { 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0', 'only_matching': True, + }, { + 'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', + 'only_matching': True, }] def _raise_unavailable(self, video_id): @@ -253,7 +256,7 @@ class LyndaCourseIE(LyndaBaseIE): # Course link equals to welcome/introduction video link of same course # We will recognize it as course link - _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P[^/]+/[^/]+/(?P\d+))-\d\.html' + _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P[^/]+/[^/]+/(?P\d+))-\d\.html' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) -- cgit v1.1 From 8c2895305dc09920055611c8120f5a65fcd2614f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Sep 2017 02:30:03 +0800 Subject: [options] Accept lrc as a subtitle conversion target format (closes #14292) --- ChangeLog | 1 + youtube_dl/__init__.py | 2 +- youtube_dl/options.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index ba9260e..42ba879 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core ++ [options] Accept lrc as a subtitle conversion target format (#14292) * [utils] Fix handling raw TTML subtitles (#14191) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c458941..ba684a0 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -206,7 +206,7 @@ def _real_main(argv=None): if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']: parser.error('invalid video recode format specified') if opts.convertsubtitles is not None: - if opts.convertsubtitles not in ['srt', 'vtt', 'ass']: + if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']: parser.error('invalid subtitle format specified') if opts.date is not None: diff --git a/youtube_dl/options.py 
b/youtube_dl/options.py index 38439c9..4c04550 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -847,7 +847,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--convert-subs', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, - help='Convert the subtitles to other format (currently supported: srt|ass|vtt)') + help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)') parser.add_option_group(general) parser.add_option_group(network) -- cgit v1.1 From 2384f5a64e501d7abb844e8d31fe340b34d8d4e7 Mon Sep 17 00:00:00 2001 From: Tatsuyuki Ishi Date: Wed, 6 Sep 2017 11:24:34 +0900 Subject: [mixcloud] Fix extraction (closes #14088) --- youtube_dl/compat.py | 10 ++- youtube_dl/extractor/mixcloud.py | 167 ++++++++++++++++++++++++--------------- 2 files changed, 114 insertions(+), 63 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 9e4e13b..2a62248 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -6,6 +6,7 @@ import collections import email import getpass import io +import itertools import optparse import os import re @@ -15,7 +16,6 @@ import socket import struct import subprocess import sys -import itertools import xml.etree.ElementTree @@ -2898,6 +2898,13 @@ else: compat_struct_pack = struct.pack compat_struct_unpack = struct.unpack +try: + from future_builtins import zip as compat_zip +except ImportError: # not 2.6+ or is 3.x + try: + from itertools import izip as compat_zip # < 2.5 or 3.x + except ImportError: + compat_zip = zip __all__ = [ 'compat_HTMLParseError', @@ -2948,5 +2955,6 @@ __all__ = [ 'compat_urlretrieve', 'compat_xml_parse_error', 'compat_xpath', + 'compat_zip', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index f6360cc..4811823 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -9,16 +9,16 @@ from .common import InfoExtractor 
from ..compat import ( compat_chr, compat_ord, - compat_str, compat_urllib_parse_unquote, compat_urlparse, + compat_zip ) from ..utils import ( clean_html, ExtractorError, OnDemandPagedList, str_to_int, -) + try_get) class MixcloudIE(InfoExtractor): @@ -54,27 +54,19 @@ class MixcloudIE(InfoExtractor): 'only_matching': True, }] - _keys = [ - 'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };', - 'pleasedontdownloadourmusictheartistswontgetpaid', - 'window.addEventListener = window.addEventListener || function() {};', - '(function() { return new Date().toLocaleDateString(); })()' - ] - _current_key = None - - # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js - def _decrypt_play_info(self, play_info, video_id): - play_info = base64.b64decode(play_info.encode('ascii')) - for num, key in enumerate(self._keys, start=1): - try: - return self._parse_json( - ''.join([ - compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)])) - for idx, ch in enumerate(play_info)]), - video_id) - except ExtractorError: - if num == len(self._keys): - raise + @staticmethod + def _decrypt_xor_cipher(key, ciphertext): + """Encrypt/Decrypt XOR cipher. 
Both ways are possible because it's XOR.""" + return ''.join([ + compat_chr(compat_ord(ch) ^ compat_ord(k)) + for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) + + @staticmethod + def _decrypt_and_extend(stream_info, url_key, getter, key, formats): + maybe_url = stream_info.get(url_key) + if maybe_url is not None: + decrypted = MixcloudIE._decrypt_xor_cipher(key, base64.b64decode(maybe_url)) + formats.extend(getter(decrypted)) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -84,54 +76,105 @@ class MixcloudIE(InfoExtractor): webpage = self._download_webpage(url, track_id) - if not self._current_key: - js_url = self._search_regex( - r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', - webpage, 'js url', default=None) - if js_url: - js = self._download_webpage(js_url, track_id, fatal=False) - if js: - KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' - for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'): - key = self._search_regex( - KEY_RE_TEMPLATE % key_name, js, 'key', - default=None, group='key') - if key and isinstance(key, compat_str): - self._keys.insert(0, key) - self._current_key = key + # Legacy path + encrypted_play_info = self._search_regex( + r'm-play-info="([^"]+)"', webpage, 'play info', default=None) + + if encrypted_play_info is not None: + # Decode + encrypted_play_info = base64.b64decode(encrypted_play_info) + else: + # New path + full_info_json = self._parse_json(self._html_search_regex( + r'', webpage, 'play info'), 'play info') + for item in full_info_json: + item_data = try_get(item, lambda x: x['cloudcast']['data']['cloudcastLookup']) + if try_get(item_data, lambda x: x['streamInfo']['url']): + info_json = item_data + break + else: + raise ExtractorError('Failed to extract matching stream info') message = self._html_search_regex( r'(?s)]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', webpage, 'error message', 
default=None) - encrypted_play_info = self._search_regex( - r'm-play-info="([^"]+)"', webpage, 'play info') - - play_info = self._decrypt_play_info(encrypted_play_info, track_id) - - if message and 'stream_url' not in play_info: - raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) - - song_url = play_info['stream_url'] - - title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') - thumbnail = self._proto_relative_url(self._html_search_regex( - r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) - uploader = self._html_search_regex( - r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) - uploader_id = self._search_regex( - r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) - description = self._og_search_description(webpage) - view_count = str_to_int(self._search_regex( - [r'([0-9,.]+)', - r'(?:m|data)-tooltip=["\']([\d,.]+) plays'], - webpage, 'play count', default=None)) + js_url = self._search_regex( + r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', + webpage, 'js url', default=None) + if js_url is None: + js_url = self._search_regex( + r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js/www\.[^>]+\.js)', + webpage, 'js url') + js = self._download_webpage(js_url, track_id) + # Known plaintext attack + if encrypted_play_info: + kps = ['{"stream_url":'] + kpa_target = encrypted_play_info + else: + kps = ['https://', 'http://'] + kpa_target = base64.b64decode(info_json['streamInfo']['url']) + for kp in kps: + partial_key = self._decrypt_xor_cipher(kpa_target, kp) + for quote in ["'", '"']: + key = self._search_regex(r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)), js, + "encryption key", default=None) + if key is not None: + break + else: + continue + break + else: + raise ExtractorError('Failed to extract encryption key') + + if encrypted_play_info is not None: + play_info = self._parse_json(self._decrypt_xor_cipher(key, 
encrypted_play_info), 'play info') + if message and 'stream_url' not in play_info: + raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) + song_url = play_info['stream_url'] + formats = [{ + 'format_id': 'normal', + 'url': song_url + }] + + title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') + thumbnail = self._proto_relative_url(self._html_search_regex( + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) + uploader = self._html_search_regex( + r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) + uploader_id = self._search_regex( + r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) + description = self._og_search_description(webpage) + view_count = str_to_int(self._search_regex( + [r'([0-9,.]+)', + r'(?:m|data)-tooltip=["\']([\d,.]+) plays'], + webpage, 'play count', default=None)) + + else: + title = info_json['name'] + thumbnail = try_get(info_json, + lambda x: 'https://thumbnailer.mixcloud.com/unsafe/600x600/' + x['picture']['urlRoot']) + uploader = try_get(info_json, lambda x: x['owner']['displayName']) + uploader_id = try_get(info_json, lambda x: x['owner']['username']) + description = try_get(info_json, lambda x: x['description']) + view_count = try_get(info_json, lambda x: x['plays']) + + stream_info = info_json['streamInfo'] + formats = [] + self._decrypt_and_extend(stream_info, 'url', lambda x: [{ + 'format_id': 'normal', + 'url': x + }], key, formats) + self._decrypt_and_extend(stream_info, 'hlsUrl', lambda x: self._extract_m3u8_formats(x, title), key, + formats) + self._decrypt_and_extend(stream_info, 'dashUrl', lambda x: self._extract_mpd_formats(x, title), key, + formats) return { 'id': track_id, 'title': title, - 'url': song_url, + 'formats': formats, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, -- cgit v1.1 From 095774e59130c999ed8ce132f80a7164c5ee39a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 
05:35:55 +0700 Subject: [mixcloud] Improve and simplify (closes #14132) --- youtube_dl/extractor/mixcloud.py | 71 +++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 4811823..f331db8 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..compat import ( compat_chr, compat_ord, + compat_str, compat_urllib_parse_unquote, compat_urlparse, compat_zip @@ -16,9 +17,12 @@ from ..compat import ( from ..utils import ( clean_html, ExtractorError, + int_or_none, OnDemandPagedList, str_to_int, - try_get) + try_get, + urljoin, +) class MixcloudIE(InfoExtractor): @@ -61,13 +65,6 @@ class MixcloudIE(InfoExtractor): compat_chr(compat_ord(ch) ^ compat_ord(k)) for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) - @staticmethod - def _decrypt_and_extend(stream_info, url_key, getter, key, formats): - maybe_url = stream_info.get(url_key) - if maybe_url is not None: - decrypted = MixcloudIE._decrypt_xor_cipher(key, base64.b64decode(maybe_url)) - formats.extend(getter(decrypted)) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group(1) @@ -86,9 +83,12 @@ class MixcloudIE(InfoExtractor): else: # New path full_info_json = self._parse_json(self._html_search_regex( - r'', webpage, 'play info'), 'play info') + r'', + webpage, 'play info'), 'play info') for item in full_info_json: - item_data = try_get(item, lambda x: x['cloudcast']['data']['cloudcastLookup']) + item_data = try_get( + item, lambda x: x['cloudcast']['data']['cloudcastLookup'], + dict) if try_get(item_data, lambda x: x['streamInfo']['url']): info_json = item_data break @@ -100,13 +100,9 @@ class MixcloudIE(InfoExtractor): webpage, 'error message', default=None) js_url = self._search_regex( - r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', - 
webpage, 'js url', default=None) - if js_url is None: - js_url = self._search_regex( - r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js/www\.[^>]+\.js)', - webpage, 'js url') - js = self._download_webpage(js_url, track_id) + r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)', + webpage, 'js url') + js = self._download_webpage(js_url, track_id, 'Downloading JS') # Known plaintext attack if encrypted_play_info: kps = ['{"stream_url":'] @@ -117,8 +113,9 @@ class MixcloudIE(InfoExtractor): for kp in kps: partial_key = self._decrypt_xor_cipher(kpa_target, kp) for quote in ["'", '"']: - key = self._search_regex(r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)), js, - "encryption key", default=None) + key = self._search_regex( + r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)), + js, 'encryption key', default=None) if key is not None: break else: @@ -153,23 +150,37 @@ class MixcloudIE(InfoExtractor): else: title = info_json['name'] - thumbnail = try_get(info_json, - lambda x: 'https://thumbnailer.mixcloud.com/unsafe/600x600/' + x['picture']['urlRoot']) + thumbnail = urljoin( + 'https://thumbnailer.mixcloud.com/unsafe/600x600/', + try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str)) uploader = try_get(info_json, lambda x: x['owner']['displayName']) uploader_id = try_get(info_json, lambda x: x['owner']['username']) description = try_get(info_json, lambda x: x['description']) - view_count = try_get(info_json, lambda x: x['plays']) + view_count = int_or_none(try_get(info_json, lambda x: x['plays'])) stream_info = info_json['streamInfo'] formats = [] - self._decrypt_and_extend(stream_info, 'url', lambda x: [{ - 'format_id': 'normal', - 'url': x - }], key, formats) - self._decrypt_and_extend(stream_info, 'hlsUrl', lambda x: self._extract_m3u8_formats(x, title), key, - formats) - self._decrypt_and_extend(stream_info, 'dashUrl', lambda x: self._extract_mpd_formats(x, title), key, - formats) + + for 
url_key in ('url', 'hlsUrl', 'dashUrl'): + format_url = stream_info.get(url_key) + if not format_url: + continue + decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url)) + if not decrypted: + continue + if url_key == 'hlsUrl': + formats.extend(self._extract_m3u8_formats( + decrypted, track_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif url_key == 'dashUrl': + formats.extend(self._extract_mpd_formats( + decrypted, track_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'format_id': 'http', + 'url': decrypted, + }) + self._sort_formats(formats) return { 'id': track_id, -- cgit v1.1 From 9ce1ac404648142139f8b231c674d434ad4f9ffe Mon Sep 17 00:00:00 2001 From: kayb94 <30302445+kayb94@users.noreply.github.com> Date: Fri, 22 Sep 2017 22:49:48 +0000 Subject: [generic] Fix support for multiple HTML5 videos on one page (closes #14080) --- youtube_dl/extractor/generic.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b83c183..7d0edf0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1879,6 +1879,15 @@ class GenericIE(InfoExtractor): 'title': 'Building A Business Online: Principal Chairs Q & A', }, }, + { + # multiple HTML5 videos on one page + 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html', + 'info_dict': { + 'id': 'keyscenarios', + 'title': 'Rescue Kit 14 Free Edition - Getting started', + }, + 'playlist_count': 4, + } # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2849,13 +2858,20 @@ class GenericIE(InfoExtractor): # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: - for entry in entries: - entry.update({ + if len(entries) == 1: + entries[0].update({ 'id': video_id, 'title': video_title, }) + else: + for num, entry in enumerate(entries, start=1): + 
entry.update({ + 'id': '%s-%s' % (video_id, num), + 'title': '%s (%d)' % (video_title, num), + }) + for entry in entries: self._sort_formats(entry['formats']) - return self.playlist_result(entries) + return self.playlist_result(entries, video_id, video_title) jwplayer_data = self._find_jwplayer_data( webpage, video_id, transform_source=js_to_json) -- cgit v1.1 From 13de91c9e92bd831fee38fddbdabce7f6e82ef91 Mon Sep 17 00:00:00 2001 From: Dan Weber Date: Tue, 12 Sep 2017 22:52:54 -0400 Subject: [americastestkitchen] Add extractor (closes #10764) --- youtube_dl/extractor/americastestkitchen.py | 85 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 86 insertions(+) create mode 100755 youtube_dl/extractor/americastestkitchen.py diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py new file mode 100755 index 0000000..f231e7f --- /dev/null +++ b/youtube_dl/extractor/americastestkitchen.py @@ -0,0 +1,85 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class AmericasTestKitchenIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/episode/(?P\d+)' + _TESTS = [{ + 'url': + 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', + 'md5': 'b861c3e365ac38ad319cfd509c30577f', + 'info_dict': { + 'id': '1_5g5zua6e', + 'title': 'atk_s17_e24.mp4', + 'ext': 'mp4', + 'description': '

Host Julia Collin Davison goes into the test kitchen with test cook Dan Souza to learn how to make the ultimate Grill-Roasted Beef Tenderloin. Next, equipment expert Adam Ried reviews gas grills in the Equipment Corner. Then, gadget guru Lisa McManus uncovers the best quirky gadgets. Finally, test cook Erin McMurrer shows host Bridget Lancaster how to make an elegant Pear-Walnut Upside-Down Cake.

', + 'timestamp': 1497285541, + 'upload_date': '20170612', + 'uploader_id': 'roger.metcalf@americastestkitchen.com', + 'release_date': '2017-06-17', + 'thumbnail': 'http://d3cizcpymoenau.cloudfront.net/images/35973/e24-tenderloin-16.jpg', + 'episode_number': 24, + 'episode': 'Summer Dinner Party', + 'episode_id': '548-summer-dinner-party', + 'season_number': 17 + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': + 'https://www.americastestkitchen.com/episode/546-a-spanish-affair', + 'only_matching': + True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex( + r'partner_id/(?P\d+)', + webpage, + 'partner_id', + group='partner_id') + + video_data = self._parse_json( + self._search_regex( + r'window\.__INITIAL_STATE__\s*=\s*({.+?});\s*', + webpage, 'initial context'), + video_id) + + episode_data = video_data['episodeDetail']['content']['data'] + episode_content_meta = episode_data['full_video'] + external_id = episode_content_meta['external_id'] + + # photo data + photo_data = episode_content_meta.get('photo') + thumbnail = photo_data.get('image_url') if photo_data else None + + # meta + release_date = episode_data.get('aired_at') + description = episode_content_meta.get('description') + episode_number = int(episode_content_meta.get('episode_number')) + episode = episode_content_meta.get('title') + episode_id = episode_content_meta.get('episode_slug') + season_number = int(episode_content_meta.get('season_number')) + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, external_id), + 'ie_key': 'Kaltura', + 'id': video_id, + 'release_date': release_date, + 'thumbnail': thumbnail, + 'description': description, + 'episode_number': episode_number, + 'episode': episode, + 'episode_id': episode_id, + 'season_number': season_number + } diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index ab95c85..5853005 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -39,6 +39,7 @@ from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .amcnetworks import AMCNetworksIE +from .americastestkitchen import AmericasTestKitchenIE from .animeondemand import AnimeOnDemandIE from .anitube import AnitubeIE from .anvato import AnvatoIE -- cgit v1.1 From 4bb58fa118a8c75b2ecf05f7b29a0ae27eef6239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 06:28:46 +0700 Subject: [americastestkitchen] Improve (closes #13996) --- youtube_dl/extractor/americastestkitchen.py | 82 ++++++++++++++--------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py index f231e7f..0173687 100755 --- a/youtube_dl/extractor/americastestkitchen.py +++ b/youtube_dl/extractor/americastestkitchen.py @@ -1,85 +1,85 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + try_get, + unified_strdate, +) class AmericasTestKitchenIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/episode/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P\d+)' _TESTS = [{ - 'url': - 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', + 'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', 'md5': 'b861c3e365ac38ad319cfd509c30577f', 'info_dict': { 'id': '1_5g5zua6e', - 'title': 'atk_s17_e24.mp4', + 'title': 'Summer Dinner Party', 'ext': 'mp4', - 'description': '

', + 'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1497285541, 'upload_date': '20170612', 'uploader_id': 'roger.metcalf@americastestkitchen.com', - 'release_date': '2017-06-17', - 'thumbnail': 'http://d3cizcpymoenau.cloudfront.net/images/35973/e24-tenderloin-16.jpg', - 'episode_number': 24, + 'release_date': '20170617', + 'series': "America's Test Kitchen", + 'season_number': 17, 'episode': 'Summer Dinner Party', - 'episode_id': '548-summer-dinner-party', - 'season_number': 17 + 'episode_number': 24, }, 'params': { - # m3u8 download 'skip_download': True, }, }, { - 'url': - 'https://www.americastestkitchen.com/episode/546-a-spanish-affair', - 'only_matching': - True, + 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) partner_id = self._search_regex( - r'partner_id/(?P\d+)', - webpage, - 'partner_id', - group='partner_id') + r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', + webpage, 'kaltura partner id') video_data = self._parse_json( self._search_regex( - r'window\.__INITIAL_STATE__\s*=\s*({.+?});\s*', + r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*', webpage, 'initial context'), video_id) - episode_data = video_data['episodeDetail']['content']['data'] - episode_content_meta = episode_data['full_video'] - external_id = episode_content_meta['external_id'] + ep_data = try_get( + video_data, + (lambda x: x['episodeDetail']['content']['data'], + lambda x: x['videoDetail']['content']['data']), dict) + ep_meta = ep_data.get('full_video', {}) + external_id = ep_data.get('external_id') or ep_meta['external_id'] - # photo data - photo_data = episode_content_meta.get('photo') - thumbnail = photo_data.get('image_url') if photo_data else None + title = ep_data.get('title') or ep_meta.get('title') + description = 
clean_html(ep_meta.get('episode_description') or ep_data.get( + 'description') or ep_meta.get('description')) + thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url']) + release_date = unified_strdate(ep_data.get('aired_at')) - # meta - release_date = episode_data.get('aired_at') - description = episode_content_meta.get('description') - episode_number = int(episode_content_meta.get('episode_number')) - episode = episode_content_meta.get('title') - episode_id = episode_content_meta.get('episode_slug') - season_number = int(episode_content_meta.get('season_number')) + season_number = int_or_none(ep_meta.get('season_number')) + episode = ep_meta.get('title') + episode_number = int_or_none(ep_meta.get('episode_number')) return { '_type': 'url_transparent', 'url': 'kaltura:%s:%s' % (partner_id, external_id), 'ie_key': 'Kaltura', - 'id': video_id, - 'release_date': release_date, - 'thumbnail': thumbnail, + 'title': title, 'description': description, - 'episode_number': episode_number, + 'thumbnail': thumbnail, + 'release_date': release_date, + 'series': "America's Test Kitchen", + 'season_number': season_number, 'episode': episode, - 'episode_id': episode_id, - 'season_number': season_number + 'episode_number': episode_number, } -- cgit v1.1 From 5c1452e8f1e744db14be1baef840e9f531e8f144 Mon Sep 17 00:00:00 2001 From: Giuseppe Fabiano Date: Sat, 23 Sep 2017 01:38:09 +0200 Subject: [twitter] Add support for user_id-less URLs (closes #14270) --- youtube_dl/extractor/twitter.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 7399cf5..0df3ad7 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -242,8 +242,9 @@ class TwitterCardIE(TwitterBaseIE): class TwitterIE(InfoExtractor): IE_NAME = 'twitter' - _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P[^/]+)/status/(?P\d+)' + _VALID_URL = 
r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P[^/]+))/status/(?P\d+)' _TEMPLATE_URL = 'https://twitter.com/%s/status/%s' + _TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s' _TESTS = [{ 'url': 'https://twitter.com/freethenipple/status/643211948184596480', @@ -322,9 +323,9 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': 'MIOxnrUteUd', 'ext': 'mp4', - 'title': 'FilmDrunk - Vine of the day', - 'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', - 'uploader': 'FilmDrunk', + 'title': 'Vince Mancini - Vine of the day', + 'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', + 'uploader': 'Vince Mancini', 'uploader_id': 'Filmdrunk', 'timestamp': 1402826626, 'upload_date': '20140615', @@ -372,6 +373,21 @@ class TwitterIE(InfoExtractor): 'params': { 'format': 'best[format_id^=http-]', }, + }, { + 'url': 'https://twitter.com/i/web/status/910031516746514432', + 'info_dict': { + 'id': '910031516746514432', + 'ext': 'mp4', + 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. 
https://t.co/mwx01Rs4lo"', + 'uploader': 'Préfet de Guadeloupe', + 'uploader_id': 'Prefet971', + 'duration': 47.48, + }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, }] def _real_extract(self, url): @@ -380,11 +396,15 @@ class TwitterIE(InfoExtractor): twid = mobj.group('id') webpage, urlh = self._download_webpage_handle( - self._TEMPLATE_URL % (user_id, twid), twid) + self._TEMPLATE_STATUSES_URL % twid, twid) if 'twitter.com/account/suspended' in urlh.geturl(): raise ExtractorError('Account suspended by Twitter.', expected=True) + if user_id is None: + mobj = re.match(self._VALID_URL, urlh.geturl()) + user_id = mobj.group('user_id') + username = remove_end(self._og_search_title(webpage), ' on Twitter') title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”') -- cgit v1.1 From 1c22d7a7f30917abfd2b7495f7bd02d51cb8528a Mon Sep 17 00:00:00 2001 From: Namnamseo <0201ssw+github@gmail.com> Date: Thu, 24 Aug 2017 11:32:24 +0900 Subject: [kakao] Add extractor (closes #12298) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/kakao.py | 140 +++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 youtube_dl/extractor/kakao.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5853005..4232a4f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -483,6 +483,7 @@ from .jove import JoveIE from .joj import JojIE from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE +from .kakao import KakaoIE from .kaltura import KalturaIE from .kamcord import KamcordIE from .kanalplay import KanalPlayIE diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py new file mode 100644 index 0000000..0caa41e --- /dev/null +++ b/youtube_dl/extractor/kakao.py @@ -0,0 +1,140 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor +from 
..utils import ( + int_or_none, + compat_str, + unified_timestamp, +) + + +class KakaoIE(InfoExtractor): + _VALID_URL = r'https?://tv.kakao.com/channel/(?P\d+)/cliplink/(?P\d+)' + IE_NAME = 'kakao.com' + + _TESTS = [{ + 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', + 'md5': '702b2fbdeb51ad82f5c904e8c0766340', + 'info_dict': { + 'id': '301965083', + 'ext': 'mp4', + 'title': '乃木坂46 バナナマン「3期生紹介コーナーが始動！顔高低差GPも！」『乃木坂工事中』', + 'uploader_id': 2671005, + 'uploader': '그랑그랑이', + 'timestamp': 1488160199, + 'upload_date': '20170227', + } + }, { + 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180', + 'md5': 'a8917742069a4dd442516b86e7d66529', + 'info_dict': { + 'id': '300103180', + 'ext': 'mp4', + 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', + 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', + 'uploader_id': 2653210, + 'uploader': '쇼 음악중심', + 'timestamp': 1485684628, + 'upload_date': '20170129', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + player_url = 'http://tv.kakao.com/embed/player/cliplink/' + video_id + \ + '?service=kakao_tv&autoplay=1&profile=HIGH&wmode=transparent' + player_header = {'Referer': player_url} + + impress = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/impress' % video_id, + video_id, 'Downloading video info', + query={ + 'player': 'monet_html5', + 'referer': url, + 'uuid': '', + 'service': 'kakao_tv', + 'section': '', + 'dteType': 'PC', + 'fields': 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' + }, headers=player_header) + + clipLink = impress['clipLink'] + clip = clipLink['clip'] + + video_info = { + 'id': video_id, + 'title': clip['title'], + 'description': clip.get('description'), + 'uploader': clipLink.get('channel', {}).get('name'), + 'uploader_id': clipLink.get('channelId'), + 'duration': int_or_none(clip.get('duration')), + 'view_count': int_or_none(clip.get('playCount')), + 'like_count': 
int_or_none(clip.get('likeCount')), + 'comment_count': int_or_none(clip.get('commentCount')), + } + + tid = impress.get('tid', '') + raw = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw' % video_id, + video_id, 'Downloading video formats info', + query={ + 'player': 'monet_html5', + 'referer': url, + 'uuid': '', + 'service': 'kakao_tv', + 'section': '', + 'tid': tid, + 'profile': 'HIGH', + 'dteType': 'PC', + }, headers=player_header, fatal=False) + + formats = [] + for fmt in raw.get('outputList', []): + try: + profile_name = fmt['profile'] + fmt_url_json = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw/videolocation' % video_id, + video_id, 'Downloading video URL for profile %s' % profile_name, + query={ + 'service': 'kakao_tv', + 'section': '', + 'tid': tid, + 'profile': profile_name + }, headers=player_header, fatal=False) + + if fmt_url_json is None: + continue + + fmt_url = fmt_url_json['url'] + formats.append({ + 'url': fmt_url, + 'format_id': profile_name, + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + 'format_note': fmt.get('label'), + 'filesize': int_or_none(fmt.get('filesize')) + }) + except KeyError: + pass + + self._sort_formats(formats) + video_info['formats'] = formats + + top_thumbnail = clip.get('thumbnailUrl') + thumbs = [] + for thumb in clip.get('clipChapterThumbnailList', []): + thumbs.append({ + 'url': thumb.get('thumbnailUrl'), + 'id': compat_str(thumb.get('timeInSec')), + 'preference': -1 if thumb.get('isDefault') else 0 + }) + video_info['thumbnail'] = top_thumbnail + video_info['thumbnails'] = thumbs + + upload_date = unified_timestamp(clipLink.get('createTime')) + video_info['timestamp'] = upload_date + + return video_info -- cgit v1.1 From f70ddd4aebbfb0bdf2f63c1eba5b5614d2cfb70f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 07:25:15 +0700 Subject: [kakao] Improve (closes #14007) --- 
youtube_dl/extractor/kakao.py | 113 +++++++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 52 deletions(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 0caa41e..c9b438e 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -3,16 +3,17 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, - compat_str, unified_timestamp, + update_url_query, ) class KakaoIE(InfoExtractor): _VALID_URL = r'https?://tv.kakao.com/channel/(?P\d+)/cliplink/(?P\d+)' - IE_NAME = 'kakao.com' + _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' _TESTS = [{ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', @@ -44,60 +45,57 @@ class KakaoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - player_url = 'http://tv.kakao.com/embed/player/cliplink/' + video_id + \ - '?service=kakao_tv&autoplay=1&profile=HIGH&wmode=transparent' - player_header = {'Referer': player_url} + player_header = { + 'Referer': update_url_query( + 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, { + 'service': 'kakao_tv', + 'autoplay': '1', + 'profile': 'HIGH', + 'wmode': 'transparent', + }) + } + + QUERY_COMMON = { + 'player': 'monet_html5', + 'referer': url, + 'uuid': '', + 'service': 'kakao_tv', + 'section': '', + 'dteType': 'PC', + } + query = QUERY_COMMON.copy() + query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' impress = self._download_json( - 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/impress' % video_id, + '%s/%s/impress' % (self._API_BASE, video_id), video_id, 'Downloading video info', - query={ - 'player': 'monet_html5', - 'referer': url, - 'uuid': '', - 'service': 'kakao_tv', - 'section': '', - 'dteType': 'PC', - 'fields': 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' - }, headers=player_header) - - clipLink = impress['clipLink'] - clip = 
clipLink['clip'] - - video_info = { - 'id': video_id, - 'title': clip['title'], - 'description': clip.get('description'), - 'uploader': clipLink.get('channel', {}).get('name'), - 'uploader_id': clipLink.get('channelId'), - 'duration': int_or_none(clip.get('duration')), - 'view_count': int_or_none(clip.get('playCount')), - 'like_count': int_or_none(clip.get('likeCount')), - 'comment_count': int_or_none(clip.get('commentCount')), - } + query=query, headers=player_header) + + clip_link = impress['clipLink'] + clip = clip_link['clip'] + + title = clip.get('title') or clip_link.get('displayTitle') tid = impress.get('tid', '') + + query = QUERY_COMMON.copy() + query.update({ + 'tid': tid, + 'profile': 'HIGH', + }) raw = self._download_json( - 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw' % video_id, + '%s/%s/raw' % (self._API_BASE, video_id), video_id, 'Downloading video formats info', - query={ - 'player': 'monet_html5', - 'referer': url, - 'uuid': '', - 'service': 'kakao_tv', - 'section': '', - 'tid': tid, - 'profile': 'HIGH', - 'dteType': 'PC', - }, headers=player_header, fatal=False) + query=query, headers=player_header) formats = [] for fmt in raw.get('outputList', []): try: profile_name = fmt['profile'] fmt_url_json = self._download_json( - 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw/videolocation' % video_id, - video_id, 'Downloading video URL for profile %s' % profile_name, + '%s/%s/raw/videolocation' % (self._API_BASE, video_id), + video_id, + 'Downloading video URL for profile %s' % profile_name, query={ 'service': 'kakao_tv', 'section': '', @@ -119,11 +117,8 @@ class KakaoIE(InfoExtractor): }) except KeyError: pass - self._sort_formats(formats) - video_info['formats'] = formats - top_thumbnail = clip.get('thumbnailUrl') thumbs = [] for thumb in clip.get('clipChapterThumbnailList', []): thumbs.append({ @@ -131,10 +126,24 @@ class KakaoIE(InfoExtractor): 'id': compat_str(thumb.get('timeInSec')), 'preference': -1 if thumb.get('isDefault') else 0 }) - 
video_info['thumbnail'] = top_thumbnail - video_info['thumbnails'] = thumbs - - upload_date = unified_timestamp(clipLink.get('createTime')) - video_info['timestamp'] = upload_date + top_thumbnail = clip.get('thumbnailUrl') + if top_thumbnail: + thumbs.append({ + 'url': top_thumbnail, + 'preference': 10, + }) - return video_info + return { + 'id': video_id, + 'title': title, + 'description': clip.get('description'), + 'uploader': clip_link.get('channel', {}).get('name'), + 'uploader_id': clip_link.get('channelId'), + 'thumbnails': thumbs, + 'timestamp': unified_timestamp(clip_link.get('createTime')), + 'duration': int_or_none(clip.get('duration')), + 'view_count': int_or_none(clip.get('playCount')), + 'like_count': int_or_none(clip.get('likeCount')), + 'comment_count': int_or_none(clip.get('commentCount')), + 'formats': formats, + } -- cgit v1.1 From 7f4921b38d10c17fe354bab20b741b362c5ae0aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 07:26:40 +0700 Subject: [heise] PEP 8 --- youtube_dl/extractor/heise.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 495ffb7..82e11a7 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -60,8 +60,8 @@ class HeiseIE(InfoExtractor): title = self._html_search_meta('fulltitle', webpage, default=None) if not title or title == "c't": title = self._search_regex( - r']+class="videoplayerjw"[^>]+data-title="([^"]+)"', - webpage, 'title') + r']+class="videoplayerjw"[^>]+data-title="([^"]+)"', + webpage, 'title') yt_urls = YoutubeIE._extract_urls(webpage) if yt_urls: -- cgit v1.1 From 136507b39a2b48cb775249e9724eeeedb56baed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 07:41:22 +0700 Subject: [24video] Add support for 24video.adult (closes #14295) --- youtube_dl/extractor/twentyfourvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 7af1165..cc51ca0 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -14,7 +14,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?P(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' + _VALID_URL = r'https?://(?P(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', -- cgit v1.1 From e3440d824a7326d0ba609d8f0896203208ecc558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 07:42:17 +0700 Subject: [24video] Fix timestamp extraction and make non fatal (#14295) --- youtube_dl/extractor/twentyfourvideo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index cc51ca0..96e0b96 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -60,8 +60,8 @@ class TwentyFourVideoIE(InfoExtractor): duration = int_or_none(self._og_search_property( 'duration', webpage, 'duration', fatal=False)) timestamp = parse_iso8601(self._search_regex( - r'', - webpage, 'upload date')) + r']+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"', + webpage, 'upload date', fatal=False)) uploader = self._html_search_regex( r'class="video-uploaded"[^>]*>\s*]*>([^<]+)', @@ -72,7 +72,7 @@ class TwentyFourVideoIE(InfoExtractor): webpage, 'view count', fatal=False)) comment_count = int_or_none(self._html_search_regex( r']+href="#tab-comments"[^>]*>(\d+) комментари', - webpage, 'comment count', fatal=False)) + webpage, 'comment count', default=None)) # Sets some cookies self._download_xml( -- cgit v1.1 From 47b5dfb047ce78dfb4ca7091efb4f7d0a1af3545 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 23:14:41 +0700 Subject: Credit @luboss for joj (#13268) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 478c787..c2dd506 100644 --- a/AUTHORS +++ b/AUTHORS @@ -224,3 +224,4 @@ Giuseppe Fabiano Örn Guðjónsson Parmjit Virk Genki Sky +Ľuboš Katrinec -- cgit v1.1 From 07d1344c85781c911d0c1b1f6aa1804cd69f37d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 23:16:27 +0700 Subject: Credit @coreynicholson for vlive:playlist (#13613) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index c2dd506..cff6600 100644 --- a/AUTHORS +++ b/AUTHORS @@ -225,3 +225,4 @@ Giuseppe Fabiano Parmjit Virk Genki Sky Ľuboš Katrinec +Corey Nicholson -- cgit v1.1 From 16f54d0751553c09a512a69f56920235a186d4bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 23:20:20 +0700 Subject: Credit @codeasashu for voot (#11814) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index cff6600..295b784 100644 --- a/AUTHORS +++ b/AUTHORS @@ -226,3 +226,4 @@ Parmjit Virk Genki Sky Ľuboš Katrinec Corey Nicholson +Ashutosh Chaudhary -- cgit v1.1 From aab20aabfc8098a667b0f1bd820fe85ab2653b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 23:23:27 +0700 Subject: Credit @jdong92 for voot (#14059) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 295b784..147c3e9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -227,3 +227,4 @@ Genki Sky Ľuboš Katrinec Corey Nicholson Ashutosh Chaudhary +John Dong -- cgit v1.1 From 2f3933aa1e33d7ec24a8701f102a309029a31768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 23:26:35 +0700 Subject: Credit @ishitatsuyuki for mixcloud fix (#14132) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 
147c3e9..d1f3bba 100644 --- a/AUTHORS +++ b/AUTHORS @@ -228,3 +228,4 @@ Genki Sky Corey Nicholson Ashutosh Chaudhary John Dong +Tatsuyuki Ishi -- cgit v1.1 From a87d7b4953d46e75e62fe25a5cc5e689297127ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 23:27:28 +0700 Subject: Credit @nbppp2 for americastestkitchen (#13996) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index d1f3bba..1d40c86 100644 --- a/AUTHORS +++ b/AUTHORS @@ -229,3 +229,4 @@ Corey Nicholson Ashutosh Chaudhary John Dong Tatsuyuki Ishi +Daniel Weber -- cgit v1.1 From 4c54b89e0310abd6be643604dc0f1ec3a608b68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 24 Sep 2017 00:08:27 +0700 Subject: Hide experimental phantomjs wrapper --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/extractor/openload.py | 228 ++++++++++++++++++++++++++++++++++++++- youtube_dl/utils.py | 214 ------------------------------------ 3 files changed, 228 insertions(+), 216 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bfb4ff2..0a7f36c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -89,10 +89,10 @@ from .utils import ( write_string, YoutubeDLCookieProcessor, YoutubeDLHandler, - PhantomJSwrapper, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER +from .extractor.openload import PhantomJSwrapper from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 292476e..da1ef02 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,17 +1,243 @@ # coding: utf-8 from __future__ import unicode_literals +import json +import os import re +import subprocess +import tempfile from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils 
import ( + check_executable, determine_ext, + encodeArgument, ExtractorError, get_element_by_id, - PhantomJSwrapper, + get_exe_version, + is_outdated_version, + std_headers, ) +def cookie_to_dict(cookie): + cookie_dict = { + 'name': cookie.name, + 'value': cookie.value, + } + if cookie.port_specified: + cookie_dict['port'] = cookie.port + if cookie.domain_specified: + cookie_dict['domain'] = cookie.domain + if cookie.path_specified: + cookie_dict['path'] = cookie.path + if cookie.expires is not None: + cookie_dict['expires'] = cookie.expires + if cookie.secure is not None: + cookie_dict['secure'] = cookie.secure + if cookie.discard is not None: + cookie_dict['discard'] = cookie.discard + try: + if (cookie.has_nonstandard_attr('httpOnly') or + cookie.has_nonstandard_attr('httponly') or + cookie.has_nonstandard_attr('HttpOnly')): + cookie_dict['httponly'] = True + except TypeError: + pass + return cookie_dict + + +def cookie_jar_to_list(cookie_jar): + return [cookie_to_dict(cookie) for cookie in cookie_jar] + + +class PhantomJSwrapper(object): + """PhantomJS wrapper class + + This class is experimental. + """ + + _TEMPLATE = r''' + phantom.onError = function(msg, trace) {{ + var msgStack = ['PHANTOM ERROR: ' + msg]; + if(trace && trace.length) {{ + msgStack.push('TRACE:'); + trace.forEach(function(t) {{ + msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + + (t.function ? 
' (in function ' + t.function +')' : '')); + }}); + }} + console.error(msgStack.join('\n')); + phantom.exit(1); + }}; + var page = require('webpage').create(); + var fs = require('fs'); + var read = {{ mode: 'r', charset: 'utf-8' }}; + var write = {{ mode: 'w', charset: 'utf-8' }}; + JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{ + phantom.addCookie(x); + }}); + page.settings.resourceTimeout = {timeout}; + page.settings.userAgent = "{ua}"; + page.onLoadStarted = function() {{ + page.evaluate(function() {{ + delete window._phantom; + delete window.callPhantom; + }}); + }}; + var saveAndExit = function() {{ + fs.write("{html}", page.content, write); + fs.write("{cookies}", JSON.stringify(phantom.cookies), write); + phantom.exit(); + }}; + page.onLoadFinished = function(status) {{ + if(page.url === "") {{ + page.setContent(fs.read("{html}", read), "{url}"); + }} + else {{ + {jscode} + }} + }}; + page.open(""); + ''' + + _TMP_FILE_NAMES = ['script', 'html', 'cookies'] + + @staticmethod + def _version(): + return get_exe_version('phantomjs', version_re=r'([0-9.]+)') + + def __init__(self, extractor, required_version=None, timeout=10000): + self.exe = check_executable('phantomjs', ['-v']) + if not self.exe: + raise ExtractorError('PhantomJS executable not found in PATH, ' + 'download it from http://phantomjs.org', + expected=True) + + self.extractor = extractor + + if required_version: + version = self._version() + if is_outdated_version(version, required_version): + self.extractor._downloader.report_warning( + 'Your copy of PhantomJS is outdated, update it to version ' + '%s or newer if you encounter any errors.' 
% required_version) + + self.options = { + 'timeout': timeout, + } + self._TMP_FILES = {} + for name in self._TMP_FILE_NAMES: + tmp = tempfile.NamedTemporaryFile(delete=False) + tmp.close() + self._TMP_FILES[name] = tmp + + def __del__(self): + for name in self._TMP_FILE_NAMES: + try: + os.remove(self._TMP_FILES[name].name) + except: + pass + + def _save_cookies(self, url): + cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) + for cookie in cookies: + if 'path' not in cookie: + cookie['path'] = '/' + if 'domain' not in cookie: + cookie['domain'] = compat_urlparse.urlparse(url).netloc + with open(self._TMP_FILES['cookies'].name, 'wb') as f: + f.write(json.dumps(cookies).encode('utf-8')) + + def _load_cookies(self): + with open(self._TMP_FILES['cookies'].name, 'rb') as f: + cookies = json.loads(f.read().decode('utf-8')) + for cookie in cookies: + if cookie['httponly'] is True: + cookie['rest'] = {'httpOnly': None} + if 'expiry' in cookie: + cookie['expire_time'] = cookie['expiry'] + self.extractor._set_cookie(**cookie) + + def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): + """ + Downloads webpage (if needed) and executes JS + + Params: + url: website url + html: optional, html code of website + video_id: video id + note: optional, displayed when downloading webpage + note2: optional, displayed when executing JS + headers: custom http headers + jscode: code to be executed when page is loaded + + Returns tuple with: + * downloaded website (after JS execution) + * anything you print with `console.log` (but not inside `page.execute`!) + + In most cases you don't need to add any `jscode`. + It is executed in `page.onLoadFinished`. 
+ `saveAndExit();` is mandatory, use it instead of `phantom.exit()` + It is possible to wait for some element on the webpage, for example: + var check = function() { + var elementFound = page.evaluate(function() { + return document.querySelector('#b.done') !== null; + }); + if(elementFound) + saveAndExit(); + else + window.setTimeout(check, 500); + } + + page.evaluate(function(){ + document.querySelector('#a').click(); + }); + check(); + """ + if 'saveAndExit();' not in jscode: + raise ExtractorError('`saveAndExit();` not found in `jscode`') + if not html: + html = self.extractor._download_webpage(url, video_id, note=note, headers=headers) + with open(self._TMP_FILES['html'].name, 'wb') as f: + f.write(html.encode('utf-8')) + + self._save_cookies(url) + + replaces = self.options + replaces['url'] = url + user_agent = headers.get('User-Agent') or std_headers['User-Agent'] + replaces['ua'] = user_agent.replace('"', '\\"') + replaces['jscode'] = jscode + + for x in self._TMP_FILE_NAMES: + replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + + with open(self._TMP_FILES['script'].name, 'wb') as f: + f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) + + if video_id is None: + self.extractor.to_screen('%s' % (note2,)) + else: + self.extractor.to_screen('%s: %s' % (video_id, note2)) + + p = subprocess.Popen([ + self.exe, '--ssl-protocol=any', + self._TMP_FILES['script'].name + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + if p.returncode != 0: + raise ExtractorError( + 'Executing JS failed\n:' + encodeArgument(err)) + with open(self._TMP_FILES['html'].name, 'rb') as f: + html = f.read().decode('utf-8') + + self._load_cookies() + + return (html, encodeArgument(out)) + + class OpenloadIE(InfoExtractor): _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index acc4f98..92b22e6 100644 --- a/youtube_dl/utils.py 
+++ b/youtube_dl/utils.py @@ -3826,220 +3826,6 @@ def write_xattr(path, key, value): "or the 'xattr' binary.") -def cookie_to_dict(cookie): - cookie_dict = { - 'name': cookie.name, - 'value': cookie.value, - } - if cookie.port_specified: - cookie_dict['port'] = cookie.port - if cookie.domain_specified: - cookie_dict['domain'] = cookie.domain - if cookie.path_specified: - cookie_dict['path'] = cookie.path - if cookie.expires is not None: - cookie_dict['expires'] = cookie.expires - if cookie.secure is not None: - cookie_dict['secure'] = cookie.secure - if cookie.discard is not None: - cookie_dict['discard'] = cookie.discard - try: - if (cookie.has_nonstandard_attr('httpOnly') or - cookie.has_nonstandard_attr('httponly') or - cookie.has_nonstandard_attr('HttpOnly')): - cookie_dict['httponly'] = True - except TypeError: - pass - return cookie_dict - - -def cookie_jar_to_list(cookie_jar): - return [cookie_to_dict(cookie) for cookie in cookie_jar] - - -class PhantomJSwrapper(object): - """PhantomJS wrapper class""" - - _TEMPLATE = r''' - phantom.onError = function(msg, trace) {{ - var msgStack = ['PHANTOM ERROR: ' + msg]; - if(trace && trace.length) {{ - msgStack.push('TRACE:'); - trace.forEach(function(t) {{ - msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line - + (t.function ? 
' (in function ' + t.function +')' : '')); - }}); - }} - console.error(msgStack.join('\n')); - phantom.exit(1); - }}; - var page = require('webpage').create(); - var fs = require('fs'); - var read = {{ mode: 'r', charset: 'utf-8' }}; - var write = {{ mode: 'w', charset: 'utf-8' }}; - JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{ - phantom.addCookie(x); - }}); - page.settings.resourceTimeout = {timeout}; - page.settings.userAgent = "{ua}"; - page.onLoadStarted = function() {{ - page.evaluate(function() {{ - delete window._phantom; - delete window.callPhantom; - }}); - }}; - var saveAndExit = function() {{ - fs.write("{html}", page.content, write); - fs.write("{cookies}", JSON.stringify(phantom.cookies), write); - phantom.exit(); - }}; - page.onLoadFinished = function(status) {{ - if(page.url === "") {{ - page.setContent(fs.read("{html}", read), "{url}"); - }} - else {{ - {jscode} - }} - }}; - page.open(""); - ''' - - _TMP_FILE_NAMES = ['script', 'html', 'cookies'] - - @staticmethod - def _version(): - return get_exe_version('phantomjs', version_re=r'([0-9.]+)') - - def __init__(self, extractor, required_version=None, timeout=10000): - self.exe = check_executable('phantomjs', ['-v']) - if not self.exe: - raise ExtractorError('PhantomJS executable not found in PATH, ' - 'download it from http://phantomjs.org', - expected=True) - - self.extractor = extractor - - if required_version: - version = self._version() - if is_outdated_version(version, required_version): - self.extractor._downloader.report_warning( - 'Your copy of PhantomJS is outdated, update it to version ' - '%s or newer if you encounter any errors.' 
% required_version) - - self.options = { - 'timeout': timeout, - } - self._TMP_FILES = {} - for name in self._TMP_FILE_NAMES: - tmp = tempfile.NamedTemporaryFile(delete=False) - tmp.close() - self._TMP_FILES[name] = tmp - - def __del__(self): - for name in self._TMP_FILE_NAMES: - try: - os.remove(self._TMP_FILES[name].name) - except: - pass - - def _save_cookies(self, url): - cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) - for cookie in cookies: - if 'path' not in cookie: - cookie['path'] = '/' - if 'domain' not in cookie: - cookie['domain'] = compat_urlparse.urlparse(url).netloc - with open(self._TMP_FILES['cookies'].name, 'wb') as f: - f.write(json.dumps(cookies).encode('utf-8')) - - def _load_cookies(self): - with open(self._TMP_FILES['cookies'].name, 'rb') as f: - cookies = json.loads(f.read().decode('utf-8')) - for cookie in cookies: - if cookie['httponly'] is True: - cookie['rest'] = {'httpOnly': None} - if 'expiry' in cookie: - cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**cookie) - - def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): - """ - Downloads webpage (if needed) and executes JS - - Params: - url: website url - html: optional, html code of website - video_id: video id - note: optional, displayed when downloading webpage - note2: optional, displayed when executing JS - headers: custom http headers - jscode: code to be executed when page is loaded - - Returns tuple with: - * downloaded website (after JS execution) - * anything you print with `console.log` (but not inside `page.execute`!) - - In most cases you don't need to add any `jscode`. - It is executed in `page.onLoadFinished`. 
- `saveAndExit();` is mandatory, use it instead of `phantom.exit()` - It is possible to wait for some element on the webpage, for example: - var check = function() { - var elementFound = page.evaluate(function() { - return document.querySelector('#b.done') !== null; - }); - if(elementFound) - saveAndExit(); - else - window.setTimeout(check, 500); - } - - page.evaluate(function(){ - document.querySelector('#a').click(); - }); - check(); - """ - if 'saveAndExit();' not in jscode: - raise ExtractorError('`saveAndExit();` not found in `jscode`') - if not html: - html = self.extractor._download_webpage(url, video_id, note=note, headers=headers) - with open(self._TMP_FILES['html'].name, 'wb') as f: - f.write(html.encode('utf-8')) - - self._save_cookies(url) - - replaces = self.options - replaces['url'] = url - user_agent = headers.get('User-Agent') or std_headers['User-Agent'] - replaces['ua'] = user_agent.replace('"', '\\"') - replaces['jscode'] = jscode - - for x in self._TMP_FILE_NAMES: - replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') - - with open(self._TMP_FILES['script'].name, 'wb') as f: - f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) - - if video_id is None: - self.extractor.to_screen('%s' % (note2,)) - else: - self.extractor.to_screen('%s: %s' % (video_id, note2)) - - p = subprocess.Popen([ - self.exe, '--ssl-protocol=any', - self._TMP_FILES['script'].name - ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - if p.returncode != 0: - raise ExtractorError( - 'Executing JS failed\n:' + encodeArgument(err)) - with open(self._TMP_FILES['html'].name, 'rb') as f: - html = f.read().decode('utf-8') - - self._load_cookies() - - return (html, encodeArgument(out)) - - def random_birthday(year_field, month_field, day_field): return { year_field: str(random.randint(1950, 1995)), -- cgit v1.1 From 011da618bdb907cbe02aacefba421dad4cf49a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
Date: Sun, 24 Sep 2017 00:12:40 +0700 Subject: [openload] Fix _load_cookies for python 2.6 --- youtube_dl/extractor/openload.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index da1ef02..b50d6c7 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -8,7 +8,10 @@ import subprocess import tempfile from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_kwargs, +) from ..utils import ( check_executable, determine_ext, @@ -158,7 +161,7 @@ class PhantomJSwrapper(object): cookie['rest'] = {'httpOnly': None} if 'expiry' in cookie: cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**cookie) + self.extractor._set_cookie(**compat_kwargs(cookie)) def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ -- cgit v1.1 From 4d182955a2bbe95eb4b931f82204f402ffd5552d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 24 Sep 2017 00:19:27 +0700 Subject: [kakao] Fix _VALID_URL --- youtube_dl/extractor/kakao.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index c9b438e..7fa140b 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -12,7 +12,7 @@ from ..utils import ( class KakaoIE(InfoExtractor): - _VALID_URL = r'https?://tv.kakao.com/channel/(?P\d+)/cliplink/(?P\d+)' + _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P\d+)/cliplink/(?P\d+)' _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' _TESTS = [{ -- cgit v1.1 From 10cab6613fc52f64f0a5a81281fec2f8d2d92f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 24 Sep 2017 00:21:34 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 
insertions(+) diff --git a/ChangeLog b/ChangeLog index 42ba879..87ceabe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,30 @@ Core + [options] Accept lrc as a subtitle conversion target format (#14292) * [utils] Fix handling raw TTML subtitles (#14191) +Extractors +* [24video] Fix timestamp extraction and make non fatal (#14295) ++ [24video] Add support for 24video.adult (#14295) ++ [kakao] Add support for tv.kakao.com (#12298, #14007) ++ [twitter] Add support for URLs without user id (#14270) ++ [americastestkitchen] Add support for americastestkitchen.com (#10764, + #13996) +* [generic] Fix support for multiple HTML5 videos on one page (#14080) +* [mixcloud] Fix extraction (#14088, #14132) ++ [lynda] Add support for educourse.ga (#14286) +* [beeg] Fix extraction (#14275) +* [nbcsports:vplayer] Correct theplatform URL (#13873) +* [twitter] Fix duration extraction (#14141) +* [tvplay] Bypass geo restriction ++ [heise] Add support for YouTube embeds (#14109) ++ [popcorntv] Add support for popcorntv.it (#5914, #14211) +* [viki] Update app data (#14181) +* [morningstar] Relax URL regular expression (#14222) +* [openload] Fix extraction (#14225, #14257) +* [noovo] Fix extraction (#14214) +* [dailymotion:playlist] Relax URL regular expression (#14219) ++ [twitch] Add support for go.twitch.tv URLs (#14215) +* [vgtv] Relax URL regular expression (#14223) + version 2017.09.15 -- cgit v1.1 From 9fc41bcb6b94ddb64885b1eb20282c69dc787929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 24 Sep 2017 00:22:50 +0700 Subject: release 2017.09.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 98ab5b6..1954528 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: 
run `youtube-dl --version` and ensure your version is *2017.09.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.09.15 +[debug] youtube-dl version 2017.09.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 87ceabe..da60c1b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.09.24 Core + [options] Accept lrc as a subtitle conversion target format (#14292) diff --git a/README.md b/README.md index 28ee63f..7818e58 100644 --- a/README.md +++ b/README.md @@ -427,7 +427,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo syntax. 
Example: --exec 'adb push {} /sdcard/Music/ && rm {}' --convert-subs FORMAT Convert the subtitles to other format - (currently supported: srt|ass|vtt) + (currently supported: srt|ass|vtt|lrc) # CONFIGURATION diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6b01dc9..d36a07c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -43,6 +43,7 @@ - **Allocine** - **AlphaPorno** - **AMCNetworks** + - **AmericasTestKitchen** - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **AnimeOnDemand** - **anitube.se** @@ -378,6 +379,7 @@ - **Jove** - **jpopsuki.tv** - **JWPlatform** + - **Kakao** - **Kaltura** - **Kamcord** - **KanalPlay**: Kanal 5/9/11 Play @@ -627,6 +629,7 @@ - **Pokemon** - **PolskieRadio** - **PolskieRadioCategory** + - **PopcornTV** - **PornCom** - **PornerBros** - **PornFlip** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8399c04..0e2e4dc 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.09.15' +__version__ = '2017.09.24' -- cgit v1.1 From 8b389f7e3ce4466824abc7ce2aaf657abf34a682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 24 Sep 2017 18:21:38 +0700 Subject: Credit the author of multiple generic HTML5 embeds fix --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 1d40c86..7e01224 100644 --- a/AUTHORS +++ b/AUTHORS @@ -230,3 +230,4 @@ Ashutosh Chaudhary John Dong Tatsuyuki Ishi Daniel Weber +Kay Bouché -- cgit v1.1 From db96252831e24ae040de92f4c6668d3eaef82165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 24 Sep 2017 19:23:08 +0700 Subject: [xhamsterembed] Fix extraction (closes #14308) --- youtube_dl/extractor/xhamster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index c42b59e..be3624e 100644 --- 
a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -221,7 +221,7 @@ class XHamsterEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id, + r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id), webpage, 'xhamster url', default=None) if not video_url: -- cgit v1.1 From 02d01e15f1a52a23cc4991c39219f98aed542136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 26 Sep 2017 21:47:18 +0700 Subject: [generic] Fix infinite recursion for twitter:player URLs (closes #14339) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7d0edf0..096e2aa 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2973,7 +2973,7 @@ class GenericIE(InfoExtractor): # be supported by youtube-dl thus this is checked the very last (see # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser) embed_url = self._html_search_meta('twitter:player', webpage, default=None) - if embed_url: + if embed_url and embed_url != url: return self.url_result(embed_url) if not found: -- cgit v1.1 From b14b2283a0b9f66f54e862641b5b1cd22c07bd62 Mon Sep 17 00:00:00 2001 From: Timendum Date: Wed, 27 Sep 2017 17:48:47 +0200 Subject: [gfycat] Add support for /gifs/detail URLs (closes #14322) --- youtube_dl/extractor/gfycat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index 45ccc11..a0670b6 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 +11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P[^/?#]+)' + _VALID_URL = 
r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P[^/?#]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor): 'categories': list, 'age_limit': 0, } + }, { + 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull', + 'only_matching': True }] def _real_extract(self, url): -- cgit v1.1 From 63d990d2859d0e981da2e416097655798334431b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 28 Sep 2017 00:29:42 +0700 Subject: [generic] Add support for Video.js embeds --- youtube_dl/extractor/generic.py | 56 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 096e2aa..a3d09a0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -22,6 +22,8 @@ from ..utils import ( HEADRequest, is_html, js_to_json, + KNOWN_EXTENSIONS, + mimetype2ext, orderedSet, sanitized_Request, smuggle_url, @@ -1130,6 +1132,22 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + { + # Video.js embed + 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html', + 'info_dict': { + 'id': 'yygqldloqIk', + 'ext': 'mp4', + 'title': 'SolidWorks. 
Урок 6 Настройка чертежа', + 'description': 'md5:baf95267792646afdbf030e4d06b2ab3', + 'upload_date': '20130314', + 'uploader': 'PROстое3D', + 'uploader_id': 'PROstoe3D', + }, + 'params': { + 'skip_download': True, + }, + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', @@ -2880,6 +2898,44 @@ class GenericIE(InfoExtractor): jwplayer_data, video_id, require_title=False, base_url=url) return merge_dicts(info, info_dict) + # Video.js embed + mobj = re.search( + r'(?s)\bvideojs\s*$.+?\bplayer\.src\s*\(\s*(\[.+?\])\s*$\s*;', + webpage) + if mobj is not None: + sources = self._parse_json( + mobj.group(1), video_id, transform_source=js_to_json, + fatal=False) or [] + formats = [] + for source in sources: + src = source.get('src') + if not src or not isinstance(src, compat_str): + continue + src = compat_urlparse.urljoin(url, src) + src_type = source.get('type') + if isinstance(src_type, compat_str): + src_type = src_type.lower() + ext = determine_ext(src).lower() + if src_type == 'video/youtube': + return self.url_result(src, YoutubeIE.ie_key()) + if src_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id='dash', fatal=False)) + elif src_type == 'application/x-mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + 'ext': (mimetype2ext(src_type) or + ext if ext in KNOWN_EXTENSIONS else 'mp4'), + }) + if formats: + self._sort_formats(formats) + info_dict['formats'] = formats + return info_dict + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') -- cgit v1.1 From 3600fd591dc1ee4963f6940d19ef39d464d3a689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 28 Sep 2017 00:46:48 +0700 Subject: [YoutubeDL] Document 
youtube_include_dash_manifest --- youtube_dl/YoutubeDL.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0a7f36c..20aa1e4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -304,6 +304,12 @@ class YoutubeDL(object): otherwise prefer avconv. postprocessor_args: A list of additional command-line arguments for the postprocessor. + + The following options are used by the Youtube extractor: + youtube_include_dash_manifest: If True (default), DASH manifests and related + data will be downloaded and processed by extractor. + You can reduce network I/O by disabling it if you don't + care about DASH. """ _NUMERIC_FIELDS = set(( -- cgit v1.1 From eb9a15be6091d09f46d4306516b58cf2fe91e647 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Sep 2017 22:47:03 +0700 Subject: [yahoo] Add support for custom brigthcove embeds (closes #14210) --- youtube_dl/extractor/yahoo.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 38f82bf..e923379 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -28,7 +28,7 @@ from .nbc import NBCSportsVPlayerIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+)?-(?P[0-9]+)(?:-[a-z]+)?(?:\.html)?)' + _VALID_URL = r'(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P.+)?-)?(?P[0-9]+)(?:-[a-z]+)?(?:\.html)?' 
_TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', @@ -227,13 +227,28 @@ class YahooIE(InfoExtractor): 'skip_download': True, }, }, + { + # custom brightcove + 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/', + 'info_dict': { + 'id': '5575377707001', + 'ext': 'mp4', + 'title': "Clown entertainers say 'It' is hurting their business", + 'description': 'Stephen King s horror film has much to answer for. Jelby and Mr Loopy the Clowns join us.', + 'timestamp': 1505341164, + 'upload_date': '20170913', + 'uploader_id': '2376984109001', + }, + 'params': { + 'skip_download': True, + }, + } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') or self._match_id(url) page_id = mobj.group('id') - url = mobj.group('url') + display_id = mobj.group('display_id') or page_id host = mobj.group('host') webpage, urlh = self._download_webpage_handle(url, display_id) if 'err=404' in urlh.geturl(): @@ -262,6 +277,14 @@ class YahooIE(InfoExtractor): if bc_url: return self.url_result(bc_url, BrightcoveNewIE.ie_key()) + brightcove_id = self._search_regex( + r']+data-video-id=["\'](\d+)', webpage, 'brightcove id', + default=None) + if brightcove_id: + return self.url_result( + 'http://players.brightcove.net/2376984109001/default_default/index.html?videoId=%s' % brightcove_id, + BrightcoveNewIE.ie_key()) + # Query result is often embedded in webpage as JSON. Sometimes explicit requests # to video API results in a failure with geo restriction reason therefore using # embedded query result when present sounds reasonable. 
-- cgit v1.1 From fa3fdeb41fdd637283baeb6d681450a982043a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Sep 2017 22:54:22 +0700 Subject: [yahoo] Fix some tests --- youtube_dl/extractor/yahoo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index e923379..a3bb1b0 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -50,6 +50,7 @@ class YahooIE(InfoExtractor): 'description': 'md5:66b627ab0a282b26352136ca96ce73c1', 'duration': 151, }, + 'skip': 'HTTP Error 404', }, { 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', @@ -142,7 +143,7 @@ class YahooIE(InfoExtractor): 'skip': 'Domain name in.lifestyle.yahoo.com gone', }, { 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', - 'md5': '2a9752f74cb898af5d1083ea9f661b58', + 'md5': '989396ae73d20c6f057746fb226aa215', 'info_dict': { 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', 'ext': 'mp4', -- cgit v1.1 From 3836b02ce8df654ea70276f1d196b347ea9bc257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Sep 2017 22:56:40 +0700 Subject: [YoutubeDL] PEP 8 --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 20aa1e4..474d6c9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -304,7 +304,7 @@ class YoutubeDL(object): otherwise prefer avconv. postprocessor_args: A list of additional command-line arguments for the postprocessor. - + The following options are used by the Youtube extractor: youtube_include_dash_manifest: If True (default), DASH manifests and related data will be downloaded and processed by extractor. 
-- cgit v1.1 From 2c53bd51c6b0bef836d4e84d9a8f9127c3b67660 Mon Sep 17 00:00:00 2001 From: Giuseppe Fabiano Date: Sat, 30 Sep 2017 22:21:17 +0200 Subject: [rtve:alacarta] Fix extraction (closes #14290) --- youtube_dl/extractor/rtve.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 746677a..d9edf9d 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -10,6 +10,7 @@ from ..compat import ( compat_struct_unpack, ) from ..utils import ( + determine_ext, ExtractorError, float_or_none, remove_end, @@ -85,6 +86,18 @@ class RTVEALaCartaIE(InfoExtractor): }, 'skip': 'The f4m manifest can\'t be used yet', }, { + 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', + 'md5': 'e55e162379ad587e9640eda4f7353c0f', + 'info_dict': { + 'id': '4236788', + 'ext': 'mp4', + 'title': 'Servir y proteger - Capítulo 104 ', + 'duration': 3222.0, + }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, + }, { 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'only_matching': True, }, { @@ -107,24 +120,41 @@ class RTVEALaCartaIE(InfoExtractor): video_id)['page']['items'][0] if info['state'] == 'DESPU': raise ExtractorError('The video is no longer available', expected=True) + title = info['title'] png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) png_request = sanitized_Request(png_url) png_request.add_header('Referer', url) png = self._download_webpage(png_request, video_id, 'Downloading url information') video_url = _decrypt_url(png) - if not video_url.endswith('.f4m'): + ext = determine_ext(video_url) + + formats = [] + if not video_url.endswith('.f4m') and ext != 'm3u8': if '?' 
not in video_url: video_url = video_url.replace('resources/', 'auth/resources/') video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve') + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds', fatal=False)) + else: + formats.append({ + 'url': video_url, + }) + self._sort_formats(formats) + subtitles = None if info.get('sbtFile') is not None: subtitles = self.extract_subtitles(video_id, info['sbtFile']) return { 'id': video_id, - 'title': info['title'], - 'url': video_url, + 'title': title, + 'formats': formats, 'thumbnail': info.get('image'), 'page_url': url, 'subtitles': subtitles, -- cgit v1.1 From b69ca0ccfcd3bc7b64aa227339e92576baf5fc9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Oct 2017 04:37:42 +0700 Subject: [yahoo] Use extracted brightcove account id (closes #14210) --- youtube_dl/extractor/yahoo.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index a3bb1b0..87615cd 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -12,11 +12,12 @@ from ..compat import ( ) from ..utils import ( clean_html, - unescapeHTML, + determine_ext, ExtractorError, + extract_attributes, int_or_none, mimetype2ext, - determine_ext, + unescapeHTML, ) from .brightcove import ( @@ -278,13 +279,21 @@ class YahooIE(InfoExtractor): if bc_url: return self.url_result(bc_url, BrightcoveNewIE.ie_key()) - brightcove_id = self._search_regex( - r']+data-video-id=["\'](\d+)', webpage, 'brightcove id', - default=None) - if brightcove_id: - return self.url_result( - 'http://players.brightcove.net/2376984109001/default_default/index.html?videoId=%s' % brightcove_id, - BrightcoveNewIE.ie_key()) + brightcove_iframe = 
self._search_regex( + r'(]+data-video-id=["\']\d+[^>]+>)', webpage, + 'brightcove iframe', default=None) + if brightcove_iframe: + attr = extract_attributes(brightcove_iframe) + src = attr.get('src') + if src: + parsed_src = compat_urlparse.urlparse(src) + qs = compat_urlparse.parse_qs(parsed_src.query) + account_id = qs.get('accountId', ['2376984109001'])[0] + brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0] + if account_id and brightcove_id: + return self.url_result( + 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (account_id, brightcove_id), + BrightcoveNewIE.ie_key()) # Query result is often embedded in webpage as JSON. Sometimes explicit requests # to video API results in a failure with geo restriction reason therefore using -- cgit v1.1 From c8da40d834697c343f64609b2cfbb3ff759c18a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Oct 2017 04:49:27 +0700 Subject: [yahoo] Bypass geo restriction for brightcove (#14210) --- youtube_dl/extractor/yahoo.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 87615cd..552013a 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -17,6 +17,7 @@ from ..utils import ( extract_attributes, int_or_none, mimetype2ext, + smuggle_url, unescapeHTML, ) @@ -29,7 +30,7 @@ from .nbc import NBCSportsVPlayerIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P.+)?-)?(?P[0-9]+)(?:-[a-z]+)?(?:\.html)?' + _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P.+)?-)?(?P[0-9]+)(?:-[a-z]+)?(?:\.html)?' 
_TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', @@ -244,6 +245,11 @@ class YahooIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, + { + # custom brightcove, geo-restricted to Australia, bypassable + 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/', + 'only_matching': True, } ] @@ -274,10 +280,15 @@ class YahooIE(InfoExtractor): if bc_url: return self.url_result(bc_url, BrightcoveLegacyIE.ie_key()) + def brightcove_url_result(bc_url): + return self.url_result( + smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}), + BrightcoveNewIE.ie_key()) + # Look for Brightcove New Studio embeds bc_url = BrightcoveNewIE._extract_url(self, webpage) if bc_url: - return self.url_result(bc_url, BrightcoveNewIE.ie_key()) + return brightcove_url_result(bc_url) brightcove_iframe = self._search_regex( r'(]+data-video-id=["\']\d+[^>]+>)', webpage, @@ -291,9 +302,9 @@ class YahooIE(InfoExtractor): account_id = qs.get('accountId', ['2376984109001'])[0] brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0] if account_id and brightcove_id: - return self.url_result( - 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (account_id, brightcove_id), - BrightcoveNewIE.ie_key()) + return brightcove_url_result( + 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' + % (account_id, brightcove_id)) # Query result is often embedded in webpage as JSON. 
Sometimes explicit requests # to video API results in a failure with geo restriction reason therefore using -- cgit v1.1 From c5b7014a9c936ab8cd0c3b8514a278575c414aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Oct 2017 07:01:21 +0700 Subject: [generic] Add support for single format Video.js embeds (closes #14371) --- youtube_dl/extractor/generic.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a3d09a0..1721a3d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1133,7 +1133,7 @@ class GenericIE(InfoExtractor): } }, { - # Video.js embed + # Video.js embed, multiple formats 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html', 'info_dict': { 'id': 'yygqldloqIk', @@ -1148,6 +1148,19 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # Video.js embed, single format + 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=', + 'info_dict': { + 'id': 'watch', + 'ext': 'mp4', + 'title': 'Step 1 - Good Foundation', + 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4', + }, + 'params': { + 'skip_download': True, + }, + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', @@ -2900,12 +2913,14 @@ class GenericIE(InfoExtractor): # Video.js embed mobj = re.search( - r'(?s)\bvideojs\s*$.+?\bplayer\.src\s*\(\s*(\[.+?\])\s*$\s*;', + r'(?s)\bvideojs\s*$.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*$\s*;', webpage) if mobj is not None: sources = self._parse_json( mobj.group(1), video_id, transform_source=js_to_json, fatal=False) or [] + if not isinstance(sources, list): + sources = [sources] formats = [] for source in sources: src = source.get('src') -- cgit v1.1 From 4e599194d6cf3dbe214dceb3c66ba5f718c6579a Mon Sep 17 00:00:00 2001 From: Rafal Borczuch Date: Sun, 1 Oct 2017 12:59:00 +0100 Subject: [tvp] Add support for 
new URL schema (closes #14368) --- youtube_dl/extractor/tvp.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index c5b3288..3954f0b 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -15,16 +15,16 @@ from ..utils import ( class TVPIE(InfoExtractor): IE_NAME = 'tvp' IE_DESC = 'Telewizja Polska' - _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P\d+)' + _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P\d+)' _TESTS = [{ - 'url': 'http://vod.tvp.pl/194536/i-seria-odc-13', + 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536', 'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 'info_dict': { 'id': '194536', 'ext': 'mp4', 'title': 'Czas honoru, I seria – odc. 13', - 'description': 'md5:76649d2014f65c99477be17f23a4dead', + 'description': 'md5:381afa5bca72655fe94b05cfe82bf53d', }, }, { 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', @@ -37,12 +37,13 @@ class TVPIE(InfoExtractor): }, }, { # page id is not the same as video id(#7799) - 'url': 'http://vod.tvp.pl/22704887/08122015-1500', - 'md5': 'cf6a4705dfd1489aef8deb168d6ba742', + 'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930', + 'md5': '84cd3c8aec4840046e5ab712416b73d0', 'info_dict': { - 'id': '22680786', + 'id': '33908820', 'ext': 'mp4', - 'title': 'Wiadomości, 08.12.2015, 15:00', + 'title': 'Wiadomości, 28.09.2017, 19:30', + 'description': 'Wydanie główne codziennego serwisu informacyjnego.' 
}, }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', -- cgit v1.1 From 1dd126180eb7c3266af5480a95a3e5c4eb81ed1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Oct 2017 21:45:56 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index da60c1b..66bb790 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version + +Core +* [YoutubeDL] Document youtube_include_dash_manifest + +Extractors ++ [tvp] Add support for new URL schema (#14368) ++ [generic] Add support for single format Video.js embeds (#14371) +* [yahoo] Bypass geo restriction for brightcove (#14210) +* [yahoo] Use extracted brightcove account id (#14210) +* [rtve:alacarta] Fix extraction (#14290) ++ [yahoo] Add support for custom brigthcove embeds (#14210) ++ [generic] Add support for Video.js embeds ++ [gfycat] Add support for /gifs/detail URLs (#14322) +* [generic] Fix infinite recursion for twitter:player URLs (#14339) +* [xhamsterembed] Fix extraction (#14308) + + version 2017.09.24 Core -- cgit v1.1 From fcdd37d053c97c1bc363635c46634c778ce3eee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Oct 2017 21:54:11 +0700 Subject: release 2017.10.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1954528..3be3062 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.09.24 +[debug] youtube-dl version 2017.10.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 66bb790..80299d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.10.01 Core * [YoutubeDL] Document youtube_include_dash_manifest diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0e2e4dc..49fa02d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.09.24' +__version__ = '2017.10.01' -- cgit v1.1 From 839728f5bfad3a6166be3839009b13963f00dfac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Oct 2017 03:28:25 +0700 Subject: [afreecatv] Add support for adult videos (closes #14376) --- youtube_dl/extractor/afreecatv.py | 27 
++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index c8cb91d..2c58f46 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -139,6 +139,23 @@ class AfreecaTVIE(InfoExtractor): 'skip_download': True, }, }, { + # adult video + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731', + 'info_dict': { + 'id': '20171001_F1AE1711_196617479_1', + 'ext': 'mp4', + 'title': '[생]서아 초심 찾기 방송 (part 1)', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'BJ서아', + 'uploader_id': 'bjdyrksu', + 'upload_date': '20171001', + 'duration': 3600, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'only_matching': True, }, { @@ -160,7 +177,15 @@ class AfreecaTVIE(InfoExtractor): video_xml = self._download_xml( 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', - video_id, query={'nTitleNo': video_id}) + video_id, query={ + 'nTitleNo': video_id, + 'partialView': 'SKIP_ADULT', + }) + + flag = xpath_text(video_xml, './track/flag', 'flag', default=None) + if flag and flag != 'SUCCEED': + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, flag), expected=True) video_element = video_xml.findall(compat_xpath('./track/video'))[1] if video_element is None or video_element.text is None: -- cgit v1.1 From 117589dfa2ac32566dc705ee7e712139105e6dd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Oct 2017 04:14:36 +0700 Subject: [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een --- youtube_dl/extractor/canvas.py | 142 +++++++++++++++++++++++++------------ youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 99 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/canvas.py 
b/youtube_dl/extractor/canvas.py index aada029..6899f84 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -3,24 +3,104 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + float_or_none, + strip_or_none, +) class CanvasIE(InfoExtractor): + _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)' + _TESTS = [{ + 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'md5': '90139b746a0a9bd7bb631283f6e2a64e', + 'info_dict': { + 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'ext': 'flv', + 'title': 'Nachtwacht: De Greystook', + 'description': 'md5:1db3f5dc4c7109c821261e7512975be7', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1468.03, + }, + 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], + }, { + 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + site_id, video_id = mobj.group('site_id'), mobj.group('id') + + data = self._download_json( + 'https://mediazone.vrt.be/api/v1/%s/assets/%s' + % (site_id, video_id), video_id) + + title = data['title'] + description = data.get('description') + + formats = [] + for target in data['targetUrls']: + format_url, format_type = target.get('url'), target.get('type') + if not format_url or not format_type: + continue + if format_type == 'HLS': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=format_type, fatal=False)) + elif format_type == 'HDS': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_type, fatal=False)) + elif format_type == 'MPEG_DASH': + 
formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id=format_type, fatal=False)) + elif format_type == 'HSS': + formats.extend(self._extract_ism_formats( + format_url, video_id, ism_id='mss', fatal=False)) + else: + formats.append({ + 'format_id': format_type, + 'url': format_url, + }) + self._sort_formats(formats) + + subtitles = {} + subtitle_urls = data.get('subtitleUrls') + if isinstance(subtitle_urls, list): + for subtitle in subtitle_urls: + subtitle_url = subtitle.get('url') + if subtitle_url and subtitle.get('type') == 'CLOSED': + subtitles.setdefault('nl', []).append({'url': subtitle_url}) + + return { + 'id': video_id, + 'display_id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'duration': float_or_none(data.get('duration'), 1000), + 'thumbnail': data.get('posterImageUrl'), + 'subtitles': subtitles, + } + + +class CanvasEenIE(InfoExtractor): IE_DESC = 'canvas.be and een.be' _VALID_URL = r'https?://(?:www\.)?(?Pcanvas|een)\.be/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', - 'md5': 'ea838375a547ac787d4064d8c7860a6c', + 'md5': 'ed66976748d12350b118455979cca293', 'info_dict': { 'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', 'display_id': 'de-afspraak-veilt-voor-de-warmste-week', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'De afspraak veilt voor de Warmste Week', 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 49.02, - } + }, + 'expected_warnings': ['is not a supported codec'], }, { # with subtitles 'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167', @@ -40,7 +120,8 @@ class CanvasIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, + 'skip': 'Pagina niet gevonden', }, { 'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles', 'info_dict': { @@ -54,7 +135,8 @@ class CanvasIE(InfoExtractor): }, 'params': { 
'skip_download': True, - } + }, + 'skip': 'Episode no longer available', }, { 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', 'only_matching': True, @@ -66,55 +148,21 @@ class CanvasIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - title = (self._search_regex( + title = strip_or_none(self._search_regex( r']+class="video__body__header__title"[^>]*>(.+?)', webpage, 'title', default=None) or self._og_search_title( - webpage)).strip() + webpage, default=None)) video_id = self._html_search_regex( - r'data-video=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', group='id') - - data = self._download_json( - 'https://mediazone.vrt.be/api/v1/%s/assets/%s' - % (site_id, video_id), display_id) - - formats = [] - for target in data['targetUrls']: - format_url, format_type = target.get('url'), target.get('type') - if not format_url or not format_type: - continue - if format_type == 'HLS': - formats.extend(self._extract_m3u8_formats( - format_url, display_id, entry_protocol='m3u8_native', - ext='mp4', preference=0, fatal=False, m3u8_id=format_type)) - elif format_type == 'HDS': - formats.extend(self._extract_f4m_formats( - format_url, display_id, f4m_id=format_type, fatal=False)) - elif format_type == 'MPEG_DASH': - formats.extend(self._extract_mpd_formats( - format_url, display_id, mpd_id=format_type, fatal=False)) - else: - formats.append({ - 'format_id': format_type, - 'url': format_url, - }) - self._sort_formats(formats) - - subtitles = {} - subtitle_urls = data.get('subtitleUrls') - if isinstance(subtitle_urls, list): - for subtitle in subtitle_urls: - subtitle_url = subtitle.get('url') - if subtitle_url and subtitle.get('type') == 'CLOSED': - subtitles.setdefault('nl', []).append({'url': subtitle_url}) + r'data-video=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', + group='id') return { + '_type': 'url_transparent', + 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id), + 'ie_key': CanvasIE.ie_key(), 
'id': video_id, 'display_id': display_id, 'title': title, 'description': self._og_search_description(webpage), - 'formats': formats, - 'duration': float_or_none(data.get('duration'), 1000), - 'thumbnail': data.get('posterImageUrl'), - 'subtitles': subtitles, } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4232a4f..24e9acd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -150,7 +150,10 @@ from .camdemy import ( from .camwithher import CamWithHerIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE -from .canvas import CanvasIE +from .canvas import ( + CanvasIE, + CanvasEenIE, +) from .carambatv import ( CarambaTVIE, CarambaTVPageIE, -- cgit v1.1 From 544ffb7790ea1ac7bbc081d2e486101c0382b900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Oct 2017 04:15:12 +0700 Subject: [ketnet] Add support for videos without direct sources (closes #14377) --- youtube_dl/extractor/ketnet.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py index fb9c2db..93a98e1 100644 --- a/youtube_dl/extractor/ketnet.py +++ b/youtube_dl/extractor/ketnet.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +from .canvas import CanvasIE from .common import InfoExtractor @@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes', - 'md5': 'd907f7b1814ef0fa285c0475d9994ed7', + 'md5': '6bdeb65998930251bbd1c510750edba9', 'info_dict': { 'id': 'zomerse-filmpjes', 'ext': 'mp4', @@ -16,6 +17,20 @@ class KetnetIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', } }, { + # mzid in playerConfig instead of sources + 'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook', + 'md5': '90139b746a0a9bd7bb631283f6e2a64e', + 'info_dict': { + 'id': 
'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'ext': 'flv', + 'title': 'Nachtwacht: De Greystook', + 'description': 'md5:1db3f5dc4c7109c821261e7512975be7', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1468.03, + }, + 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], + }, { 'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', 'only_matching': True, }, { @@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor): 'player config'), video_id) + mzid = config.get('mzid') + if mzid: + return self.url_result( + 'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid, + CanvasIE.ie_key(), video_id=mzid) + title = config['title'] formats = [] -- cgit v1.1 From d2ae7e24e5c574fa45621771a134e58b21443e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Oct 2017 04:43:25 +0700 Subject: [postprocessor/ffmpeg] Convert to opus using libopus (closes #14381) --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index f71d413..3ea1afc 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -44,7 +44,7 @@ ACODECS = { 'aac': 'aac', 'flac': 'flac', 'm4a': 'aac', - 'opus': 'opus', + 'opus': 'libopus', 'vorbis': 'libvorbis', 'wav': None, } -- cgit v1.1 From b7e14f06a4a4fbaafc593c2f118e4b0f5d8d7937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ku=CC=88ch?= Date: Tue, 3 Oct 2017 15:17:28 +0200 Subject: Fix for JSON meta data download Added fixes according to #13651 and user @remitamine --- youtube_dl/extractor/nbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 836a41f..35151f5 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -15,7 +15,7 @@ from ..utils import ( class 
NBCIE(AdobePassIE): - _VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))' + _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))' _TESTS = [ { @@ -72,6 +72,7 @@ class NBCIE(AdobePassIE): def _real_extract(self, url): permalink, video_id = re.match(self._VALID_URL, url).groups() + permalink = 'http' + permalink video_data = self._download_json( 'https://api.nbc.com/v3/videos', video_id, query={ 'filter[permalink]': permalink, -- cgit v1.1 From 3e4cedf9e8cd3157df2457df7274d0c842421945 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Tue, 3 Oct 2017 18:28:13 +0200 Subject: [tvn24] Relax _VALID_URL --- youtube_dl/extractor/tvn24.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 12ed603..6590e1f 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -9,7 +9,7 @@ from ..utils import ( class TVN24IE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html' + _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', 'md5': 'fbdec753d7bc29d96036808275f2130c', @@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor): 'ext': 'mp4', 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', - 'thumbnail': 're:http://.*[.]jpeg', + 'thumbnail': 're:https?://.*[.]jpeg', } }, { 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', @@ -29,6 +29,9 @@ class TVN24IE(InfoExtractor): }, { 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html', 'only_matching': True, + }, { + 'url': 
'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158', + 'only_matching': True, }] def _real_extract(self, url): -- cgit v1.1 From 9524dca3accc1e60cddd141f06924ed7404db9bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Oct 2017 02:53:20 +0700 Subject: [README.md] Use revision bound link to YoutubeDL options (closes #14401) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7818e58..2879aad 100644 --- a/README.md +++ b/README.md @@ -1167,7 +1167,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object. 
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: -- cgit v1.1 From c110944fa2f21af733b4f3168764e1b008e11514 Mon Sep 17 00:00:00 2001 From: "M.K" Date: Tue, 3 Oct 2017 22:50:27 +0200 Subject: [extractor/common] Fix typo in _parse_mpd_formats --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2bbbf8f..a878550 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1920,7 +1920,7 @@ class InfoExtractor(object): # can't be used at the same time if '%(Number' in media_template and 's' not in representation_ms_info: segment_duration = None - if 'total_number' not in representation_ms_info and 'segment_duration': + if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['fragments'] = [{ -- cgit v1.1 From 6e736d86e7d03d14279c403202fe2a632e6e5023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Oct 2017 04:27:42 +0700 Subject: [beeg] Fix extraction (closes #14403) --- youtube_dl/extractor/beeg.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index bbeae4b..bf22a41 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -60,9 +60,13 @@ class BeegIE(InfoExtractor): beeg_version = beeg_version or '2185' beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H' - video = self._download_json( - 'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id), - video_id) + for 
api_path in ('', 'api.'): + video = self._download_json( + 'https://%sbeeg.com/api/v6/%s/video/%s' + % (api_path, beeg_version, video_id), video_id, + fatal=api_path == 'api.') + if video: + break def split(o, e): def cut(s, x): -- cgit v1.1 From 6b46285e850f8bc8155a93adf13920752537e55d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 4 Oct 2017 07:45:13 +0200 Subject: [comedycentral] new shortcut :theopposition for "The Opposition" show --- youtube_dl/extractor/comedycentral.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 4cac294..d08b909 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor): class ComedyCentralShortnameIE(InfoExtractor): - _VALID_URL = r'^:(?Ptds|thedailyshow)$' + _VALID_URL = r'^:(?Ptds|thedailyshow|theopposition)$' _TESTS = [{ 'url': ':tds', 'only_matching': True, }, { 'url': ':thedailyshow', 'only_matching': True, + }, { + 'url': ':theopposition', + 'only_matching': True, }] def _real_extract(self, url): @@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor): shortcut_map = { 'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', 'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes', } return self.url_result(shortcut_map[video_id]) -- cgit v1.1 From cf5f6ed5be8bab252f3ded345b6b1fdf31426661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Oct 2017 00:27:24 +0700 Subject: [xvideos] Add support for embed URLs and improve extraction (closes #14409) --- youtube_dl/extractor/xvideos.py | 42 ++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py 
b/youtube_dl/extractor/xvideos.py index eca6030..085c8d4 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -14,8 +14,16 @@ from ..utils import ( class XVideosIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P[0-9]+)(?:.*)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?xvideos\.com/video| + flashservice\.xvideos\.com/embedframe/| + static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= + ) + (?P[0-9]+) + ''' + _TESTS = [{ 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', 'md5': '14cea69fcb84db54293b1e971466c2e1', 'info_dict': { @@ -25,21 +33,33 @@ class XVideosIE(InfoExtractor): 'duration': 108, 'age_limit': 18, } - } + }, { + 'url': 'https://flashservice.xvideos.com/embedframe/4588838', + 'only_matching': True, + }, { + 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + webpage = self._download_webpage( + 'http://www.xvideos.com/video%s/' % video_id, video_id) mobj = re.search(r'

(.+?)

', webpage) if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - video_title = self._html_search_regex( - r'(.*?)\s+-\s+XVID', webpage, 'title') - video_thumbnail = self._search_regex( + title = self._html_search_regex( + (r'<title>(?P<title>.+?)\s+-\s+XVID', + r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), + webpage, 'title', default=None, + group='title') or self._og_search_title(webpage) + + thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) - video_duration = int_or_none(self._og_search_property( + duration = int_or_none(self._og_search_property( 'duration', webpage, default=None)) or parse_duration( self._search_regex( r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)', @@ -74,8 +94,8 @@ class XVideosIE(InfoExtractor): return { 'id': video_id, 'formats': formats, - 'title': video_title, - 'duration': video_duration, - 'thumbnail': video_thumbnail, + 'title': title, + 'duration': duration, + 'thumbnail': thumbnail, 'age_limit': 18, } -- cgit v1.1 From 6be08ce60205a65a6739667783eead56ccc34456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:13:53 +0700 Subject: [utils] Use in OnDemandPagedList by default Not using cache results in redundant network I/O due to downloading the same pages while using --playlist-items n-m --- youtube_dl/extractor/mixcloud.py | 2 +- youtube_dl/extractor/nba.py | 2 +- youtube_dl/utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index f331db8..7b2bb6e 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -291,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): functools.partial( self._tracks_page_func, '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type), - self._PAGE_SIZE, use_cache=True) + self._PAGE_SIZE) return 
self.playlist_result( entries, video_id, '%s (%s)' % (username, list_type), description) diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 5356196..be295a7 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE): playlist_title = self._og_search_title(webpage, fatal=False) entries = OnDemandPagedList( functools.partial(self._fetch_page, team, video_id), - self._PAGE_SIZE, use_cache=True) + self._PAGE_SIZE) return self.playlist_result(entries, team, playlist_title) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 92b22e6..59fb334 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1933,7 +1933,7 @@ class PagedList(object): class OnDemandPagedList(PagedList): - def __init__(self, pagefunc, pagesize, use_cache=False): + def __init__(self, pagefunc, pagesize, use_cache=True): self._pagefunc = pagefunc self._pagesize = pagesize self._use_cache = use_cache -- cgit v1.1 From 7e85e8729f1e2ed9817466acef30fa6dc7e03e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:34:46 +0700 Subject: [YoutubeDL] Fix out of range --playlist-items for iterable playlists and reduce code duplication (closes #14425) --- youtube_dl/YoutubeDL.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 474d6c9..9036f0f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -911,12 +911,22 @@ class YoutubeDL(object): playlistitems = iter_playlistitems(playlistitems_str) ie_entries = ie_result['entries'] + + def make_playlistitems_entries(list_ie_entries): + num_entries = len(list_ie_entries) + return [ + list_ie_entries[i - 1] for i in playlistitems + if -num_entries <= i - 1 < num_entries] + + def report_download(num_entries): + self.to_screen( + '[%s] playlist %s: Downloading %d videos' % + (ie_result['extractor'], 
playlist, num_entries)) + if isinstance(ie_entries, list): n_all_entries = len(ie_entries) if playlistitems: - entries = [ - ie_entries[i - 1] for i in playlistitems - if -n_all_entries <= i - 1 < n_all_entries] + entries = make_playlistitems_entries(ie_entries) else: entries = ie_entries[playliststart:playlistend] n_entries = len(entries) @@ -934,20 +944,15 @@ class YoutubeDL(object): entries = ie_entries.getslice( playliststart, playlistend) n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, n_entries)) + report_download(n_entries) else: # iterable if playlistitems: - entry_list = list(ie_entries) - entries = [entry_list[i - 1] for i in playlistitems] + entries = make_playlistitems_entries(list(ie_entries)) else: entries = list(itertools.islice( ie_entries, playliststart, playlistend)) n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, n_entries)) + report_download(n_entries) if self.params.get('playlistreverse', False): entries = entries[::-1] -- cgit v1.1 From 86a15ed64b410336b2145f4a6b8e8a22cbf24f51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:41:28 +0700 Subject: [test_YoutubeDL] Add test for #14425 --- test/test_YoutubeDL.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e70cbcd..5ac34e6 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -770,6 +770,9 @@ class TestYoutubeDL(unittest.TestCase): result = get_ids({'playlist_items': '10'}) self.assertEqual(result, []) + result = get_ids({'playlist_items': '3-10'}) + self.assertEqual(result, [3, 4]) + def test_urlopen_no_file_protocol(self): # see https://github.com/rg3/youtube-dl/issues/8227 ydl = YDL() -- cgit v1.1 From cd6fc19ed76af6687fb2cb5f87d9ed4c3071c203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:46:57 +0700 Subject: [YoutubeDL] Ignore duplicates in --playlist-items E.g. '--playlist-items 2-4,3-4,3' should result in '[2,3,4]', not '[2,3,4,3,4,3]' --- test/test_YoutubeDL.py | 3 +++ youtube_dl/YoutubeDL.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5ac34e6..db936bf 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -773,6 +773,9 @@ class TestYoutubeDL(unittest.TestCase): result = get_ids({'playlist_items': '3-10'}) self.assertEqual(result, [3, 4]) + result = get_ids({'playlist_items': '2-4,3-4,3'}) + self.assertEqual(result, [2, 3, 4]) + def test_urlopen_no_file_protocol(self): # see https://github.com/rg3/youtube-dl/issues/8227 ydl = YDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9036f0f..855d6b8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -65,6 +65,7 @@ from .utils import ( locked_file, make_HTTPS_handler, MaxDownloadsReached, + orderedSet, PagedList, parse_filesize, PerRequestProxyHandler, @@ -908,7 +909,7 @@ class YoutubeDL(object): yield int(item) else: yield int(string_segment) - playlistitems = iter_playlistitems(playlistitems_str) + playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) ie_entries = ie_result['entries'] -- cgit v1.1 From ac93c09ab2c4d099a628c1ed59670bef008db89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:53:32 +0700 Subject: [xtube] Add support for embedded URLs (closes #14417) --- youtube_dl/extractor/xtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index bea9b87..c6c0b32 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -18,7 +18,7 @@ class XTubeIE(InfoExtractor): _VALID_URL = r'''(?x) (?: xtube:| - 
https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-) + https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-) ) (?P<id>[^/?&#]+) ''' @@ -64,6 +64,9 @@ class XTubeIE(InfoExtractor): }, { 'url': 'xtube:kVTUy_G222_', 'only_matching': True, + }, { + 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big', + 'only_matching': True, }] def _real_extract(self, url): -- cgit v1.1 From 2e2a8e97d53d7759d663be2a1dc3f4108342ea40 Mon Sep 17 00:00:00 2001 From: Jalaz Kumar <jaykay12@users.noreply.github.com> Date: Fri, 6 Oct 2017 22:26:31 +0530 Subject: [pornflip] Extend _VALID_URL (closes #14405) --- youtube_dl/extractor/pornflip.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index a4a5d39..ee04936 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -14,7 +14,7 @@ from ..utils import ( class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})' + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})' _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', @@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor): }, { 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', 'only_matching': True, + }, { + 'url': 'https://www.pornflip.com/v/EkRD6-vS2-s', + 'only_matching': True, + }, { + 'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s', + 'only_matching': True, }] def _real_extract(self, url): -- cgit v1.1 From b1a7bf44b95dee2803e8638b13c4467a253b5b8f Mon Sep 17 00:00:00 2001 From: remis <r-pankevicius@users.noreply.github.com> Date: Fri, 6 Oct 2017 19:59:09 +0300 Subject: [lnkgo] Relax _VALID_URL --- youtube_dl/extractor/lnkgo.py | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py index 068378c..cfec0d3 100644 --- a/youtube_dl/extractor/lnkgo.py +++ b/youtube_dl/extractor/lnkgo.py @@ -11,7 +11,7 @@ from ..utils import ( class LnkGoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' _TESTS = [{ 'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162', 'info_dict': { @@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor): 'params': { 'skip_download': True, # HLS download }, + }, { + 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', + 'only_matching': True, }] _AGE_LIMITS = { 'N-7': 7, -- cgit v1.1 From e95284754192ea4b5b4e32f1a2274e5767d980b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 00:57:09 +0700 Subject: [PULL_REQUEST_TEMPLATE.md] Add explicit entry on flake8 --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 46fa26f..ba4ca75 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -9,6 +9,7 @@ ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under 
[Unlicense](http://unlicense.org/). Check one of the following options: - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) -- cgit v1.1 From 665f42d8c14626404372b349624632b8d39f3c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 01:40:00 +0700 Subject: [reddit] Sort formats (closes #14430) --- youtube_dl/extractor/reddit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py index 01c85ee..4d44b9d 100644 --- a/youtube_dl/extractor/reddit.py +++ b/youtube_dl/extractor/reddit.py @@ -35,6 +35,8 @@ class RedditIE(InfoExtractor): 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + return { 'id': video_id, 'title': video_id, -- cgit v1.1 From 3fc8f5b7c239ddd366012d2f0da754fcfe247297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 05:01:38 +0700 Subject: [ChangeLog] Actualize --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 80299d5..760d090 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +version <unreleased> + +Core +* [YoutubeDL] Ignore duplicates in --playlist-items +* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and + reduce code duplication (#14425) ++ [utils] Use cache in OnDemandPagedList by default +* [postprocessor/ffmpeg] Convert to opus using libopus (#14381) + +Extractors +* [reddit] Sort formats (#14430) +* [lnkgo] Relax URL regular expression (#14423) +* [pornflip] Extend URL regular expression (#14405, #14406) ++ [xtube] Add support for embed URLs (#14417) ++ [xvideos] Add support for embed URLs and improve extraction (#14409) +* [beeg] Fix extraction (#14403) +* [tvn24] Relax URL regular expression (#14395) +* [nbc] Fix extraction (#13651, #13715, 
#14137, #14198, #14312, #14314, #14378, + #14392, #14414, #14419, #14431) ++ [ketnet] Add support for videos without direct sources (#14377) +* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een ++ [afreecatv] Add support for adult videos (#14376) + + version 2017.10.01 Core -- cgit v1.1 From 8e751a185c53a81cd35900010118894fd8201b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 05:02:53 +0700 Subject: release 2017.10.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3be3062..9fc425c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.10.01 +[debug] youtube-dl version 2017.10.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 760d090..2feb3dc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.10.07 Core * [YoutubeDL] Ignore duplicates in --playlist-items diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d36a07c..be9df7e 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -130,7 +130,8 @@ - **CamWithHer** - **canalc2.tv** - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - - **Canvas**: canvas.be and een.be + - **Canvas** + - **CanvasEen**: canvas.be and een.be - **CarambaTV** - **CarambaTVPage** - **CartoonNetwork** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 49fa02d..705fcad 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.10.01' +__version__ = '2017.10.07' -- cgit v1.1 From 8b561bfc9d15bf487be0e0bb5bb1d7248fd321d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 21:59:04 +0700 Subject: [youtube] Add support for hooktube.com (closes 
#14437) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ad2e933..edd8713 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -332,6 +332,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| (?:www\.)?deturl\.com/www\.youtube\.com/| (?:www\.)?pwnyoutube\.com/| + (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains -- cgit v1.1 From a22ccac1f02b5957f1f083a54e1f83d22a7e6f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Oct 2017 01:34:17 +0700 Subject: [fox] Delegate to uplynk:preplay (#14147) --- youtube_dl/extractor/fox.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index facc665..5f98d01 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .adobepass import AdobePassIE +from .uplynk import UplynkPreplayIE +from ..compat import compat_str from ..utils import ( + HEADRequest, int_or_none, parse_age_limit, parse_duration, @@ -53,14 +56,7 @@ class FOXIE(AdobePassIE): }) title = video['name'] - - m3u8_url = self._download_json( - video['videoRelease']['url'], video_id)['playURL'] - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) + release_url = video['videoRelease']['url'] description = video.get('description') duration = int_or_none(video.get('durationInSeconds')) or int_or_none( @@ -84,7 +80,7 @@ class FOXIE(AdobePassIE): # TODO: AP pass - return { + info = { 'id': video_id, 'title': title, 'description': description, @@ -97,5 
+93,22 @@ class FOXIE(AdobePassIE): 'episode': episode, 'episode_number': episode_number, 'release_year': release_year, - 'formats': formats, } + + urlh = self._request_webpage(HEADRequest(release_url), video_id) + video_url = compat_str(urlh.geturl()) + + if UplynkPreplayIE.suitable(video_url): + info.update({ + '_type': 'url_transparent', + 'url': video_url, + 'ie_key': UplynkPreplayIE.ie_key(), + }) + else: + m3u8_url = self._download_json(release_url, video_id)['playURL'] + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + info['formats'] = formats + return info -- cgit v1.1 From b0dde6686c7110c9c2515a808d803239a81e6505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksandar=20Topuzovi=C4=87?= <aleksandar.topuzovic@gmail.com> Date: Sat, 7 Oct 2017 23:40:08 +0100 Subject: [hrti] Relax _VALID_URL --- youtube_dl/extractor/hrti.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 656ce6d..4f03694 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE): (?: hrti:(?P<short_id>[0-9]+)| https?:// - hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? + hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? 
) ''' _TESTS = [{ @@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE): }, { 'url': 'hrti:2181385', 'only_matching': True, + }, { + 'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14', + 'only_matching': True, }] def _real_extract(self, url): -- cgit v1.1 From 89923316210f8e17bb1a085278940e1c56fcff48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Oct 2017 21:36:50 +0700 Subject: [wdr] Relax media link regex (closes #14447) --- youtube_dl/extractor/wdr.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 8bb7362..621de1e 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor): # for wdrmaus, in a tag with the class "videoButton" (previously a link # to the page in a multiline "videoLink"-tag) json_metadata = self._html_search_regex( - r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', - webpage, 'media link', default=None, flags=re.MULTILINE) + r'''(?sx)class= + (?: + (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+| + (["\'])videoLink\b.*?\2[\s]*>\n[^\n]* + )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3 + ''', + webpage, 'media link', default=None, group='data') if not json_metadata: return -- cgit v1.1 From 197224b7a4e37a6581bf1a0da18d0f67ea61a476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Oct 2017 23:50:53 +0700 Subject: Fix some regexes --- youtube_dl/extractor/aenetworks.py | 2 +- youtube_dl/extractor/appletrailers.py | 4 ++-- youtube_dl/extractor/ard.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/dailymotion.py | 2 +- youtube_dl/extractor/deezer.py | 2 +- youtube_dl/extractor/freespeech.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/googleplus.py | 2 +- 
youtube_dl/extractor/hrti.py | 2 +- youtube_dl/extractor/ign.py | 2 +- youtube_dl/extractor/infoq.py | 6 +++--- youtube_dl/extractor/jeuxvideo.py | 2 +- youtube_dl/extractor/livestream.py | 2 +- youtube_dl/extractor/makertv.py | 2 +- youtube_dl/extractor/mangomolo.py | 2 +- youtube_dl/extractor/meipai.py | 2 +- youtube_dl/extractor/mtv.py | 2 +- youtube_dl/extractor/myvideo.py | 2 +- youtube_dl/extractor/nationalgeographic.py | 2 +- youtube_dl/extractor/naver.py | 2 +- youtube_dl/extractor/npo.py | 2 +- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/stanfordoc.py | 4 ++-- youtube_dl/extractor/theplatform.py | 2 +- youtube_dl/extractor/thisav.py | 4 ++-- youtube_dl/extractor/twitter.py | 2 +- youtube_dl/extractor/vice.py | 2 +- youtube_dl/extractor/videopremium.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 30 files changed, 35 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 2dcdba9..da1b566 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE): r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'], webpage, 'video url', group='url') theplatform_metadata = self._download_theplatform_metadata(self._search_regex( - r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) + r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) info = self._parse_theplatform_metadata(theplatform_metadata) if theplatform_metadata.get('AETN$isBehindWall'): requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index b45b431..a9ef733 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor): continue formats.append({ 'format_id': '%s-%s' % (version, size), - 'url': 
re.sub(r'_(\d+p.mov)', r'_h\1', src), + 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'width': int_or_none(size_data.get('width')), 'height': int_or_none(size_data.get('height')), 'language': version[:2], @@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor): formats = [] for format in settings['metadata']['sizes']: # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) formats.append({ 'url': format_url, 'format': format['type'], diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 3f248b1..915f886 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor): title = self._html_search_regex( [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', - r'<meta name="dcterms.title" content="(.*?)"/>', + r'<meta name="dcterms\.title" content="(.*?)"/>', r'<h4 class="headline">(.*?)</h4>'], webpage, 'title') description = self._html_search_meta( diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 8b20c03..5525f7c 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor): m3u8_id=format_id, fatal=False)) if re.search(self._USP_RE, href): usp_formats = self._extract_m3u8_formats( - re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), + re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href), programme_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) for f in usp_formats: diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index e9d0dd19..21a2d02 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): # vevo embed vevo_id = self._search_regex( - r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)', + 
r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)', webpage, 'vevo embed', default=None) if vevo_id: return self.url_result('vevo:%s' % vevo_id, 'Vevo') diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py index ec87b94..a38b268 100644 --- a/youtube_dl/extractor/deezer.py +++ b/youtube_dl/extractor/deezer.py @@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor): 'id': '176747451', 'title': 'Best!', 'uploader': 'Anonymous', - 'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', + 'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$', }, 'playlist_count': 30, 'skip': 'Only available in .de', diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 0a70ca7..7fa271b 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') webpage = self._download_webpage(url, title) - info_json = self._search_regex(r'jQuery.extend$Drupal.settings, ({.*?})$;', webpage, 'info') + info_json = self._search_regex(r'jQuery\.extend$Drupal\.settings, ({.*?})$;', webpage, 'info') info = json.loads(info_json) return { diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1721a3d..68b6338 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2206,7 +2206,7 @@ class GenericIE(InfoExtractor): # And then there are the jokers who advertise that they use RTA, # but actually don't. 
AGE_LIMIT_MARKERS = [ - r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>', + r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>', ] if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS): age_limit = 18 diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 427499b..6b927bb 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor): 'width': int(width), 'height': int(height), } for width, height, video_url in re.findall( - r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)] + r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)] self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 4f03694..7cef5f6 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -173,7 +173,7 @@ class HRTiIE(HRTiBaseIE): class HRTiPlaylistIE(HRTiBaseIE): - _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' + _VALID_URL = r'https?://hrti\.hrt\.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' 
_TESTS = [{ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', 'info_dict': { diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index c1367cf..a96ea80 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -203,7 +203,7 @@ class PCMagIE(IGNIE): _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)' IE_NAME = 'pcmag' - _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]' + _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]' _TESTS = [{ 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data', diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index fe425e7..57c9b0c 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -69,9 +69,9 @@ class InfoQIE(BokeCCBaseIE): }] def _extract_cookies(self, webpage): - policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') - signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') - key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') + policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') + signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') + key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( policy, signature, key_pair_id) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 1a4227f..e9f4ed7 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -30,7 +30,7 @@ class 
JeuxVideoIE(InfoExtractor): webpage = self._download_webpage(url, title) title = self._html_search_meta('name', webpage) or self._og_search_title(webpage) config_url = self._html_search_regex( - r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"', + r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"', webpage, 'config URL') config_url = 'http://www.jeuxvideo.com' + config_url diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 7f946c6..317ebbc 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor): info = { 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), - 'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), + 'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), } video_data = self._download_json(stream_url, content_id) is_live = video_data.get('isLive') diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py index 3c34d46..8eda69c 100644 --- a/youtube_dl/extractor/makertv.py +++ b/youtube_dl/extractor/makertv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class MakerTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' + _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' _TEST = { 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 1885ac7..dbd761a 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor): format_url = self._html_search_regex( [ - r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)', 
+ r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', r'<a[^>]+href="(rtsp://[^"]+)"' ], webpage, 'format url') formats = self._extract_wowza_formats( diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py index c8eacb4..2445b8b 100644 --- a/youtube_dl/extractor/meipai.py +++ b/youtube_dl/extractor/meipai.py @@ -11,7 +11,7 @@ from ..utils import ( class MeipaiIE(InfoExtractor): IE_DESC = '美拍' - _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)' _TESTS = [{ # regular uploaded video 'url': 'http://www.meipai.com/media/531697625', diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 25af5dd..1154a35 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid is None or ':' not in mgid: mgid = self._search_regex( - [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], + [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], webpage, 'mgid', default=None) if not mgid: diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 6bb64eb..367e811 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor): else: video_playpath = '' - video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') + video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj') video_swfobj = compat_urllib_parse_unquote(video_swfobj) video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index b91d865..9e8d28f 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE): 
release_url = self._search_regex( r'video_auth_playlist_url\s*=\s*"([^"]+)"', webpage, 'release url') - theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path') + theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path') video_id = theplatform_path.split('/')[-1] query = { 'mbr': 'true', diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index e813133..2047d44 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -43,7 +43,7 @@ class NaverIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', + m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"', webpage) if m_id is None: error = self._html_search_regex( diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index fa4ef20..b8fe244 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE): class HetKlokhuisIE(NPODataMidEmbedIE): IE_NAME = 'hetklokhuis' - _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' _TEST = { 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 2b830cf..3c8053a 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor): video_url = self._html_search_regex( r'<param name="src" value="([^"]+)"', webpage, 'video url') title = self._html_search_regex( - r'<title>([^<]+) RUHD.ru - Видео Высокого качества №1 в России!', + r'([^<]+) RUHD\.ru - Видео Высокого качества №1 в России!', webpage, 'title') description = 
self._html_search_regex( r'(?s)

(.+?)', diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index cce65fb..ae3dd13 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor): r'(?s)([^<]+)', coursepage, 'description', fatal=False) - links = orderedSet(re.findall(r'', coursepage)) + links = orderedSet(re.findall(r'', coursepage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] @@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor): rootpage = self._download_webpage(rootURL, info['id'], errnote='Unable to download course info page') - links = orderedSet(re.findall(r'', rootpage)) + links = orderedSet(re.findall(r'', rootpage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index de236bb..b1a985f 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): def hex_to_bytes(hex): return binascii.a2b_hex(hex.encode('ascii')) - relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) + relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1) clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 33683b1..dc3dd03 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor): info_dict = self._extract_jwplayer_data( webpage, video_id, require_title=False) uploader = 
self._html_search_regex( - r': ([^<]+)', + r': ([^<]+)', webpage, 'uploader name', fatal=False) uploader_id = self._html_search_regex( - r': (?:[^<]+)', + r': (?:[^<]+)', webpage, 'uploader id', fatal=False) info_dict.update({ diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 0df3ad7..1b0b963 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE): webpage = self._download_webpage(url, video_id) iframe_url = self._html_search_regex( - r']+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', + r']+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', webpage, 'video iframe', default=None) if iframe_url: return self.url_result(iframe_url) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index b8b8bf9..bcc2869 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor): class ViceArticleIE(InfoExtractor): IE_NAME = 'vice:article' - _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P[^?#]+)' + _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P[^?#]+)' _TESTS = [{ 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py index 5de8273..cf690d7 100644 --- a/youtube_dl/extractor/videopremium.py +++ b/youtube_dl/extractor/videopremium.py @@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor): webpage_url = 'http://videopremium.tv/' + video_id webpage = self._download_webpage(webpage_url, video_id) - if re.match(r'^]*>window.location\s*=', webpage): + if re.match(r'^]*>window\.location\s*=', webpage): # Download again, we need a cookie webpage = self._download_webpage( webpage_url, video_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py 
index edd8713..54f5d72 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1683,7 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_uploader_id = None video_uploader_url = None mobj = re.search( - r'', + r'', video_webpage) if mobj is not None: video_uploader_id = mobj.group('uploader_id') -- cgit v1.1 From ae5af89079558e0e128dc4d7f033459e68ad81e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Oct 2017 23:52:39 +0700 Subject: [hrti:playlist] Relax _VALID_URL --- youtube_dl/extractor/hrti.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 7cef5f6..6424d34 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -173,7 +173,7 @@ class HRTiIE(HRTiBaseIE): class HRTiPlaylistIE(HRTiBaseIE): - _VALID_URL = r'https?://hrti\.hrt\.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' + _VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' 
_TESTS = [{ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', 'info_dict': { @@ -185,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE): }, { 'url': 'https://hrti.hrt.hr/#/video/list/category/212/', 'only_matching': True, + }, { + 'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena', + 'only_matching': True, }] def _real_extract(self, url): -- cgit v1.1 From 9e71f88105e546573b8615d49f9d0c064496d6e6 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Mon, 9 Oct 2017 22:48:26 +0200 Subject: [vvvvid] Fix typo --- youtube_dl/extractor/vvvvid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py index d44ec85..656a4b9 100644 --- a/youtube_dl/extractor/vvvvid.py +++ b/youtube_dl/extractor/vvvvid.py @@ -133,7 +133,7 @@ class VVVVIDIE(InfoExtractor): 'season_id': season_id, 'season_number': video_data.get('season_number'), 'episode_id': str_or_none(video_data.get('id')), - 'epidode_number': int_or_none(video_data.get('number')), + 'episode_number': int_or_none(video_data.get('number')), 'episode_title': video_data['title'], 'view_count': int_or_none(video_data.get('views')), 'like_count': int_or_none(video_data.get('video_likes')), -- cgit v1.1 From 01c742ecd09be734ca4f3db08aba73424683ac1b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 10 Oct 2017 23:20:38 +0800 Subject: [facebook] Support thumbnails (closes #14416) --- ChangeLog | 6 ++++++ youtube_dl/extractor/facebook.py | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2feb3dc..2684cbc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [facebook] Support thumbnails (#14416) + + version 2017.10.07 Core diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 4b3f6cc..220ada3 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -67,9 +67,9 @@ class 
FacebookIE(InfoExtractor): 'uploader': 'Tennis on Facebook', 'upload_date': '20140908', 'timestamp': 1410199200, - } + }, + 'skip': 'Requires logging in', }, { - 'note': 'Video without discernible title', 'url': 'https://www.facebook.com/video.php?v=274175099429670', 'info_dict': { 'id': '274175099429670', @@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor): 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', 'timestamp': 1399398998, + 'thumbnail': r're:^https?://.*', }, 'expected_warnings': [ 'title' @@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor): 'upload_date': '20160110', 'timestamp': 1452431627, }, + 'skip': 'Requires logging in', }, { 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6', @@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '10153664894881749', 'ext': 'mp4', - 'title': 'Facebook video #10153664894881749', + 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...', + 'thumbnail': r're:^https?://.*', + 'timestamp': 1456259628, + 'upload_date': '20160223', + 'uploader': 'Barack Obama', }, }, { # have 1080P, but only up to 720p in swf params @@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '10155529876156509', 'ext': 'mp4', - 'title': 'Holocaust survivor becomes US citizen', + 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...', 'timestamp': 1477818095, 'upload_date': '20161030', 'uploader': 'CNN', + 'thumbnail': r're:^https?://.*', }, }, { # bigPipe.onPageletArrive ... 
onPageletArrive pagelet_group_mall @@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477305000, 'upload_date': '20161024', 'uploader': 'La Guía Del Varón', + 'thumbnail': r're:^https?://.*', }, 'params': { 'skip_download': True, @@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor): timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) + thumbnail = self._og_search_thumbnail(webpage) info_dict = { 'id': video_id, @@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor): 'formats': formats, 'uploader': uploader, 'timestamp': timestamp, + 'thumbnail': thumbnail, } return webpage, info_dict -- cgit v1.1 From d0f2d6411406a35c9593bbb85375c2d7f8300c77 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Tue, 10 Oct 2017 18:45:10 +0200 Subject: [slideslive] Add extractor (closes #2680) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/slideslive.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 youtube_dl/extractor/slideslive.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24e9acd..d0f71ae 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -943,6 +943,7 @@ from .skynewsarabia import ( ) from .skysports import SkySportsIE from .slideshare import SlideshareIE +from .slideslive import SlidesLiveIE from .slutload import SlutloadIE from .smotri import ( SmotriIE, diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py new file mode 100644 index 0000000..1045760 --- /dev/null +++ b/youtube_dl/extractor/slideslive.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class SlidesLiveIE(InfoExtractor): + _VALID_URL = r'https?://slideslive\.com/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', + 'md5': 
'b29fcd6c6952d0c79c5079b0e7a07e6f', + 'info_dict': { + 'id': 'LMtgR8ba0b0', + 'ext': 'mp4', + 'title': '38902413: external video', + 'description': '3890241320170925-9-1yd6ech.mp4', + 'uploader': 'SlidesLive Administrator', + 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', + 'upload_date': '20170925', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + url, video_id, headers={'Accept': 'application/json'}) + service_name = video_data['video_service_name'] + if service_name == 'YOUTUBE': + yt_video_id = video_data['video_service_id'] + return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) + else: + raise ExtractorError( + 'Unsupported service name: {0}'.format(service_name), expected=True) -- cgit v1.1 From 04af3aca049588b6b3d4d4b57ee47224fdeee90f Mon Sep 17 00:00:00 2001 From: Khang Nguyen Date: Thu, 5 Oct 2017 21:37:18 +0700 Subject: Remove YoutubeSharedVideoIE https://github.com/rg3/youtube-dl/issues/14303 --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/youtube.py | 33 --------------------------------- 2 files changed, 34 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d0f71ae..d96eafb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1346,7 +1346,6 @@ from .youtube import ( YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE, - YoutubeSharedVideoIE, YoutubeShowIE, YoutubeSubscriptionsIE, YoutubeTruncatedIDIE, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 54f5d72..6e2d57d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2040,39 +2040,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } -class YoutubeSharedVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P[0-9A-Za-z_-]{11})' - IE_NAME = 'youtube:shared' - - _TEST = { - 'url': 
'https://www.youtube.com/shared?ci=1nEzmT-M4fU', - 'info_dict': { - 'id': 'uPDB5I9wfp8', - 'ext': 'webm', - 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3', - 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d', - 'upload_date': '20160219', - 'uploader': 'Pocoyo - Português (BR)', - 'uploader_id': 'PocoyoBrazil', - }, - 'add_ie': ['Youtube'], - 'params': { - # There are already too many Youtube downloads - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - real_video_id = self._html_search_meta( - 'videoId', webpage, 'YouTube video id', fatal=True) - - return self.url_result(real_video_id, YoutubeIE.ie_key()) - - class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: -- cgit v1.1 From dfc80bdd2e4ef3d30f161a93f99f3050537944ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 11 Oct 2017 02:03:00 +0800 Subject: [ChangeLog] Update after #14420 --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 2684cbc..c541c4f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +- [youtube:shared] Removed extractor (#14420) + [facebook] Support thumbnails (#14416) -- cgit v1.1 From cdab1df91242fb617b09a31c023822ef31ea37b8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 10:04:46 +0000 Subject: [afreecatv] remove AfreecaTVGlobalIE the website now show this message > Global AfreecaTV will be merged and integrated on July 20th, 2017. Every user around the world are now able to interact with one another on www.afreecatv.com! 
--- youtube_dl/extractor/afreecatv.py | 104 ------------------------------------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 1 insertion(+), 108 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 2c58f46..e6513c7 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -271,107 +271,3 @@ class AfreecaTVIE(InfoExtractor): }) return info - - -class AfreecaTVGlobalIE(AfreecaTVIE): - IE_NAME = 'afreecatv:global' - _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P\d+)(?:/v/(?P\d+))?' - _TESTS = [{ - 'url': 'http://afreeca.tv/36853014/v/58301', - 'info_dict': { - 'id': '58301', - 'title': 'tryhard top100', - 'uploader_id': '36853014', - 'uploader': 'makgi Hearthstone Live!', - }, - 'playlist_count': 3, - }] - - def _real_extract(self, url): - channel_id, video_id = re.match(self._VALID_URL, url).groups() - video_type = 'video' if video_id else 'live' - query = { - 'pt': 'view', - 'bid': channel_id, - } - if video_id: - query['vno'] = video_id - video_data = self._download_json( - 'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type), - video_id or channel_id, query=query)['channel'] - - if video_data.get('result') != 1: - raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg'])) - - title = video_data['title'] - - info = { - 'thumbnail': video_data.get('thumb'), - 'view_count': int_or_none(video_data.get('vcnt')), - 'age_limit': int_or_none(video_data.get('grade')), - 'uploader_id': channel_id, - 'uploader': video_data.get('cname'), - } - - if video_id: - entries = [] - for i, f in enumerate(video_data.get('flist', [])): - video_key = self.parse_video_key(f.get('key', '')) - f_url = f.get('file') - if not video_key or not f_url: - continue - entries.append({ - 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), - 'title': title, - 'upload_date': video_key.get('upload_date'), - 'duration': int_or_none(f.get('length')), - 'url': f_url, - 
'protocol': 'm3u8_native', - 'ext': 'mp4', - }) - - info.update({ - 'id': video_id, - 'title': title, - 'duration': int_or_none(video_data.get('length')), - }) - if len(entries) > 1: - info['_type'] = 'multi_video' - info['entries'] = entries - elif len(entries) == 1: - i = entries[0].copy() - i.update(info) - info = i - else: - formats = [] - for s in video_data.get('strm', []): - s_url = s.get('purl') - if not s_url: - continue - stype = s.get('stype') - if stype == 'HLS': - formats.extend(self._extract_m3u8_formats( - s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False)) - elif stype == 'RTMP': - format_id = [stype] - label = s.get('label') - if label: - format_id.append(label) - formats.append({ - 'format_id': '-'.join(format_id), - 'url': s_url, - 'tbr': int_or_none(s.get('bps')), - 'height': int_or_none(s.get('brt')), - 'ext': 'flv', - 'rtmp_live': True, - }) - self._sort_formats(formats) - - info.update({ - 'id': channel_id, - 'title': self._live_title(title), - 'is_live': True, - 'formats': formats, - }) - - return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d96eafb..a363d95 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -31,10 +31,7 @@ from .aenetworks import ( AENetworksIE, HistoryTopicIE, ) -from .afreecatv import ( - AfreecaTVIE, - AfreecaTVGlobalIE, -) +from .afreecatv import AfreecaTVIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE -- cgit v1.1 From 4fe4bda287f4b506850f0baa6ff86445423751e7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 11:36:05 +0000 Subject: [tubitv] add support for new url format(fixes #14460) --- youtube_dl/extractor/tubitv.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index c44018a..36f6c16 100644 --- a/youtube_dl/extractor/tubitv.py +++ 
b/youtube_dl/extractor/tubitv.py @@ -13,11 +13,11 @@ from ..utils import ( class TubiTvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P[0-9]+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' _GEO_COUNTRIES = ['US'] - _TEST = { + _TESTS = [{ 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'md5': '43ac06be9326f41912dc64ccf7a80320', 'info_dict': { @@ -27,7 +27,13 @@ class TubiTvIE(InfoExtractor): 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.', 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434', }, - } + }, { + 'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories', + 'only_matching': True, + }, { + 'url': 'http://tubitv.com/movies/383676/tracker', + 'only_matching': True, + }] def _login(self): (username, password) = self._get_login_info() -- cgit v1.1 From 5fe75f976f6c8da76bfea68e073e5f35c4300442 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 14:14:51 +0000 Subject: [tva] fix extraction(fixes #14328) --- youtube_dl/extractor/tva.py | 48 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py index 3ced098..b57abea 100644 --- a/youtube_dl/extractor/tva.py +++ b/youtube_dl/extractor/tva.py @@ -3,52 +3,50 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - int_or_none, - parse_iso8601, + float_or_none, smuggle_url, ) class TVAIE(InfoExtractor): - _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P\d+)' + _VALID_URL = r'https?://videos\.tva\.ca/details/_(?P\d+)' _TEST = { - 'url': 'http://videos.tva.ca/episode/85538', + 'url': 'https://videos.tva.ca/details/_5596811470001', 'info_dict': { - 'id': '85538', + 'id': '5596811470001', 'ext': 'mp4', - 'title': 
'Épisode du 25 janvier 2017', - 'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98', - 'upload_date': '20170126', - 'timestamp': 1485442329, + 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !', + 'uploader_id': '5481942443001', + 'upload_date': '20171003', + 'timestamp': 1507064617, }, 'params': { # m3u8 download 'skip_download': True, } } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s' def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id, - video_id, query={ - '$expand': 'Metadata,CustomId', - '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName', - '$format': 'json', + 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={ + 'Accept': 'application/json', }) - metadata = video_data.get('Metadata', {}) + + def get_attribute(key): + for attribute in video_data.get('attributes', []): + if attribute.get('key') == key: + return attribute.get('value') + return None return { '_type': 'url_transparent', 'id': video_id, - 'title': video_data['Title'], - 'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}), - 'description': video_data.get('LongDescription') or video_data.get('ShortDescription'), - 'series': video_data.get('ShowName'), - 'episode': metadata.get('EpisodeTitle'), - 'episode_number': int_or_none(metadata.get('EpisodeNumber')), - 'categories': video_data.get('Categories'), - 'average_rating': video_data.get('AverageUserRating'), - 'timestamp': parse_iso8601(video_data.get('CreatedDate')), - 'ie_key': 'Ooyala', + 'title': get_attribute('title'), + 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}), + 'description': get_attribute('description'), + 'thumbnail': get_attribute('image-background') or 
get_attribute('image-landscape'), + 'duration': float_or_none(get_attribute('video-duration'), 1000), + 'ie_key': 'BrightcoveNew', } -- cgit v1.1 From 26bae2d96509b5c5ec80c27c1ea754b53c53818c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Oct 2017 21:59:30 +0700 Subject: [generic] Add support for channel9 embeds (closes #14469) --- youtube_dl/extractor/channel9.py | 6 ++++++ youtube_dl/extractor/generic.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index e928942..81108e7 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor): _RSS_URL = 'http://channel9.msdn.com/%s/RSS' + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b', + webpage) + def _extract_list(self, video_id, rss_url=None): if not rss_url: rss_url = self._RSS_URL % video_id diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 68b6338..6dab4c7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -101,6 +101,7 @@ from .mediaset import MediasetIE from .joj import JojIE from .megaphone import MegaphoneIE from .vzaar import VzaarIE +from .channel9 import Channel9IE class GenericIE(InfoExtractor): @@ -2871,6 +2872,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + channel9_urls = Channel9IE._extract_urls(webpage) + if channel9_urls: + return self.playlist_from_matches( + channel9_urls, video_id, video_title, ie=Channel9IE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): -- cgit v1.1 From 782195a9d417aab21cff4abe35ad9d705f4d8d83 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 15:48:32 +0000 Subject: [once] add support for vmap urls --- 
youtube_dl/extractor/gamespot.py | 2 +- youtube_dl/extractor/once.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 00d3111..02804d2 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -105,7 +105,7 @@ class GameSpotIE(OnceIE): onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') if onceux_url: formats.extend(self._extract_once_formats(re.sub( - r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', ''))) + r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url))) if not formats: for quality in ['sd', 'hd']: diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 1bf96ea..a637c8e 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class OnceIE(InfoExtractor): - _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P[^/]+)/(?P[^/]+)/(?:[^/]+/)?(?P[^/]+)/content\.(?:once|m3u8|mp4)' + _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P[^/]+)/(?P[^/]+)/(?:[^/]+/)?(?P[^/]+)/content\.(?:once|m3u8|mp4)' ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' -- cgit v1.1 From 9e38dbb19ca6874c7350647ea2883d5bbb3b50a1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 15:50:00 +0000 Subject: [voxmedia] add support for recode.net(fixes #14173) --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/voxmedia.py | 66 ++++++++++++++++++++++++++++++-------- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a363d95..5629c76 100644 --- a/youtube_dl/extractor/extractors.py +++ 
b/youtube_dl/extractor/extractors.py @@ -1244,7 +1244,10 @@ from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE from .voot import VootIE -from .voxmedia import VoxMediaIE +from .voxmedia import ( + VoxMediaVolumeIE, + VoxMediaIE, +) from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index f8e3314..c7a0a88 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -2,11 +2,44 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .once import OnceIE from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError + + +class VoxMediaVolumeIE(OnceIE): + _VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P[0-9a-f]{9})' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_data = self._parse_json(self._search_regex( + r'Volume\.createVideo$({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}$;', webpage, 'video data'), video_id) + for provider_video_type in ('ooyala', 'youtube', 'brightcove'): + provider_video_id = video_data.get('%s_id' % provider_video_type) + if not provider_video_id: + continue + info = { + 'id': video_id, + 'title': video_data.get('title_short'), + 'description': video_data.get('description_long') or video_data.get('description_short'), + 'thumbnail': video_data.get('brightcove_thumbnail') + } + if provider_video_type == 'brightcove': + info['formats'] = self._extract_once_formats(provider_video_id) + self._sort_formats(info['formats']) + else: + info.update({ + '_type': 'url_transparent', + 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'ie_key': provider_video_type.capitalize(), + }) + return info + raise ExtractorError('Unable to find provider video id') class 
VoxMediaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P[^/?]+)' _TESTS = [{ 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', 'info_dict': { @@ -31,6 +64,7 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', }, 'add_ie': ['Ooyala'], + 'skip': 'Video Not Found', }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', @@ -84,6 +118,17 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:e02d56b026d51aa32c010676765a690d', }, }], + }, { + # volume embed, Brightcove Once + 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya', + 'md5': '01571a896281f77dc06e084138987ea2', + 'info_dict': { + 'id': '1231c973d', + 'ext': 'mp4', + 'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella', + 'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.', + }, + 'add_ie': ['VoxMediaVolume'], }] def _real_extract(self, url): @@ -91,9 +136,14 @@ class VoxMediaIE(InfoExtractor): webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id)) def create_entry(provider_video_id, provider_video_type, title=None, description=None): + video_url = { + 'youtube': '%s', + 'ooyala': 'ooyala:%s', + 'volume': 'http://volume.vox-cdn.com/embed/%s', + }[provider_video_type] % provider_video_id return { '_type': 'url_transparent', - 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'url': video_url, 'title': title or self._og_search_title(webpage), 'description': description or 
self._og_search_description(webpage), } @@ -124,17 +174,7 @@ class VoxMediaIE(InfoExtractor): volume_uuid = self._search_regex( r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None) if volume_uuid: - volume_webpage = self._download_webpage( - 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) - video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo$({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}$;', volume_webpage, 'video data'), volume_uuid) - for provider_video_type in ('ooyala', 'youtube'): - provider_video_id = video_data.get('%s_id' % provider_video_type) - if provider_video_id: - description = video_data.get('description_long') or video_data.get('description_short') - entries.append(create_entry( - provider_video_id, provider_video_type, video_data.get('title_short'), description)) - break + entries.append(create_entry(volume_uuid, 'volume')) if len(entries) == 1: return entries[0] -- cgit v1.1 From af0f74288dc1b46147bc8f6b5692d2a21c6e178b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Oct 2017 23:45:03 +0700 Subject: [YoutubeDL] Improve _default_format_spec (closes #14461) --- test/test_YoutubeDL.py | 10 ++++++++-- youtube_dl/YoutubeDL.py | 27 ++++++++++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index db936bf..4af92fb 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + ydl = YDL({'is_live': True}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + + ydl = YDL({'simulate': True, 'is_live': True}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + ydl = YDL({'outtmpl': '-'}) - self.assertEqual(ydl._default_format_spec({}), 'best') + 
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best') - self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') class TestYoutubeDL(unittest.TestCase): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 855d6b8..342d6b4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1078,22 +1078,27 @@ class YoutubeDL(object): return _filter def _default_format_spec(self, info_dict, download=True): - req_format_list = [] - def can_have_partial_formats(): + def can_merge(): + merger = FFmpegMergerPP(self) + return merger.available and merger.can_merge() + + def prefer_best(): if self.params.get('simulate', False): - return True + return False if not download: - return True - if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': return False + if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': + return True if info_dict.get('is_live'): - return False - merger = FFmpegMergerPP(self) - return merger.available and merger.can_merge() - if can_have_partial_formats(): - req_format_list.append('bestvideo+bestaudio') - req_format_list.append('best') + return True + if not can_merge(): + return True + return False + + req_format_list = ['bestvideo+bestaudio', 'best'] + if prefer_best(): + req_format_list.reverse() return '/'.join(req_format_list) def build_format_selector(self, format_spec): -- cgit v1.1 From 694b61545cc3fa37c31d5f62d26101fb2620a01d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Oct 2017 00:41:20 +0700 Subject: [nexx] Add support for shortcuts and relax domain id extraction --- youtube_dl/extractor/nexx.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 
d0235fd..071879b 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -18,7 +18,13 @@ from ..utils import ( class NexxIE(InfoExtractor): - _VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P\d+)/videos/byid/(?P\d+)' + _VALID_URL = r'''(?x) + (?: + https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P\d+)/videos/byid/| + nexx:(?P\d+): + ) + (?P\d+) + ''' _TESTS = [{ # movie 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907', @@ -62,9 +68,19 @@ class NexxIE(InfoExtractor): }, { 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907', 'only_matching': True, + }, { + 'url': 'nexx:748:128907', + 'only_matching': True, }] @staticmethod + def _extract_domain_id(webpage): + mobj = re.search( + r']+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P\d+)', + webpage) + return mobj.group('id') if mobj else None + + @staticmethod def _extract_urls(webpage): # Reference: # 1. https://nx-s.akamaized.net/files/201510/44.pdf @@ -72,11 +88,8 @@ class NexxIE(InfoExtractor): entries = [] # JavaScript Integration - mobj = re.search( - r']+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P\d+)', - webpage) - if mobj: - domain_id = mobj.group('id') + domain_id = NexxIE._extract_domain_id(webpage) + if domain_id: for video_id in re.findall( r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)', webpage): @@ -112,7 +125,8 @@ class NexxIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - domain_id, video_id = mobj.group('domain_id', 'id') + domain_id = mobj.group('domain_id') or mobj.group('domain_id_s') + video_id = mobj.group('id') # Reverse engineered from JS code (see getDeviceID function) device_id = '%d:%d:%d%d' % ( -- cgit v1.1 From ff3f1a62f087332fa6409b5cbc39871d49e74f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Oct 2017 00:44:13 +0700 Subject: [funk] Add extractor (closes #14464) --- youtube_dl/extractor/extractors.py | 1 + 
youtube_dl/extractor/funk.py | 43 ++++++++++++++++++++++++++++++++++++++ youtube_dl/extractor/generic.py | 16 -------------- 3 files changed, 44 insertions(+), 16 deletions(-) create mode 100644 youtube_dl/extractor/funk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5629c76..ecb33bc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -381,6 +381,7 @@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .freshlive import FreshLiveIE from .funimation import FunimationIE +from .funk import FunkIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py new file mode 100644 index 0000000..ce5c67f --- /dev/null +++ b/youtube_dl/extractor/funk.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .nexx import NexxIE +from ..utils import extract_attributes + + +class FunkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P[^?/#]+)' + _TESTS = [{ + 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/', + 'md5': '4d40974481fa3475f8bccfd20c5361f8', + 'info_dict': { + 'id': '716599', + 'ext': 'mp4', + 'title': 'Neue Rechte Welle', + 'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69', + 'timestamp': 1501337639, + 'upload_date': '20170729', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + domain_id = NexxIE._extract_domain_id(webpage) or '741' + nexx_id = extract_attributes(self._search_regex( + r'(]id=["\']mediaplayer-funk[^>]+>)', + 
webpage, 'media player'))['data-id'] + + return self.url_result( + 'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(), + video_id=nexx_id) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6dab4c7..39630b6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1613,22 +1613,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['BrightcoveLegacy'], }, - # Nexx embed - { - 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503', - 'info_dict': { - 'id': '247746', - 'ext': 'mp4', - 'title': "Yesterday's Jam (OV)", - 'description': 'md5:09bc0984723fed34e2581624a84e05f0', - 'timestamp': 1492594816, - 'upload_date': '20170419', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, # Facebook