From 32ad8acade51269177cd01c5aa3c7169557eeac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 8 Nov 2017 04:34:01 -0200 Subject: New upstream version 2017.11.06 --- ChangeLog | 63 +++++++++- README.md | 2 + README.txt | 2 + docs/supportedsites.md | 10 +- test/test_InfoExtractor.py | 26 ++++ test/test_utils.py | 1 + youtube-dl | Bin 1588299 -> 1594421 bytes youtube_dl/downloader/f4m.py | 25 ++-- youtube_dl/downloader/fragment.py | 2 +- youtube_dl/downloader/hls.py | 1 + youtube_dl/extractor/arte.py | 15 ++- youtube_dl/extractor/azmedien.py | 2 +- youtube_dl/extractor/canvas.py | 146 +++++++++++++++++++++- youtube_dl/extractor/common.py | 33 +++-- youtube_dl/extractor/dctp.py | 68 ++++++++--- youtube_dl/extractor/dramafever.py | 2 +- youtube_dl/extractor/drtv.py | 1 + youtube_dl/extractor/egghead.py | 79 +++++++++--- youtube_dl/extractor/eporner.py | 5 +- youtube_dl/extractor/extractors.py | 18 ++- youtube_dl/extractor/fxnetworks.py | 23 ++-- youtube_dl/extractor/gamespot.py | 9 +- youtube_dl/extractor/gigya.py | 22 ++++ youtube_dl/extractor/hotstar.py | 97 ++++++++++++--- youtube_dl/extractor/medialaan.py | 17 +-- youtube_dl/extractor/megaphone.py | 2 +- youtube_dl/extractor/myvideo.py | 177 --------------------------- youtube_dl/extractor/nbc.py | 8 +- youtube_dl/extractor/ndtv.py | 105 ++++++++++++---- youtube_dl/extractor/nick.py | 34 +++++- youtube_dl/extractor/niconico.py | 29 ++++- youtube_dl/extractor/parliamentliveuk.py | 8 +- youtube_dl/extractor/pbs.py | 6 +- youtube_dl/extractor/servus.py | 43 +++++++ youtube_dl/extractor/skysports.py | 21 +++- youtube_dl/extractor/soundcloud.py | 2 +- youtube_dl/extractor/soundgasm.py | 35 ++++-- youtube_dl/extractor/spankbang.py | 5 + youtube_dl/extractor/twentytwotracks.py | 86 ------------- youtube_dl/extractor/twitch.py | 2 +- youtube_dl/extractor/unity.py | 32 +++++ youtube_dl/extractor/vimeo.py | 2 +- youtube_dl/extractor/younow.py | 202 +++++++++++++++++++++++++++++++ 
youtube_dl/extractor/youtube.py | 15 ++- youtube_dl/utils.py | 12 +- youtube_dl/version.py | 2 +- 46 files changed, 1058 insertions(+), 439 deletions(-) create mode 100644 youtube_dl/extractor/gigya.py delete mode 100644 youtube_dl/extractor/myvideo.py create mode 100644 youtube_dl/extractor/servus.py delete mode 100644 youtube_dl/extractor/twentytwotracks.py create mode 100644 youtube_dl/extractor/unity.py create mode 100644 youtube_dl/extractor/younow.py diff --git a/ChangeLog b/ChangeLog index d728e4d..8af3682 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,64 @@ +version 2017.11.06 + +Core ++ [extractor/common] Add protocol for f4m formats +* [f4m] Prefer baseURL for relative URLs (#14660) +* [extractor/common] Respect URL query in _extract_wowza_formats (#14645) + +Extractors ++ [hotstar:playlist] Add support for playlists (#12465) +* [hotstar] Bypass geo restriction (#14672) +- [22tracks] Remove extractor (#11024, #14628) ++ [skysport] Add support for ooyala videos protected with embed_token (#14641) +* [gamespot] Extract formats referenced with new data fields (#14652) +* [spankbang] Detect unavailable videos (#14644) + + +version 2017.10.29 + +Core +* [extractor/common] Prefix format id for audio only HLS formats ++ [utils] Add support for zero years and months in parse_duration + +Extractors +* [egghead] Fix extraction (#14388) ++ [fxnetworks] Extract series metadata (#14603) ++ [younow] Add support for younow.com (#9255, #9432, #12436) +* [dctptv] Fix extraction (#14599) +* [youtube] Restrict embed regex (#14600) +* [vimeo] Restrict iframe embed regex (#14600) +* [soundgasm] Improve extraction (#14588) +- [myvideo] Remove extractor (#8557) ++ [nbc] Add support for classic-tv videos (#14575) ++ [vrtnu] Add support for cookies authentication and simplify (#11873) ++ [canvas] Add support for vrt.be/vrtnu (#11873) +* [twitch:clips] Fix title extraction (#14566) ++ [ndtv] Add support for sub-sites (#14534) +* [dramafever] Fix login error message extraction ++ 
[nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt, + ro, hu) (#14553) + + +version 2017.10.20 + +Core +* [downloader/fragment] Report warning instead of error on inconsistent + download state +* [downloader/hls] Fix total fragments count when ad fragments exist + +Extractors +* [parliamentliveuk] Fix extraction (#14524) +* [soundcloud] Update client id (#14546) ++ [servus] Add support for servus.com (#14362) ++ [unity] Add support for unity3d.com (#14528) +* [youtube] Replace youtube redirect URLs in description (#14517) +* [pbs] Restrict direct video URL regular expression (#14519) +* [drtv] Respect preference for direct HTTP formats (#14509) ++ [eporner] Add support for embed URLs (#14507) +* [arte] Capture and output error message +* [niconico] Improve uploader metadata extraction robustness (#14135) + + version 2017.10.15.1 Core @@ -834,7 +895,7 @@ version 2017.04.14 Core + [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955) -+ [adobepass] Improve Comcast and Verison login code (#10803) ++ [adobepass] Improve Comcast and Verizon login code (#10803) + [adobepass] Add support for Verizon (#10803) Extractors diff --git a/README.md b/README.md index 2879aad..ea321d5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl) + youtube-dl - download videos from youtube.com or other video platforms - [INSTALLATION](#installation) diff --git a/README.txt b/README.txt index a42d837..4b7adfd 100644 --- a/README.txt +++ b/README.txt @@ -1,3 +1,5 @@ +[Build Status] + youtube-dl - download videos from youtube.com or other video platforms - INSTALLATION diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7071450..6009df5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -3,8 +3,6 @@ - **1up.com** - **20min** - **220.ro** - - **22tracks:genre** - - **22tracks:track** - **24video** - **3qsdn**: 3Q 
SDN - **3sat** @@ -342,6 +340,7 @@ - **HornBunny** - **HotNewHipHop** - **HotStar** + - **hotstar:playlist** - **Howcast** - **HowStuffWorks** - **HRTi** @@ -498,7 +497,6 @@ - **MySpace:album** - **MySpass** - **Myvi** - - **myvideo** (Currently broken) - **MyVidster** - **n-tv.de** - **natgeo** @@ -728,6 +726,7 @@ - **SenateISVP** - **SendtoNews** - **ServingSys** + - **Servus** - **Sexu** - **Shahid** - **Shared**: shared.sx @@ -887,6 +886,7 @@ - **UDNEmbed**: 聯合影音 - **UKTVPlay** - **Unistra** + - **Unity** - **uol.com.br** - **uplynk** - **uplynk:preplay** @@ -975,6 +975,7 @@ - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be + - **VrtNU**: VrtNU.be - **vrv** - **vrv:series** - **VShare** @@ -1033,6 +1034,9 @@ - **YouJizz** - **youku**: 优酷 - **youku:show** + - **YouNowChannel** + - **YouNowLive** + - **YouNowMoment** - **YouPorn** - **YourUpload** - **youtube**: YouTube.com diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index f18a823..686c63e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -574,6 +574,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_f4m_formats(self): + _TEST_CASES = [ + ( + # https://github.com/rg3/youtube-dl/issues/14660 + 'custom_base_url', + 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + [{ + 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + 'ext': 'flv', + 'format_id': '2148', + 'protocol': 'f4m', + 'tbr': 2148, + 'width': 1280, + 'height': 720, + }] + ), + ] + + for f4m_file, f4m_url, expected_formats in _TEST_CASES: + with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_f4m_formats( + 
compat_etree_fromstring(f.read().encode('utf-8')), + f4m_url, None) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index efa73d0..cc13f79 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('87 Min.'), 5220) self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) self.assertEqual(parse_duration('PT00H03M30SZ'), 210) + self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) def test_fix_xml_ampersands(self): self.assertEqual( diff --git a/youtube-dl b/youtube-dl index 15c016a..3b69288 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index c8fde9a..fdb80f4 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -243,8 +243,17 @@ def remove_encrypted_media(media): media)) -def _add_ns(prop): - return '{http://ns.adobe.com/f4m/1.0}%s' % prop +def _add_ns(prop, ver=1): + return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) + + +def get_base_url(manifest): + base_url = xpath_text( + manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() + return base_url class F4mFD(FragmentFD): @@ -330,13 +339,13 @@ class F4mFD(FragmentFD): rate, media = list(filter( lambda f: int(f[0]) == requested_bitrate, formats))[0] - base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) + # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. + man_base_url = get_base_url(doc) or man_url + + base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) - # From Adobe F4M 3.0 spec: - # The element SHALL be the base URL for all relative - # (HTTP-based) URLs in the manifest. 
If is not present, said - # URLs should be relative to the location of the containing document. - boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url) + boot_info, bootstrap_url = self._parse_bootstrap_node( + bootstrap_node, man_base_url) live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 7e891b9..93002e4 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -158,7 +158,7 @@ class FragmentFD(FileDownloader): if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): self._read_ytdl_file(ctx) if ctx['fragment_index'] > 0 and resume_len == 0: - self.report_error( + self.report_warning( 'Inconsistent state of incomplete fragment download. ' 'Restarting from the beginning...') ctx['fragment_index'] = resume_len = 0 diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7955ca5..1a6e226 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -88,6 +88,7 @@ class HlsFD(FragmentFD): if line.startswith('#'): if anvato_ad(line): ad_frags += 1 + ad_frag_next = True continue if ad_frag_next: ad_frag_next = False diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 5cde90c..ffc3218 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -15,6 +16,7 @@ from ..utils import ( int_or_none, NO_DEFAULT, qualities, + try_get, unified_strdate, ) @@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] - vsr = player_info['VSR'] - + vsr = try_get(player_info, lambda x: x['VSR'], dict) if not vsr: - raise ExtractorError( - 'Video 
%s is not available' % player_info.get('VID') or video_id, - expected=True) + error = None + if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error': + error = try_get( + player_info, lambda x: x['custom_msg']['msg'], compat_str) + if not error: + error = 'Video %s is not available' % player_info.get('VID') or video_id + raise ExtractorError(error, expected=True) upload_date_str = player_info.get('shootingDate') if not upload_date_str: diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index f4e07d9..68f26e2 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -47,7 +47,7 @@ class AZMedienIE(AZMedienBaseIE): 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', 'info_dict': { 'id': '1_2444peh4', - 'ext': 'mov', + 'ext': 'mp4', 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', 'uploader_id': 'TeleZ?ri', diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index 6899f84..3faa760 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -1,16 +1,22 @@ from __future__ import unicode_literals import re +import json from .common import InfoExtractor +from .gigya import GigyaBaseIE +from ..compat import compat_HTTPError from ..utils import ( - float_or_none, + ExtractorError, strip_or_none, + float_or_none, + int_or_none, + parse_iso8601, ) class CanvasIE(InfoExtractor): - _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet)/assets/(?Pm[dz]-ast-[^/?#&]+)' + _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet|vrtvideo)/assets/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'md5': '90139b746a0a9bd7bb631283f6e2a64e', @@ -166,3 +172,139 @@ class CanvasEenIE(InfoExtractor): 
'title': title, 'description': self._og_search_description(webpage), } + + +class VrtNUIE(GigyaBaseIE): + IE_DESC = 'VrtNU.be' + _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?Pvrtnu)/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/', + 'info_dict': { + 'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', + 'ext': 'flv', + 'title': 'De zwarte weduwe', + 'description': 'md5:d90c21dced7db869a85db89a623998d4', + 'duration': 1457.04, + 'thumbnail': r're:^https?://.*\.jpg$', + 'season': '1', + 'season_number': 1, + 'episode_number': 1, + }, + 'skip': 'This video is only available for registered users' + }] + _NETRC_MACHINE = 'vrtnu' + _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy' + _CONTEXT_ID = 'R3595707040' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + auth_data = { + 'APIKey': self._APIKEY, + 'targetEnv': 'jssdk', + 'loginID': username, + 'password': password, + 'authMode': 'cookie', + } + + auth_info = self._gigya_login(auth_data) + + # Sometimes authentication fails for no good reason, retry + login_attempt = 1 + while login_attempt <= 3: + try: + # When requesting a token, no actual token is returned, but the + # necessary cookies are set. 
+ self._request_webpage( + 'https://token.vrt.be', + None, note='Requesting a token', errnote='Could not get a token', + headers={ + 'Content-Type': 'application/json', + 'Referer': 'https://www.vrt.be/vrtnu/', + }, + data=json.dumps({ + 'uid': auth_info['UID'], + 'uidsig': auth_info['UIDSignature'], + 'ts': auth_info['signatureTimestamp'], + 'email': auth_info['profile']['email'], + }).encode('utf-8')) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + login_attempt += 1 + self.report_warning('Authentication failed') + self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again') + else: + raise e + else: + break + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + title = self._html_search_regex( + r'(?ms)

(.+?)

', + webpage, 'title').strip() + + description = self._html_search_regex( + r'(?ms)
(.+?)
', + webpage, 'description', default=None) + + season = self._html_search_regex( + [r'''(?xms)\s* + seizoen\ (.+?)\s* + ''', + r'