From b21ab85088345323d1e6d988b2cdce8e02fe6bdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 01:46:05 +0700 Subject: [scrippsnetworks:watch] Fix extraction (closes #14389) --- youtube_dl/extractor/scrippsnetworks.py | 211 +++++++++++++++++++++++++------- 1 file changed, 167 insertions(+), 44 deletions(-) (limited to 'youtube_dl/extractor/scrippsnetworks.py') diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index 597d6f5..30bb31d 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -1,60 +1,183 @@ # coding: utf-8 from __future__ import unicode_literals -from .adobepass import AdobePassIE +import datetime +import json +import hashlib +import hmac +import re + +from .common import InfoExtractor +from .anvato import AnvatoIE from ..utils import ( - int_or_none, - smuggle_url, - update_url_query, + urlencode_postdata, + xpath_text, ) -class ScrippsNetworksWatchIE(AdobePassIE): +class ScrippsNetworksWatchIE(InfoExtractor): IE_NAME = 'scrippsnetworks:watch' - _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P\d+)' - _TEST = { - 'url': 'http://watch.hgtv.com/player.HNT.html#0256538', + _VALID_URL = r'''(?x) + https?:// + watch\. + (?Phgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/ + (?: + player\.[A-Z0-9]+\.html\#| + show/(?:[^/]+/){2} + ) + (?P\d+) + ''' + _TESTS = [{ + 'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/', 'md5': '26545fd676d939954c6808274bdb905a', 'info_dict': { - 'id': '0256538', + 'id': '4173834', 'ext': 'mp4', - 'title': 'Seeking a Wow House', - 'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.', - 'uploader': 'SCNI', - 'upload_date': '20170207', - 'timestamp': 1486450493, + 'title': 'Best Ever Treehouses', + 'description': "We're searching for the most over the top treehouses.", + 'uploader': 'ANV', + 'upload_date': '20170922', + 'timestamp': 1506056400, + }, + 'params': { + 'skip_download': True, }, - 'skip': 'requires TV provider authentication', + 'add_ie': [AnvatoIE.ie_key()], + }, { + 'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/', + 'only_matching': True, + }, { + 'url': 'http://watch.diynetwork.com/player.HNT.html#2656646', + 'only_matching': True, + }] + + _SNI_TABLE = { + 'hgtv': 'hgtv', + 'diynetwork': 'diy', + 'foodnetwork': 'food', + 'cookingchanneltv': 'cook', + 'travelchannel': 'trav', + 'geniuskitchen': 'geniuskitchen', } + _SNI_HOST = 'web.api.video.snidigital.com' + + _AWS_REGION = 'us-east-1' + _AWS_IDENTITY_ID_JSON = json.dumps({ + 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION + }) + _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' + _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' + _AWS_SERVICE = 'execute-api' + _AWS_REQUEST = 'aws4_request' + _AWS_SIGNED_HEADERS = ';'.join([ + 'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key']) + _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET +%(uri)s + +host:%(host)s +x-amz-date:%(date)s +x-amz-security-token:%(token)s +x-api-key:%(key)s + +%(signed_headers)s +%(payload_hash)s''' def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - channel = self._parse_json(self._search_regex( - r'"channels"\s*:\s*(\[.+\])', - webpage, 'channels'), video_id)[0] - video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id) - title = video_data['title'] - release_url = video_data['releaseUrl'] - if video_data.get('restricted'): - requestor_id = self._search_regex( - r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id') - resource = self._get_mvpd_resource( - requestor_id, title, video_id, - video_data.get('ratings', [{}])[0].get('rating')) - auth = self._extract_mvpd_auth( - url, video_id, requestor_id, resource) - release_url = update_url_query(release_url, {'auth': auth}) - - return { - '_type': 'url_transparent', - 'id': video_id, - 'title': title, - 'url': smuggle_url(release_url, {'force_smil_url': True}), - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnailUrl'), - 'series': video_data.get('showTitle'), - 'season_number': int_or_none(video_data.get('season')), - 'episode_number': int_or_none(video_data.get('episodeNumber')), - 'ie_key': 'ThePlatform', + mobj = re.match(self._VALID_URL, url) + site_id, video_id = mobj.group('site', 'id') + + def aws_hash(s): + return hashlib.sha256(s.encode('utf-8')).hexdigest() + + token = self._download_json( + 'https://cognito-identity.us-east-1.amazonaws.com/', video_id, + data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'), + headers={ + 'Accept': '*/*', + 'Content-Type': 'application/x-amz-json-1.1', + 'Referer': url, + 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON), + 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', + 'X-Amz-User-Agent': self._AWS_USER_AGENT, + })['Token'] + + sts = self._download_xml( + 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({ + 'Action': 'AssumeRoleWithWebIdentity', + 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role', + 'RoleSessionName': 'web-identity', + 'Version': '2011-06-15', + 'WebIdentityToken': token, + }), headers={ + 'Referer': url, + 'X-Amz-User-Agent': self._AWS_USER_AGENT, + 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', + }) + + def get(key): + return xpath_text( + sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, + fatal=True) + + access_key_id = get('AccessKeyId') + secret_access_key = get('SecretAccessKey') + session_token = get('SessionToken') + + # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html + uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id) + datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') + date = datetime_now[:8] + canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % { + 'uri': uri, + 'host': self._SNI_HOST, + 'date': datetime_now, + 'token': session_token, + 'key': self._AWS_API_KEY, + 'signed_headers': self._AWS_SIGNED_HEADERS, + 'payload_hash': aws_hash(''), } + + # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html + credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]) + string_to_sign = '\n'.join([ + 'AWS4-HMAC-SHA256', datetime_now, credential_string, + aws_hash(canonical_string)]) + + # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html + def aws_hmac(key, msg): + return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + + def aws_hmac_digest(key, msg): + return aws_hmac(key, msg).digest() + + def aws_hmac_hexdigest(key, msg): + return aws_hmac(key, msg).hexdigest() + + k_secret = 'AWS4' + secret_access_key + k_date = aws_hmac_digest(k_secret.encode('utf-8'), date) + k_region = aws_hmac_digest(k_date, self._AWS_REGION) + k_service = aws_hmac_digest(k_region, self._AWS_SERVICE) + k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST) + + signature = aws_hmac_hexdigest(k_signing, string_to_sign) + + auth_header = ', '.join([ + 'AWS4-HMAC-SHA256 Credential=%s' % '/'.join( + [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]), + 'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS, + 'Signature=%s' % signature, + ]) + + mcp_id = self._download_json( + 'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={ + 'Accept': '*/*', + 'Referer': url, + 'Authorization': auth_header, + 'X-Amz-Date': datetime_now, + 'X-Amz-Security-Token': session_token, + 'X-Api-Key': self._AWS_API_KEY, + })['results'][0]['mcpId'] + + return self.url_result( + 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + AnvatoIE.ie_key(), video_id=mcp_id) -- cgit v1.1 From 57eb45b11110bdc6d99b02b738129565239a12b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 02:01:16 +0700 Subject: [scrippsnetworks:watch] Add support for geniuskitchen.com --- youtube_dl/extractor/scrippsnetworks.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/scrippsnetworks.py') diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index 30bb31d..411fef8 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -20,10 +20,11 @@ class ScrippsNetworksWatchIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// watch\. - (?Phgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/ + (?Phgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/ (?: player\.[A-Z0-9]+\.html\#| - show/(?:[^/]+/){2} + show/(?:[^/]+/){2}| + player/ ) (?P\d+) ''' @@ -49,6 +50,9 @@ class ScrippsNetworksWatchIE(InfoExtractor): }, { 'url': 'http://watch.diynetwork.com/player.HNT.html#2656646', 'only_matching': True, + }, { + 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/', + 'only_matching': True, }] _SNI_TABLE = { @@ -57,7 +61,7 @@ class ScrippsNetworksWatchIE(InfoExtractor): 'foodnetwork': 'food', 'cookingchanneltv': 'cook', 'travelchannel': 'trav', - 'geniuskitchen': 'geniuskitchen', + 'geniuskitchen': 'genius', } _SNI_HOST = 'web.api.video.snidigital.com' -- cgit v1.1 From 4827270526621cdabe74275deb38b04c2ef1b0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 06:11:35 +0700 Subject: [scrippsnetworks:watch] Bypass geo restriction --- youtube_dl/extractor/scrippsnetworks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/scrippsnetworks.py') diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index 411fef8..b446a02 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -10,6 +10,7 @@ import re from .common import InfoExtractor from .anvato import AnvatoIE from ..utils import ( + smuggle_url, urlencode_postdata, xpath_text, ) @@ -183,5 +184,7 @@ x-api-key:%(key)s })['results'][0]['mcpId'] return self.url_result( - 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + smuggle_url( + 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + {'geo_countries': ['US']}), AnvatoIE.ie_key(), video_id=mcp_id) -- cgit v1.1 From 78466fcab519d1b92fd9846bc8073885308a7e22 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 19 Dec 2017 02:00:13 +0100 Subject: [shahid] add support for show pages(closes #7401) --- youtube_dl/extractor/scrippsnetworks.py | 103 +++++--------------------------- 1 file changed, 16 insertions(+), 87 deletions(-) (limited to 'youtube_dl/extractor/scrippsnetworks.py') diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index b446a02..4023aee 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -1,13 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import datetime import json import hashlib -import hmac import re -from .common import InfoExtractor +from .aws import AWSIE from .anvato import AnvatoIE from ..utils import ( smuggle_url, @@ -16,7 +14,7 @@ from ..utils import ( ) -class ScrippsNetworksWatchIE(InfoExtractor): +class ScrippsNetworksWatchIE(AWSIE): IE_NAME = 'scrippsnetworks:watch' _VALID_URL = r'''(?x) https?:// @@ -64,44 +62,27 @@ class ScrippsNetworksWatchIE(InfoExtractor): 'travelchannel': 'trav', 'geniuskitchen': 'genius', } - _SNI_HOST = 'web.api.video.snidigital.com' - _AWS_REGION = 'us-east-1' - _AWS_IDENTITY_ID_JSON = json.dumps({ - 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION - }) - _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' - _AWS_SERVICE = 'execute-api' - _AWS_REQUEST = 'aws4_request' - _AWS_SIGNED_HEADERS = ';'.join([ - 'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key']) - _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET -%(uri)s - -host:%(host)s -x-amz-date:%(date)s -x-amz-security-token:%(token)s -x-api-key:%(key)s + _AWS_PROXY_HOST = 'web.api.video.snidigital.com' -%(signed_headers)s -%(payload_hash)s''' + _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) site_id, video_id = mobj.group('site', 'id') - def aws_hash(s): - return hashlib.sha256(s.encode('utf-8')).hexdigest() - + aws_identity_id_json = json.dumps({ + 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION + }).encode('utf-8') token = self._download_json( - 'https://cognito-identity.us-east-1.amazonaws.com/', video_id, - data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'), + 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, + data=aws_identity_id_json, headers={ 'Accept': '*/*', 'Content-Type': 'application/x-amz-json-1.1', 'Referer': url, - 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON), + 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(), 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', 'X-Amz-User-Agent': self._AWS_USER_AGENT, })['Token'] @@ -124,64 +105,12 @@ x-api-key:%(key)s sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, fatal=True) - access_key_id = get('AccessKeyId') - secret_access_key = get('SecretAccessKey') - session_token = get('SessionToken') - - # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id) - datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') - date = datetime_now[:8] - canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % { - 'uri': uri, - 'host': self._SNI_HOST, - 'date': datetime_now, - 'token': session_token, - 'key': self._AWS_API_KEY, - 'signed_headers': self._AWS_SIGNED_HEADERS, - 'payload_hash': aws_hash(''), - } - - # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html - credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]) - string_to_sign = '\n'.join([ - 'AWS4-HMAC-SHA256', datetime_now, credential_string, - aws_hash(canonical_string)]) - - # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html - def aws_hmac(key, msg): - return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) - - def aws_hmac_digest(key, msg): - return aws_hmac(key, msg).digest() - - def aws_hmac_hexdigest(key, msg): - return aws_hmac(key, msg).hexdigest() - - k_secret = 'AWS4' + secret_access_key - k_date = aws_hmac_digest(k_secret.encode('utf-8'), date) - k_region = aws_hmac_digest(k_date, self._AWS_REGION) - k_service = aws_hmac_digest(k_region, self._AWS_SERVICE) - k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST) - - signature = aws_hmac_hexdigest(k_signing, string_to_sign) - - auth_header = ', '.join([ - 'AWS4-HMAC-SHA256 Credential=%s' % '/'.join( - [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]), - 'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS, - 'Signature=%s' % signature, - ]) - - mcp_id = self._download_json( - 'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={ - 'Accept': '*/*', - 'Referer': url, - 'Authorization': auth_header, - 'X-Amz-Date': datetime_now, - 'X-Amz-Security-Token': session_token, - 'X-Api-Key': self._AWS_API_KEY, - })['results'][0]['mcpId'] + mcp_id = self._aws_execute_api({ + 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), + 'access_key': get('AccessKeyId'), + 'secret_key': get('SecretAccessKey'), + 'session_token': get('SessionToken'), + }, video_id)['results'][0]['mcpId'] return self.url_result( smuggle_url( -- cgit v1.1