about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Rogério Brito <rbrito@ime.usp.br> 2017-03-08 22:53:09 -0300
committer Rogério Brito <rbrito@ime.usp.br> 2017-03-08 22:53:09 -0300
commit 1d3fd83f473663fce3e0a10303473a38d80cc3d0 (patch)
tree 80d9943427637e0e06a725c5791e9fc9368d7f98
parent 4e090bc3ceacc4e3cd464d12ea97700e3acad37d (diff)
download youtube-dl-1d3fd83f473663fce3e0a10303473a38d80cc3d0.zip
youtube-dl-1d3fd83f473663fce3e0a10303473a38d80cc3d0.tar.gz
youtube-dl-1d3fd83f473663fce3e0a10303473a38d80cc3d0.tar.bz2
New upstream version 2017.03.07
-rw-r--r-- ChangeLog 95
-rw-r--r-- docs/supportedsites.md 11
-rw-r--r-- test/test_aes.py 9
-rw-r--r-- test/test_utils.py 12
-rwxr-xr-x youtube-dl bin 1474003 -> 1486997 bytes
-rwxr-xr-x youtube_dl/YoutubeDL.py 2
-rw-r--r-- youtube_dl/__init__.py 8
-rw-r--r-- youtube_dl/aes.py 28
-rw-r--r-- youtube_dl/compat.py 6
-rw-r--r-- youtube_dl/downloader/common.py 5
-rw-r--r-- youtube_dl/downloader/external.py 9
-rw-r--r-- youtube_dl/extractor/addanime.py 3
-rw-r--r-- youtube_dl/extractor/adobepass.py 5
-rw-r--r-- youtube_dl/extractor/amcnetworks.py 8
-rw-r--r-- youtube_dl/extractor/azmedien.py 41
-rw-r--r-- youtube_dl/extractor/brightcove.py 9
-rwxr-xr-x youtube_dl/extractor/cda.py 5
-rw-r--r-- youtube_dl/extractor/common.py 114
-rw-r--r-- youtube_dl/extractor/crunchyroll.py 20
-rw-r--r-- youtube_dl/extractor/dailymotion.py 7
-rw-r--r-- youtube_dl/extractor/daisuki.py 159
-rw-r--r-- youtube_dl/extractor/douyutv.py 60
-rw-r--r-- youtube_dl/extractor/drtv.py 5
-rw-r--r-- youtube_dl/extractor/etonline.py 39
-rw-r--r-- youtube_dl/extractor/extractors.py 13
-rw-r--r-- youtube_dl/extractor/facebook.py 2
-rw-r--r-- youtube_dl/extractor/freshlive.py 84
-rw-r--r-- youtube_dl/extractor/generic.py 24
-rw-r--r-- youtube_dl/extractor/go.py 8
-rw-r--r-- youtube_dl/extractor/mdr.py 20
-rw-r--r-- youtube_dl/extractor/njpwworld.py 83
-rw-r--r-- youtube_dl/extractor/npo.py 470
-rw-r--r-- youtube_dl/extractor/openload.py 47
-rw-r--r-- youtube_dl/extractor/redbulltv.py 106
-rw-r--r-- youtube_dl/extractor/rutube.py 11
-rw-r--r-- youtube_dl/extractor/ruutu.py 3
-rw-r--r-- youtube_dl/extractor/scivee.py 57
-rw-r--r-- youtube_dl/extractor/soundcloud.py 2
-rw-r--r-- youtube_dl/extractor/tunepk.py 90
-rw-r--r-- youtube_dl/extractor/tvigle.py 12
-rw-r--r-- youtube_dl/extractor/twentyfourvideo.py 14
-rw-r--r-- youtube_dl/extractor/twitch.py 76
-rw-r--r-- youtube_dl/extractor/vevo.py 22
-rw-r--r-- youtube_dl/extractor/vier.py 33
-rw-r--r-- youtube_dl/extractor/vrak.py 80
-rw-r--r-- youtube_dl/extractor/xhamster.py 11
-rw-r--r-- youtube_dl/extractor/youtube.py 35
-rw-r--r-- youtube_dl/utils.py 76
-rw-r--r-- youtube_dl/version.py 2
49 files changed, 1561 insertions, 480 deletions
diff --git a/ChangeLog b/ChangeLog
index add8a67..601aad9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,98 @@
+version 2017.03.07
+
+Core
+* Metadata are now added after conversion (#5594)
+
+Extractors
+* [soundcloud] Update client id (#12376)
+* [openload] Fix extraction (#10408, #12357)
+
+
+version 2017.03.06
+
+Core
++ [utils] Process bytestrings in urljoin (#12369)
+* [extractor/common] Improve height extraction and extract bitrate
+* [extractor/common] Move jwplayer formats extraction in separate method
++ [external:ffmpeg] Limit test download size to 10KiB (#12362)
+
+Extractors
++ [drtv] Add geo countries to GeoRestrictedError
++ [drtv:live] Bypass geo restriction
++ [tunepk] Add extractor (#12197, #12243)
+
+
+version 2017.03.05
+
+Extractors
++ [twitch] Add basic support for two-factor authentication (#11974)
++ [vier] Add support for vijf.be (#12304)
++ [redbulltv] Add support for redbull.tv (#3919, #11948)
+* [douyutv] Switch to the PC API to escape the 5-min limitation (#12316)
++ [generic] Add support for rutube embeds
++ [rutube] Relax URL regular expression
++ [vrak] Add support for vrak.tv (#11452)
++ [brightcove:new] Add ability to smuggle geo_countries into URL
++ [brightcove:new] Raise GeoRestrictedError
+* [go] Relax URL regular expression (#12341)
+* [24video] Use original host for requests (#12339)
+* [ruutu] Disable DASH formats (#12322)
+
+
+version 2017.03.02
+
+Core
++ [adobepass] Add support for Charter Spectrum (#11465)
+* [YoutubeDL] Don't sanitize identifiers in output template (#12317)
+
+Extractors
+* [facebook] Fix extraction (#12323, #12330)
+* [youtube] Mark errors about rental videos as expected (#12324)
++ [npo] Add support for audio
+* [npo] Adapt to app.php API (#12311, #12320)
+
+
+version 2017.02.28
+
+Core
++ [utils] Add bytes_to_long and long_to_bytes
++ [utils] Add pkcs1pad
++ [aes] Add aes_cbc_encrypt
+
+Extractors
++ [azmedien:showplaylist] Add support for show playlists (#12160)
++ [youtube:playlist] Recognize another playlist pattern (#11928, #12286)
++ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776,
+ #10060)
+* [douyu] Fix extraction (#12301)
+
+
+version 2017.02.27
+
+Core
+* [downloader/common] Limit displaying 2 digits after decimal point in sleep
+ interval message (#12183)
++ [extractor/common] Add preference to _parse_html5_media_entries
+
+Extractors
++ [npo] Add support for zapp.nl
++ [npo] Add support for hetklokhuis.nl (#12293)
+- [scivee] Remove extractor (#9315)
++ [cda] Decode download URL (#12255)
++ [crunchyroll] Improve uploader extraction (#12267)
++ [youtube] Raise GeoRestrictedError
++ [dailymotion] Raise GeoRestrictedError
++ [mdr] Recognize more URL patterns (#12169)
++ [tvigle] Raise GeoRestrictedError
+* [vevo] Fix extraction for videos with the new streams/streamsV3 format
+ (#11719)
++ [freshlive] Add support for freshlive.tv (#12175)
++ [xhamster] Capture and output videoClosed error (#12263)
++ [etonline] Add support for etonline.com (#12236)
++ [njpwworld] Add support for njpwworld.com (#11561)
+* [amcnetworks] Relax URL regular expression (#12127)
+
+
version 2017.02.24.1
Extractors
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index f973973..85c59ca 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -78,6 +78,7 @@
- **awaan:video**
- **AZMedien**: AZ Medien videos
- **AZMedienPlaylist**: AZ Medien playlists
+ - **AZMedienShowPlaylist**: AZ Medien show playlists
- **Azubu**
- **AzubuLive**
- **BaiduVideo**: 百度视频
@@ -191,6 +192,8 @@
- **dailymotion:playlist**
- **dailymotion:user**
- **DailymotionCloud**
+ - **Daisuki**
+ - **DaisukiPlaylist**
- **daum.net**
- **daum.net:clip**
- **daum.net:playlist**
@@ -239,6 +242,7 @@
- **ESPN**
- **ESPNArticle**
- **EsriVideo**
+ - **ETOnline**
- **Europa**
- **EveryonesMixtape**
- **ExpoTV**
@@ -274,6 +278,7 @@
- **francetvinfo.fr**
- **Freesound**
- **freespeech.org**
+ - **FreshLive**
- **Funimation**
- **FunnyOrDie**
- **Fusion**
@@ -310,6 +315,7 @@
- **HellPorno**
- **Helsinki**: helsinki.fi
- **HentaiStigma**
+ - **hetklokhuis**
- **hgtv.com:show**
- **HistoricFilms**
- **history:topic**: History.com Topic
@@ -511,6 +517,7 @@
- **Nintendo**
- **njoy**: N-JOY
- **njoy:embed**
+ - **NJPWWorld**: 新日本プロレスワールド
- **NobelPrize**
- **Noco**
- **Normalboots**
@@ -619,6 +626,7 @@
- **RaiTV**
- **RBMARadio**
- **RDS**: RDS.ca
+ - **RedBullTV**
- **RedTube**
- **RegioTV**
- **RENTV**
@@ -666,7 +674,6 @@
- **savefrom.net**
- **SBS**: sbs.com.au
- **schooltv**
- - **SciVee**
- **screen.yahoo:search**: Yahoo screen search
- **Screencast**
- **ScreencastOMatic**
@@ -791,6 +798,7 @@
- **tunein:program**
- **tunein:station**
- **tunein:topic**
+ - **TunePk**
- **Turbo**
- **Tutv**
- **tv.dfb.de**
@@ -910,6 +918,7 @@
- **VoxMedia**
- **Vporn**
- **vpro**: npo.nl and ntr.nl
+ - **Vrak**
- **VRT**
- **vube**: Vube.com
- **VuClip**
diff --git a/test/test_aes.py b/test/test_aes.py
index 54078a6..78a2875 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
import base64
@@ -34,6 +34,13 @@ class TestAES(unittest.TestCase):
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+ def test_cbc_encrypt(self):
+ data = bytes_to_intlist(self.secret_msg)
+ encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
+ self.assertEqual(
+ encrypted,
+ b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd")
+
def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode('utf-8')
encrypted = base64.b64encode(
diff --git a/test/test_utils.py b/test/test_utils.py
index 3cdb21d..173c495 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -52,6 +52,7 @@ from youtube_dl.utils import (
parse_filesize,
parse_count,
parse_iso8601,
+ pkcs1pad,
read_batch_urls,
sanitize_filename,
sanitize_path,
@@ -454,6 +455,9 @@ class TestUtil(unittest.TestCase):
def test_urljoin(self):
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
@@ -1104,6 +1108,14 @@ The first line
ohdave_rsa_encrypt(b'aa111222', e, N),
'726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
+ def test_pkcs1pad(self):
+ data = [1, 2, 3]
+ padded_data = pkcs1pad(data, 32)
+ self.assertEqual(padded_data[:2], [0, 2])
+ self.assertEqual(padded_data[28:], [0, 1, 2, 3])
+
+ self.assertRaises(ValueError, pkcs1pad, data, 8)
+
def test_encode_base_n(self):
self.assertEqual(encode_base_n(0, 30), '0')
self.assertEqual(encode_base_n(80, 30), '2k')
diff --git a/youtube-dl b/youtube-dl
index bc236d2..e815236 100755
--- a/youtube-dl
+++ b/youtube-dl
Binary files differ
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index f725456..13a3a90 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -616,7 +616,7 @@ class YoutubeDL(object):
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
restricted=self.params.get('restrictfilenames'),
- is_id=(k == 'id'))
+ is_id=(k == 'id' or k.endswith('_id')))
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 0c401ba..ad5f13d 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -242,14 +242,11 @@ def _real_main(argv=None):
# PostProcessors
postprocessors = []
- # Add the metadata pp first, the other pps will copy it
if opts.metafromtitle:
postprocessors.append({
'key': 'MetadataFromTitle',
'titleformat': opts.metafromtitle
})
- if opts.addmetadata:
- postprocessors.append({'key': 'FFmpegMetadata'})
if opts.extractaudio:
postprocessors.append({
'key': 'FFmpegExtractAudio',
@@ -279,6 +276,11 @@ def _real_main(argv=None):
})
if not already_have_thumbnail:
opts.writethumbnail = True
+ # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
+ # FFmpegExtractAudioPP as containers before conversion may not support
+ # metadata (3gp, webm, etc.)
+ if opts.addmetadata:
+ postprocessors.append({'key': 'FFmpegMetadata'})
# XAttrMetadataPP should be run after post-processors that may change file
# contents
if opts.xattrs:
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
index b8ff454..c5bb3c4 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -60,6 +60,34 @@ def aes_cbc_decrypt(data, key, iv):
return decrypted_data
+def aes_cbc_encrypt(data, key, iv):
+ """
+ Encrypt with aes in CBC mode. Using PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ encrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ remaining_length = BLOCK_SIZE_BYTES - len(block)
+ block += [remaining_length] * remaining_length
+ mixed_block = xor(block, previous_cipher_block)
+
+ encrypted_block = aes_encrypt(mixed_block, expanded_key)
+ encrypted_data += encrypted_block
+
+ previous_cipher_block = encrypted_block
+
+ return encrypted_data
+
+
def key_expansion(data):
"""
Generate key schedule
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index b257e2e..0c119e4 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2760,8 +2760,10 @@ else:
compat_kwargs = lambda kwargs: kwargs
-compat_numeric_types = ((int, float, long, complex) if sys.version_info[0] < 3
- else (int, float, complex))
+try:
+ compat_numeric_types = (int, float, long, complex)
+except NameError: # Python 3
+ compat_numeric_types = (int, float, complex)
if sys.version_info < (2, 7):
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 3dc144b..2c4470a 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -347,7 +347,10 @@ class FileDownloader(object):
if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
- self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
+ self.to_screen(
+ '[download] Sleeping %s seconds...' % (
+ int(sleep_interval) if sleep_interval.is_integer()
+ else '%.2f' % sleep_interval))
time.sleep(sleep_interval)
return self.real_download(filename, info_dict)
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index bdd3545..e13cf54 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -6,7 +6,10 @@ import sys
import re
from .common import FileDownloader
-from ..compat import compat_setenv
+from ..compat import (
+ compat_setenv,
+ compat_str,
+)
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
from ..utils import (
cli_option,
@@ -270,6 +273,10 @@ class FFmpegFD(ExternalFD):
args += ['-rtmp_live', 'live']
args += ['-i', url, '-c', 'copy']
+
+ if self.params.get('test', False):
+ args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+
if protocol in ('m3u8', 'm3u8_native'):
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
args += ['-f', 'mpegts']
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
index 55a9322..9f8a712 100644
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@@ -25,7 +25,8 @@ class AddAnimeIE(InfoExtractor):
'ext': 'mp4',
'description': 'One Piece 606',
'title': 'One Piece 606',
- }
+ },
+ 'skip': 'Video is gone',
}, {
'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
'only_matching': True,
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py
index 4d655bd..d4816ab 100644
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dl/extractor/adobepass.py
@@ -36,6 +36,11 @@ MSO_INFO = {
'username_field': 'Ecom_User_ID',
'password_field': 'Ecom_Password',
},
+ 'Charter_Direct': {
+ 'name': 'Charter Spectrum',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
'thr030': {
'name': '3 Rivers Communications'
},
diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py
index b71d1a0..3a0ec67 100644
--- a/youtube_dl/extractor/amcnetworks.py
+++ b/youtube_dl/extractor/amcnetworks.py
@@ -10,7 +10,7 @@ from ..utils import (
class AMCNetworksIE(ThePlatformIE):
- _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
'md5': '',
@@ -44,6 +44,12 @@ class AMCNetworksIE(ThePlatformIE):
}, {
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
'only_matching': True,
+ }, {
+ 'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py
index cbc3ed5..f4e07d9 100644
--- a/youtube_dl/extractor/azmedien.py
+++ b/youtube_dl/extractor/azmedien.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -5,6 +6,7 @@ import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
+ get_element_by_class,
get_element_by_id,
strip_or_none,
urljoin,
@@ -170,3 +172,42 @@ class AZMedienPlaylistIE(AZMedienBaseIE):
'video-title', webpage)), group='title')
return self.playlist_result(entries, show_id, title)
+
+
+class AZMedienShowPlaylistIE(AZMedienBaseIE):
+ IE_DESC = 'AZ Medien show playlists'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ telezueri\.ch|
+ telebaern\.tv|
+ telem1\.ch
+ )/
+ (?:
+ all-episodes|
+ alle-episoden
+ )/
+ (?P<id>[^/?#&]+)
+ '''
+
+ _TEST = {
+ 'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
+ 'info_dict': {
+ 'id': 'astrotalk',
+ 'title': 'TeleZüri: AstroTalk - alle episoden',
+ 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
+ },
+ 'playlist_mincount': 13,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ episodes = get_element_by_class('search-mobile-box', webpage)
+ entries = [self.url_result(
+ urljoin(url, m.group('url'))) for m in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._og_search_description(webpage)
+ return self.playlist_result(entries, playlist_id, title, description)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 27685ee..66c8cb2 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -515,6 +515,9 @@ class BrightcoveNewIE(InfoExtractor):
return entries
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
@@ -544,8 +547,10 @@ class BrightcoveNewIE(InfoExtractor):
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
- raise ExtractorError(
- json_data.get('message') or json_data['error_code'], expected=True)
+ message = json_data.get('message') or json_data['error_code']
+ if json_data.get('error_subcode') == 'CLIENT_GEO':
+ self.raise_geo_restricted(msg=message)
+ raise ExtractorError(message, expected=True)
raise
title = json_data['name'].strip()
diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py
index ae7af2f..1ee35b5 100755
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dl/extractor/cda.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import codecs
import re
from .common import InfoExtractor
@@ -96,6 +97,10 @@ class CDAIE(InfoExtractor):
if not video or 'file' not in video:
self.report_warning('Unable to extract %s version information' % version)
return
+ if video['file'].startswith('uggc'):
+ video['file'] = codecs.decode(video['file'], 'rot_13')
+ if video['file'].endswith('adc.mp4'):
+ video['file'] = video['file'].replace('adc.mp4', '.mp4')
f = {
'url': video['file'],
}
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 4252d68..78dc5be 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2010,7 +2010,7 @@ class InfoExtractor(object):
})
return formats
- def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
@@ -2032,7 +2032,8 @@ class InfoExtractor(object):
is_plain_url = False
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
- entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
+ entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
+ preference=preference)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
@@ -2197,56 +2198,9 @@ class InfoExtractor(object):
this_video_id = video_id or video_data['mediaid']
- formats = []
- for source in video_data['sources']:
- source_url = self._proto_relative_url(source['file'])
- if base_url:
- source_url = compat_urlparse.urljoin(base_url, source_url)
- source_type = source.get('type') or ''
- ext = mimetype2ext(source_type) or determine_ext(source_url)
- if source_type == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
- elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- source_url, this_video_id, mpd_id=mpd_id, fatal=False))
- # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
- elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
- formats.append({
- 'url': source_url,
- 'vcodec': 'none',
- 'ext': ext,
- })
- else:
- height = int_or_none(source.get('height'))
- if height is None:
- # Often no height is provided but there is a label in
- # format like 1080p.
- height = int_or_none(self._search_regex(
- r'^(\d{3,})[pP]$', source.get('label') or '',
- 'height', default=None))
- a_format = {
- 'url': source_url,
- 'width': int_or_none(source.get('width')),
- 'height': height,
- 'ext': ext,
- }
- if source_url.startswith('rtmp'):
- a_format['ext'] = 'flv'
-
- # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
- # of jwplayer.flash.swf
- rtmp_url_parts = re.split(
- r'((?:mp4|mp3|flv):)', source_url, 1)
- if len(rtmp_url_parts) == 3:
- rtmp_url, prefix, play_path = rtmp_url_parts
- a_format.update({
- 'url': rtmp_url,
- 'play_path': prefix + play_path,
- })
- if rtmp_params:
- a_format.update(rtmp_params)
- formats.append(a_format)
+ formats = self._parse_jwplayer_formats(
+ video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
+ mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
self._sort_formats(formats)
subtitles = {}
@@ -2277,6 +2231,62 @@ class InfoExtractor(object):
else:
return self.playlist_result(entries)
+ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ formats = []
+ for source in jwplayer_sources_data:
+ source_url = self._proto_relative_url(source['file'])
+ if base_url:
+ source_url = compat_urlparse.urljoin(base_url, source_url)
+ source_type = source.get('type') or ''
+ ext = mimetype2ext(source_type) or determine_ext(source_url)
+ if source_type == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=m3u8_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ source_url, video_id, mpd_id=mpd_id, fatal=False))
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+ elif source_type.startswith('audio') or ext in (
+ 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+ formats.append({
+ 'url': source_url,
+ 'vcodec': 'none',
+ 'ext': ext,
+ })
+ else:
+ height = int_or_none(source.get('height'))
+ if height is None:
+ # Often no height is provided but there is a label in
+ # format like "1080p", "720p SD", or 1080.
+ height = int_or_none(self._search_regex(
+ r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+ 'height', default=None))
+ a_format = {
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': height,
+ 'tbr': int_or_none(source.get('bitrate')),
+ 'ext': ext,
+ }
+ if source_url.startswith('rtmp'):
+ a_format['ext'] = 'flv'
+ # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+ # of jwplayer.flash.swf
+ rtmp_url_parts = re.split(
+ r'((?:mp4|mp3|flv):)', source_url, 1)
+ if len(rtmp_url_parts) == 3:
+ rtmp_url, prefix, play_path = rtmp_url_parts
+ a_format.update({
+ 'url': rtmp_url,
+ 'play_path': prefix + play_path,
+ })
+ if rtmp_params:
+ a_format.update(rtmp_params)
+ formats.append(a_format)
+ return formats
+
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index a1fc6a7..9c6cf00 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -207,6 +207,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# Just test metadata extraction
'skip_download': True,
},
+ }, {
+ # make sure we can extract an uploader name that's not a link
+ 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
+ 'info_dict': {
+ 'id': '606899',
+ 'ext': 'mp4',
+ 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
+ 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
+ 'uploader': 'Geneon Entertainment',
+ 'upload_date': '20120717',
+ },
+ 'params': {
+ # just test metadata extraction
+ 'skip_download': True,
+ },
}]
_FORMAT_IDS = {
@@ -388,8 +403,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex(
- r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
- 'video_uploader', fatal=False)
+ # try looking for both an uploader that's a link and one that's not
+ [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
+ webpage, 'video_uploader', fatal=False)
available_fmts = []
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index b312401..246efde 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -282,9 +282,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}
def _check_error(self, info):
+ error = info.get('error')
if info.get('error') is not None:
+ title = error['title']
+ # See https://developer.dailymotion.com/api#access-error
+ if error.get('code') == 'DM007':
+ self.raise_geo_restricted(msg=title)
raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True)
+ '%s said: %s' % (self.IE_NAME, title), expected=True)
def _get_subtitles(self, video_id, webpage):
try:
diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py
new file mode 100644
index 0000000..58cc986
--- /dev/null
+++ b/youtube_dl/extractor/daisuki.py
@@ -0,0 +1,159 @@
+from __future__ import unicode_literals
+
+import base64
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..aes import (
+ aes_cbc_decrypt,
+ aes_cbc_encrypt,
+)
+from ..utils import (
+ bytes_to_intlist,
+ bytes_to_long,
+ clean_html,
+ ExtractorError,
+ intlist_to_bytes,
+ get_element_by_id,
+ js_to_json,
+ int_or_none,
+ long_to_bytes,
+ pkcs1pad,
+ remove_end,
+)
+
+
+class DaisukiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html'
+
+ _TEST = {
+ 'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html',
+ 'info_dict': {
+ 'id': '11213',
+ 'ext': 'mp4',
+ 'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS',
+ 'subtitles': {
+ 'mul': [{
+ 'ext': 'ttml',
+ }],
+ },
+ 'creator': 'BANDAI NAMCO Entertainment',
+ },
+ 'params': {
+ 'skip_download': True, # AES-encrypted HLS stream
+ },
+ }
+
+ # The public key in PEM format can be found in clientlibs_anime_watch.min.js
+ _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ flashvars = self._parse_json(self._search_regex(
+ r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
+ video_id, transform_source=js_to_json)
+
+ iv = [0] * 16
+
+ data = {}
+ for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'):
+ data[key] = flashvars.get(key, '')
+
+ encrypted_rtn = None
+
+ # Some AES keys are rejected. Try it with different AES keys
+ for idx in range(5):
+ aes_key = [random.randint(0, 254) for _ in range(32)]
+ padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128))
+
+ n, e = self._RSA_KEY
+ encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
+ init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={
+ 's': flashvars.get('s', ''),
+ 'c': flashvars.get('ss3_prm', ''),
+ 'e': url,
+ 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
+ bytes_to_intlist(json.dumps(data)),
+ aes_key, iv))).decode('ascii'),
+ 'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
+ }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
+
+ if 'rtn' in init_data:
+ encrypted_rtn = init_data['rtn']
+ break
+
+ self._sleep(5, video_id)
+
+ if encrypted_rtn is None:
+ raise ExtractorError('Failed to fetch init data')
+
+ rtn = self._parse_json(
+ intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
+ base64.b64decode(encrypted_rtn)),
+ aes_key, iv)).decode('utf-8').rstrip('\0'),
+ video_id)
+
+ formats = self._extract_m3u8_formats(
+ rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
+
+ title = remove_end(self._og_search_title(webpage), ' - DAISUKI')
+
+ creator = self._html_search_regex(
+ r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False)
+
+ subtitles = {}
+ caption_url = rtn.get('caption_url')
+ if caption_url:
+ # mul: multiple languages
+ subtitles['mul'] = [{
+ 'url': caption_url,
+ 'ext': 'ttml',
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'creator': creator,
+ }
+
+
+class DaisukiPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html'
+
+ _TEST = {
+ 'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html',
+ 'info_dict': {
+ 'id': 'TheIdolMasterCG',
+ 'title': 'THE IDOLM@STER CINDERELLA GIRLS',
+ 'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8',
+ },
+ 'playlist_count': 26,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ episode_pattern = r'''(?sx)
+ <img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+?
+ <p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)'''
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': url.replace('detail', 'watch').replace('.html', '.' + movie_id + '.html'),
+ 'episode_id': episode_id,
+ 'episode_number': int_or_none(episode_id),
+ } for movie_id, episode_id in re.findall(episode_pattern, webpage)]
+
+ playlist_title = remove_end(
+ self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI')
+ playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage))
+
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py
index 9115944..82d8a04 100644
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dl/extractor/douyutv.py
@@ -1,15 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
-import hashlib
import time
-import uuid
+import hashlib
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse_urlencode,
-)
from ..utils import (
ExtractorError,
unescapeHTML,
@@ -25,7 +20,7 @@ class DouyuTVIE(InfoExtractor):
'id': '17732',
'display_id': 'iseven',
'ext': 'flv',
- 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r're:.*m7show@163\.com.*',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '7师傅',
@@ -56,7 +51,7 @@ class DouyuTVIE(InfoExtractor):
'id': '17732',
'display_id': '17732',
'ext': 'flv',
- 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r're:.*m7show@163\.com.*',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '7师傅',
@@ -74,10 +69,6 @@ class DouyuTVIE(InfoExtractor):
'only_matching': True,
}]
- # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
- # is encrypted originally, but ffdec can dump memory to get the decrypted one.
- _API_KEY = 'A12Svb&%1UUmf@hC'
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -88,6 +79,7 @@ class DouyuTVIE(InfoExtractor):
room_id = self._html_search_regex(
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
+ # Grab metadata from mobile API
room = self._download_json(
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
note='Downloading room info')['data']
@@ -96,38 +88,22 @@ class DouyuTVIE(InfoExtractor):
if room.get('show_status') == '2':
raise ExtractorError('Live stream is offline', expected=True)
- tt = compat_str(int(time.time() / 60))
- did = uuid.uuid4().hex.upper()
-
- sign_content = ''.join((room_id, did, self._API_KEY, tt))
- sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest()
-
- flv_data = compat_urllib_parse_urlencode({
- 'cdn': 'ws',
- 'rate': '0',
- 'tt': tt,
- 'did': did,
- 'sign': sign,
- })
-
- video_info = self._download_json(
- 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id,
- data=flv_data, note='Downloading video info',
- headers={'Content-Type': 'application/x-www-form-urlencoded'})
-
- error_code = video_info.get('error', 0)
- if error_code is not 0:
- raise ExtractorError(
- '%s reported error %i' % (self.IE_NAME, error_code),
- expected=True)
-
- base_url = video_info['data']['rtmp_url']
- live_path = video_info['data']['rtmp_live']
-
- video_url = '%s/%s' % (base_url, live_path)
+ # Grab the URL from PC client API
+ # The m3u8 url from mobile API requires re-authentication every 5 minutes
+ tt = int(time.time())
+ signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt)
+ sign = hashlib.md5(signContent.encode('ascii')).hexdigest()
+ video_url = self._download_json(
+ 'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id,
+ video_id, note='Downloading video URL info',
+ query={'rate': 0}, headers={
+ 'auth': sign,
+ 'time': str(tt),
+ 'aid': 'pcclient'
+ })['data']['live_url']
title = self._live_title(unescapeHTML(room['room_name']))
- description = room.get('notice')
+ description = room.get('show_details')
thumbnail = room.get('room_src')
uploader = room.get('nickname')
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index e966d74..e491701 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -15,6 +15,8 @@ from ..utils import (
class DRTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['DK']
IE_NAME = 'drtv'
_TESTS = [{
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
@@ -137,7 +139,7 @@ class DRTVIE(InfoExtractor):
if not formats and restricted_to_denmark:
self.raise_geo_restricted(
'Unfortunately, DR is not allowed to show this program outside Denmark.',
- expected=True)
+ countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
@@ -156,6 +158,7 @@ class DRTVIE(InfoExtractor):
class DRTVLiveIE(InfoExtractor):
IE_NAME = 'drtv:live'
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
+ _GEO_COUNTRIES = ['DK']
_TEST = {
'url': 'https://www.dr.dk/tv/live/dr1',
'info_dict': {
diff --git a/youtube_dl/extractor/etonline.py b/youtube_dl/extractor/etonline.py
new file mode 100644
index 0000000..17d7cfe
--- /dev/null
+++ b/youtube_dl/extractor/etonline.py
@@ -0,0 +1,39 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ETOnlineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?etonline\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.etonline.com/tv/211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale/',
+ 'info_dict': {
+ 'id': '211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale',
+ 'title': 'md5:a21ec7d3872ed98335cbd2a046f34ee6',
+ 'description': 'md5:8b94484063f463cca709617c79618ccd',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.etonline.com/media/video/here_are_the_stars_who_love_bringing_their_moms_as_dates_to_the_oscars-211359/',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911076001/default_default/index.html?videoId=ref:%s'
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % video_id, 'BrightcoveNew', video_id)
+ for video_id in re.findall(
+ r'site\.brightcove\s*\([^,]+,\s*["\'](title_\d+)', webpage)]
+
+ return self.playlist_result(
+ entries, playlist_id,
+ self._og_search_title(webpage, fatal=False),
+ self._og_search_description(webpage))
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 83a170f..b056dff 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -83,6 +83,7 @@ from .awaan import (
from .azmedien import (
AZMedienIE,
AZMedienPlaylistIE,
+ AZMedienShowPlaylistIE,
)
from .azubu import AzubuIE, AzubuLiveIE
from .baidu import BaiduVideoIE
@@ -227,6 +228,10 @@ from .dailymotion import (
DailymotionUserIE,
DailymotionCloudIE,
)
+from .daisuki import (
+ DaisukiIE,
+ DaisukiPlaylistIE,
+)
from .daum import (
DaumIE,
DaumClipIE,
@@ -288,6 +293,7 @@ from .espn import (
ESPNArticleIE,
)
from .esri import EsriVideoIE
+from .etonline import ETOnlineIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
from .expotv import ExpoTVIE
@@ -338,6 +344,7 @@ from .francetv import (
)
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
+from .freshlive import FreshLiveIE
from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
@@ -637,6 +644,7 @@ from .ninecninemedia import (
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
+from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE
from .noco import NocoIE
from .normalboots import NormalbootsIE
@@ -666,6 +674,7 @@ from .npo import (
NPORadioIE,
NPORadioFragmentIE,
SchoolTVIE,
+ HetKlokhuisIE,
VPROIE,
WNLIE,
)
@@ -784,6 +793,7 @@ from .rai import (
)
from .rbmaradio import RBMARadioIE
from .rds import RDSIE
+from .redbulltv import RedBullTVIE
from .redtube import RedTubeIE
from .regiotv import RegioTVIE
from .rentv import (
@@ -835,7 +845,6 @@ from .safari import (
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
-from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import ScrippsNetworksWatchIE
@@ -991,6 +1000,7 @@ from .tunein import (
TuneInTopicIE,
TuneInShortenerIE,
)
+from .tunepk import TunePkIE
from .turbo import TurboIE
from .tutv import TutvIE
from .tv2 import (
@@ -1157,6 +1167,7 @@ from .voicerepublic import VoiceRepublicIE
from .voxmedia import VoxMediaIE
from .vporn import VpornIE
from .vrt import VRTIE
+from .vrak import VrakIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 70b8c95..6315d40 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -303,7 +303,7 @@ class FacebookIE(InfoExtractor):
if not video_data:
server_js_data = self._parse_json(
self._search_regex(
- r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
+ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if server_js_data:
diff --git a/youtube_dl/extractor/freshlive.py b/youtube_dl/extractor/freshlive.py
new file mode 100644
index 0000000..a90f915
--- /dev/null
+++ b/youtube_dl/extractor/freshlive.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class FreshLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://freshlive.tv/satotv/74712',
+ 'md5': '9f0cf5516979c4454ce982df3d97f352',
+ 'info_dict': {
+ 'id': '74712',
+ 'ext': 'mp4',
+ 'title': 'テスト',
+ 'description': 'テスト',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1511,
+ 'timestamp': 1483619655,
+ 'upload_date': '20170105',
+ 'uploader': 'サトTV',
+ 'uploader_id': 'satotv',
+ 'view_count': int,
+ 'comment_count': int,
+ 'is_live': False,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ options = self._parse_json(
+ self._search_regex(
+ r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>',
+ webpage, 'initial context'),
+ video_id)
+
+ info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id]
+
+ title = info['title']
+
+ if info.get('status') == 'upcoming':
+ raise ExtractorError('Stream %s is upcoming' % video_id, expected=True)
+
+ stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl']
+
+ is_live = info.get('liveStreamUrl') is not None
+
+ formats = self._extract_m3u8_formats(
+ stream_url, video_id, ext='mp4',
+ entry_protocol='m3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls')
+
+ if is_live:
+ title = self._live_title(title)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnail': info.get('thumbnailUrl'),
+ 'duration': int_or_none(info.get('airTime')),
+ 'timestamp': unified_timestamp(info.get('createdAt')),
+ 'uploader': try_get(
+ info, lambda x: x['channel']['title'], compat_str),
+ 'uploader_id': try_get(
+ info, lambda x: x['channel']['code'], compat_str),
+ 'uploader_url': try_get(
+ info, lambda x: x['channel']['permalink'], compat_str),
+ 'view_count': int_or_none(info.get('viewCount')),
+ 'comment_count': int_or_none(info.get('commentCount')),
+ 'tags': info.get('tags', []),
+ 'is_live': is_live,
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 9868ca6..ebab950 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -84,6 +84,7 @@ from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
from .openload import OpenloadIE
from .videopress import VideoPressIE
+from .rutube import RutubeIE
class GenericIE(InfoExtractor):
@@ -1503,6 +1504,23 @@ class GenericIE(InfoExtractor):
'add_ie': [VideoPressIE.ie_key()],
},
{
+ # Rutube embed
+ 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+ 'info_dict': {
+ 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+ 'ext': 'flv',
+ 'title': 'Магаззино: Казань 2',
+ 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+ 'uploader': 'Магаззино',
+ 'upload_date': '20170228',
+ 'uploader_id': '996642',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [RutubeIE.ie_key()],
+ },
+ {
# ThePlatform embedded with whitespaces in URLs
'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
'only_matching': True,
@@ -2480,6 +2498,12 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches(
videopress_urls, ie=VideoPressIE.ie_key())
+ # Look for Rutube embeds
+ rutube_urls = RutubeIE._extract_urls(webpage)
+ if rutube_urls:
+ return _playlist_from_matches(
+ rutube_urls, ie=RutubeIE.ie_key())
+
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index 21ed846..4c9be47 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -36,7 +36,7 @@ class GoIE(AdobePassIE):
'requestor_id': 'DisneyXD',
}
}
- _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+ _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
_TESTS = [{
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
'info_dict': {
@@ -52,6 +52,12 @@ class GoIE(AdobePassIE):
}, {
'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
'only_matching': True,
+ }, {
+ 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py
index 6e4290a..322e5b4 100644
--- a/youtube_dl/extractor/mdr.py
+++ b/youtube_dl/extractor/mdr.py
@@ -14,7 +14,7 @@ from ..utils import (
class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA'
- _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
_TESTS = [{
# MDR regularly deletes its videos
@@ -31,6 +31,7 @@ class MDRIE(InfoExtractor):
'duration': 250,
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
},
+ 'skip': '404 not found',
}, {
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
'md5': '4930515e36b06c111213e80d1e4aad0e',
@@ -41,6 +42,7 @@ class MDRIE(InfoExtractor):
'duration': 134,
'uploader': 'KIKA',
},
+ 'skip': '404 not found',
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
@@ -49,12 +51,22 @@ class MDRIE(InfoExtractor):
'ext': 'mp4',
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
- 'timestamp': 1450950000,
- 'upload_date': '20151224',
+ 'timestamp': 1482541200,
+ 'upload_date': '20161224',
'duration': 4628,
'uploader': 'KIKA',
},
}, {
+ # audio with alternative playerURL pattern
+ 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
+ 'info_dict': {
+ 'id': '100',
+ 'ext': 'mp4',
+ 'title': 'Feature: Operation Mindfuck - Robert Anton Wilson',
+ 'duration': 3239,
+ 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
+ },
+ }, {
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
'only_matching': True,
}, {
@@ -71,7 +83,7 @@ class MDRIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
data_url = self._search_regex(
- r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
+ r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1',
webpage, 'data url', group='url').replace(r'\/', '/')
doc = self._download_xml(
diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dl/extractor/njpwworld.py
new file mode 100644
index 0000000..f5e3f68
--- /dev/null
+++ b/youtube_dl/extractor/njpwworld.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ get_element_by_class,
+ urlencode_postdata,
+)
+
+
+class NJPWWorldIE(InfoExtractor):
+ _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
+ IE_DESC = '新日本プロレスワールド'
+ _NETRC_MACHINE = 'njpwworld'
+
+ _TEST = {
+ 'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
+ 'info_dict': {
+ 'id': 's_series_00155_1_9',
+ 'ext': 'mp4',
+ 'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
+ 'tags': list,
+ },
+ 'params': {
+ 'skip_download': True, # AES-encrypted m3u8
+ },
+ 'skip': 'Requires login',
+ }
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ # No authentication to be performed
+ if not username:
+ return True
+
+ webpage, urlh = self._download_webpage_handle(
+ 'https://njpwworld.com/auth/login', None,
+ note='Logging in', errnote='Unable to login',
+ data=urlencode_postdata({'login_id': username, 'pw': password}))
+ # /auth/login will return 302 for successful logins
+ if urlh.geturl() == 'https://njpwworld.com/auth/login':
+ self.report_warning('unable to login')
+ return False
+
+ return True
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
+ for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage):
+ player_url = compat_urlparse.urljoin(url, player_url)
+
+ player_page = self._download_webpage(
+ player_url, video_id, note='Downloading player page')
+
+ entries = self._parse_html5_media_entries(
+ player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
+ m3u8_entry_protocol='m3u8_native',
+ preference=2 if 'hq' in kind else 1)
+ formats.extend(entries[0]['formats'] if entries else [])  # a player page may expose no media entries
+
+ self._sort_formats(formats)
+
+ post_content = get_element_by_class('post-content', webpage)
+ tags = re.findall(
+ r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
+ ) if post_content else None
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ 'tags': tags,
+ }
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 9624371..38fefe4 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -3,41 +3,27 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
+ determine_ext,
+ ExtractorError,
fix_xml_ampersands,
orderedSet,
parse_duration,
qualities,
strip_jsonp,
unified_strdate,
- ExtractorError,
)
class NPOBaseIE(InfoExtractor):
def _get_token(self, video_id):
- token_page = self._download_webpage(
- 'http://ida.omroep.nl/npoplayer/i.js',
- video_id, note='Downloading token')
- token = self._search_regex(
- r'npoplayer\.token = "(.+?)"', token_page, 'token')
- # Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js
- token_l = list(token)
- first = second = None
- for i in range(5, len(token_l) - 4):
- if token_l[i].isdigit():
- if first is None:
- first = i
- elif second is None:
- second = i
- if first is None or second is None:
- first = 12
- second = 13
-
- token_l[first], token_l[second] = token_l[second], token_l[first]
-
- return ''.join(token_l)
+ return self._download_json(
+ 'http://ida.omroep.nl/app.php/auth', video_id,
+ note='Downloading token')['token']
class NPOIE(NPOBaseIE):
@@ -51,97 +37,120 @@ class NPOIE(NPOBaseIE):
(?:
npo\.nl/(?!live|radio)(?:[^/]+/){2}|
ntr\.nl/(?:[^/]+/){2,}|
- omroepwnl\.nl/video/fragment/[^/]+__
+ omroepwnl\.nl/video/fragment/[^/]+__|
+ zapp\.nl/[^/]+/[^/]+/
)
)
(?P<id>[^/?#]+)
'''
- _TESTS = [
- {
- 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
- 'md5': '4b3f9c429157ec4775f2c9cb7b911016',
- 'info_dict': {
- 'id': 'VPWON_1220719',
- 'ext': 'm4v',
- 'title': 'Nieuwsuur',
- 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
- 'upload_date': '20140622',
- },
+ _TESTS = [{
+ 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
+ 'md5': '4b3f9c429157ec4775f2c9cb7b911016',
+ 'info_dict': {
+ 'id': 'VPWON_1220719',
+ 'ext': 'm4v',
+ 'title': 'Nieuwsuur',
+ 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
+ 'upload_date': '20140622',
},
- {
- 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
- 'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
- 'info_dict': {
- 'id': 'VARA_101191800',
- 'ext': 'm4v',
- 'title': 'De Mega Mike & Mega Thomas show: The best of.',
- 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
- 'upload_date': '20090227',
- 'duration': 2400,
- },
+ }, {
+ 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
+ 'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
+ 'info_dict': {
+ 'id': 'VARA_101191800',
+ 'ext': 'm4v',
+ 'title': 'De Mega Mike & Mega Thomas show: The best of.',
+ 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
+ 'upload_date': '20090227',
+ 'duration': 2400,
},
- {
- 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
- 'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
- 'info_dict': {
- 'id': 'VPWON_1169289',
- 'ext': 'm4v',
- 'title': 'Tegenlicht: De toekomst komt uit Afrika',
- 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
- 'upload_date': '20130225',
- 'duration': 3000,
- },
+ }, {
+ 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
+ 'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+ 'info_dict': {
+ 'id': 'VPWON_1169289',
+ 'ext': 'm4v',
+ 'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
+ 'upload_date': '20130225',
+ 'duration': 3000,
},
- {
- 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
- 'info_dict': {
- 'id': 'WO_VPRO_043706',
- 'ext': 'wmv',
- 'title': 'De nieuwe mens - Deel 1',
- 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
- 'duration': 4680,
- },
- 'params': {
- # mplayer mms download
- 'skip_download': True,
- }
+ }, {
+ 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
+ 'info_dict': {
+ 'id': 'WO_VPRO_043706',
+ 'ext': 'm4v',
+ 'title': 'De nieuwe mens - Deel 1',
+ 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
+ 'duration': 4680,
},
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
# non asf in streams
- {
- 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
- 'md5': 'b3da13de374cbe2d5332a7e910bef97f',
- 'info_dict': {
- 'id': 'WO_NOS_762771',
- 'ext': 'mp4',
- 'title': 'Hoe gaat Europa verder na Parijs?',
- },
+ 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
+ 'info_dict': {
+ 'id': 'WO_NOS_762771',
+ 'ext': 'mp4',
+ 'title': 'Hoe gaat Europa verder na Parijs?',
},
- {
- 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
- 'md5': '01c6a2841675995da1f0cf776f03a9c3',
- 'info_dict': {
- 'id': 'VPWON_1233944',
- 'ext': 'm4v',
- 'title': 'Aap, poot, pies',
- 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
- 'upload_date': '20150508',
- 'duration': 599,
- },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
+ 'info_dict': {
+ 'id': 'VPWON_1233944',
+ 'ext': 'm4v',
+ 'title': 'Aap, poot, pies',
+ 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
+ 'upload_date': '20150508',
+ 'duration': 599,
},
- {
- 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
- 'md5': 'd30cd8417b8b9bca1fdff27428860d08',
- 'info_dict': {
- 'id': 'POW_00996502',
- 'ext': 'm4v',
- 'title': '''"Dit is wel een 'landslide'..."''',
- 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
- 'upload_date': '20150508',
- 'duration': 462,
- },
+ 'params': {
+ 'skip_download': True,
}
- ]
+ }, {
+ 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
+ 'info_dict': {
+ 'id': 'POW_00996502',
+ 'ext': 'm4v',
+ 'title': '''"Dit is wel een 'landslide'..."''',
+ 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
+ 'upload_date': '20150508',
+ 'duration': 462,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # audio
+ 'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
+ 'info_dict': {
+ 'id': 'RBX_FUNX_6683215',
+ 'ext': 'mp3',
+ 'title': 'Jouw Stad Rotterdam',
+ 'description': 'md5:db251505244f097717ec59fabc372d9f',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
+ 'only_matching': True,
+ }, {
+ # live stream
+ 'url': 'npo:LI_NL1_4188102',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -170,70 +179,115 @@ class NPOIE(NPOBaseIE):
token = self._get_token(video_id)
formats = []
+ urls = set()
+
+ quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
+ items = self._download_json(
+ 'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
+ 'Downloading formats JSON', query={
+ 'adaptive': 'yes',
+ 'token': token,
+ })['items'][0]
+ for num, item in enumerate(items):
+ item_url = item.get('url')
+ if not item_url or item_url in urls:
+ continue
+ urls.add(item_url)
+ format_id = self._search_regex(
+ r'video/ida/([^/]+)', item_url, 'format id',
+ default=None)
+
+ def add_format_url(format_url):
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+
+ # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
+ if item.get('contentType') in ('url', 'audio'):
+ add_format_url(item_url)
+ continue
- pubopties = metadata.get('pubopties')
- if pubopties:
- quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
- for format_id in pubopties:
- format_info = self._download_json(
- 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s'
- % (video_id, format_id, token),
- video_id, 'Downloading %s JSON' % format_id)
- if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
+ try:
+ stream_info = self._download_json(
+ item_url + '&type=json', video_id,
+ 'Downloading %s stream JSON'
+ % (item.get('label') or item.get('format') or format_id or num))  # parenthesized: % binds tighter than or
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+ error = (self._parse_json(
+ ee.cause.read().decode(), video_id,
+ fatal=False) or {}).get('errorstring')
+ if error:
+ raise ExtractorError(error, expected=True)
+ raise
+ # Stream URL instead of JSON, example: npo:LI_NL1_4188102
+ if isinstance(stream_info, compat_str):
+ if not stream_info.startswith('http'):
continue
- streams = format_info.get('streams')
- if streams:
- try:
- video_info = self._download_json(
- streams[0] + '&type=json',
- video_id, 'Downloading %s stream JSON' % format_id)
- except ExtractorError as ee:
- if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
- error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring')
- if error:
- raise ExtractorError(error, expected=True)
- raise
- else:
- video_info = format_info
- video_url = video_info.get('url')
- if not video_url:
+ video_url = stream_info
+ # JSON
+ else:
+ video_url = stream_info.get('url')
+ if not video_url or video_url in urls:
+ continue
+ urls.add(video_url)  # record the resolved stream URL; item_url was already recorded above
+ if determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ add_format_url(video_url)
+
+ is_live = metadata.get('medium') == 'live'
+
+ if not is_live:
+ for num, stream in enumerate(metadata.get('streams', [])):
+ stream_url = stream.get('url')
+ if not stream_url or stream_url in urls:
continue
- if format_id == 'adaptive':
- formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
- else:
+ urls.add(stream_url)
+ # smooth streaming is not supported
+ stream_type = stream.get('type', '').lower()
+ if stream_type in ['ss', 'ms']:
+ continue
+ if stream_type == 'hds':
+ f4m_formats = self._extract_f4m_formats(
+ stream_url, video_id, fatal=False)
+ # f4m downloader downloads only piece of live stream
+ for f4m_format in f4m_formats:
+ f4m_format['preference'] = -1
+ formats.extend(f4m_formats)
+ elif stream_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, ext='mp4', fatal=False))
+ # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
+ elif '.asf' in stream_url:
+ asx = self._download_xml(
+ stream_url, video_id,
+ 'Downloading stream %d ASX playlist' % num,
+ transform_source=fix_xml_ampersands, fatal=False)
+ if not asx:
+ continue
+ ref = asx.find('./ENTRY/Ref')
+ if ref is None:
+ continue
+ video_url = ref.get('href')
+ if not video_url or video_url in urls:
+ continue
+ urls.add(video_url)
formats.append({
'url': video_url,
- 'format_id': format_id,
- 'quality': quality(format_id),
+ 'ext': stream.get('formaat', 'asf'),
+ 'quality': stream.get('kwaliteit'),
+ 'preference': -10,
})
-
- streams = metadata.get('streams')
- if streams:
- for i, stream in enumerate(streams):
- stream_url = stream.get('url')
- if not stream_url:
- continue
- if '.asf' not in stream_url:
+ else:
formats.append({
'url': stream_url,
'quality': stream.get('kwaliteit'),
})
- continue
- asx = self._download_xml(
- stream_url, video_id,
- 'Downloading stream %d ASX playlist' % i,
- transform_source=fix_xml_ampersands)
- ref = asx.find('./ENTRY/Ref')
- if ref is None:
- continue
- video_url = ref.get('href')
- if not video_url:
- continue
- formats.append({
- 'url': video_url,
- 'ext': stream.get('formaat', 'asf'),
- 'quality': stream.get('kwaliteit'),
- })
self._sort_formats(formats)
@@ -246,28 +300,28 @@ class NPOIE(NPOBaseIE):
return {
'id': video_id,
- 'title': title,
+ 'title': self._live_title(title) if is_live else title,
'description': metadata.get('info'),
'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
'upload_date': unified_strdate(metadata.get('gidsdatum')),
'duration': parse_duration(metadata.get('tijdsduur')),
'formats': formats,
'subtitles': subtitles,
+ 'is_live': is_live,
}
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
- _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.npo.nl/live/npo-1',
'info_dict': {
- 'id': 'LI_NEDERLAND1_136692',
+ 'id': 'LI_NL1_4188102',
'display_id': 'npo-1',
'ext': 'mp4',
- 'title': 're:^Nederland 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'description': 'Livestream',
+ 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True,
},
'params': {
@@ -283,58 +337,12 @@ class NPOLiveIE(NPOBaseIE):
live_id = self._search_regex(
r'data-prid="([^"]+)"', webpage, 'live id')
- metadata = self._download_json(
- 'http://e.omroep.nl/metadata/%s' % live_id,
- display_id, transform_source=strip_jsonp)
-
- token = self._get_token(display_id)
-
- formats = []
-
- streams = metadata.get('streams')
- if streams:
- for stream in streams:
- stream_type = stream.get('type').lower()
- # smooth streaming is not supported
- if stream_type in ['ss', 'ms']:
- continue
- stream_info = self._download_json(
- 'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
- % (stream.get('url'), token),
- display_id, 'Downloading %s JSON' % stream_type)
- if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0):
- continue
- stream_url = self._download_json(
- stream_info['stream'], display_id,
- 'Downloading %s URL' % stream_type,
- 'Unable to download %s URL' % stream_type,
- transform_source=strip_jsonp, fatal=False)
- if not stream_url:
- continue
- if stream_type == 'hds':
- f4m_formats = self._extract_f4m_formats(stream_url, display_id)
- # f4m downloader downloads only piece of live stream
- for f4m_format in f4m_formats:
- f4m_format['preference'] = -1
- formats.extend(f4m_formats)
- elif stream_type == 'hls':
- formats.extend(self._extract_m3u8_formats(stream_url, display_id, 'mp4'))
- else:
- formats.append({
- 'url': stream_url,
- 'preference': -10,
- })
-
- self._sort_formats(formats)
-
return {
+ '_type': 'url_transparent',
+ 'url': 'npo:%s' % live_id,
+ 'ie_key': NPOIE.ie_key(),
'id': live_id,
'display_id': display_id,
- 'title': self._live_title(metadata['titel']),
- 'description': metadata['info'],
- 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
- 'formats': formats,
- 'is_live': True,
}
@@ -416,7 +424,21 @@ class NPORadioFragmentIE(InfoExtractor):
}
-class SchoolTVIE(InfoExtractor):
+class NPODataMidEmbedIE(InfoExtractor):
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'NPO',
+ 'url': 'npo:%s' % video_id,
+ 'display_id': display_id
+ }
+
+
+class SchoolTVIE(NPODataMidEmbedIE):
IE_NAME = 'schooltv'
_VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
@@ -435,17 +457,25 @@ class SchoolTVIE(InfoExtractor):
}
}
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(
- r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
- return {
- '_type': 'url_transparent',
- 'ie_key': 'NPO',
- 'url': 'npo:%s' % video_id,
- 'display_id': display_id
+
+class HetKlokhuisIE(NPODataMidEmbedIE):
+ IE_NAME = 'hetklokhuis'
+ _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
+
+ _TEST = {
+ 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
+ 'info_dict': {
+ 'id': 'VPWON_1260528',
+ 'display_id': 'Zwaartekrachtsgolven',
+ 'ext': 'm4v',
+ 'title': 'Het Klokhuis: Zwaartekrachtsgolven',
+ 'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48',
+ 'upload_date': '20170223',
+ },
+ 'params': {
+ 'skip_download': True
}
+ }
class NPOPlaylistBaseIE(NPOIE):
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index fc7ff43..25f6a9a 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -75,22 +75,37 @@ class OpenloadIE(InfoExtractor):
'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
webpage, 'openload ID')
- first_char = int(ol_id[0])
- urlcode = []
- num = 1
-
- while num < len(ol_id):
- i = ord(ol_id[num])
- key = 0
- if i <= 90:
- key = i - 65
- elif i >= 97:
- key = 25 + i - 97
- urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char)))
- num += 5
-
- video_url = 'https://openload.co/stream/' + ''.join(
- [value for _, value in sorted(urlcode, key=lambda x: x[0])])
+ video_url_chars = []
+
+ first_char = ord(ol_id[0])
+ key = first_char - 55
+ maxKey = max(2, key)
+ key = min(maxKey, len(ol_id) - 14)
+ t = ol_id[key:key + 12]
+
+ hashMap = {}
+ v = ol_id.replace(t, "")
+ h = 0
+
+ while h < len(t):
+ f = t[h:h + 2]
+ i = int(f, 16)
+ hashMap[h / 2] = i
+ h += 2
+
+ h = 0
+
+ while h < len(v):
+ B = v[h:h + 2]
+ i = int(B, 16)
+ index = (h / 2) % 6
+ A = hashMap[index]
+ i = i ^ A
+ video_url_chars.append(compat_chr(i))
+ h += 2
+
+ video_url = 'https://openload.co/stream/%s?mime=true'
+ video_url = video_url % (''.join(video_url_chars))
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py
new file mode 100644
index 0000000..5c73d5b
--- /dev/null
+++ b/youtube_dl/extractor/redbulltv.py
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class RedBullTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?P<id>AP-\w+)'
+ _TESTS = [{
+ # film
+ 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
+ 'md5': '78e860f631d7a846e712fab8c5fe2c38',
+ 'info_dict': {
+ 'id': 'AP-1Q756YYX51W11',
+ 'ext': 'mp4',
+ 'title': 'ABC of...WRC',
+ 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
+ 'duration': 1582.04,
+ 'timestamp': 1488405786,
+ 'upload_date': '20170301',
+ },
+ }, {
+ # episode
+ 'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web',
+ 'info_dict': {
+ 'id': 'AP-1PMT5JCWH1W11',
+ 'ext': 'mp4',
+ 'title': 'Grime - Hashtags S2 E4',
+ 'description': 'md5:334b741c8c1ce65be057eab6773c1cf5',
+ 'duration': 904.6,
+ 'timestamp': 1487290093,
+ 'upload_date': '20170217',
+ 'series': 'Hashtags',
+ 'season_number': 2,
+ 'episode_number': 4,
+ },
+ }, {
+ 'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ access_token = self._download_json(
+ 'https://api-v2.redbull.tv/start', video_id,
+ note='Downloading access token', query={
+ 'build': '4.0.9',
+ 'category': 'smartphone',
+ 'os_version': 23,
+ 'os_family': 'android',
+ })['auth']['access_token']
+
+ info = self._download_json(
+ 'https://api-v2.redbull.tv/views/%s' % video_id,
+ video_id, note='Downloading video information',
+ headers={'Authorization': 'Bearer ' + access_token}
+ )['blocks'][0]['top'][0]
+
+ video = info['video_product']
+
+ title = info['title'].strip()
+ m3u8_url = video['url']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ subtitles = {}
+ for _, captions in (try_get(
+ video, lambda x: x['attachments']['captions'],
+ dict) or {}).items():
+ if not captions or not isinstance(captions, list):
+ continue
+ for caption in captions:
+ caption_url = caption.get('url')
+ if not caption_url:
+ continue
+ subtitles.setdefault(caption.get('lang') or 'en', []).append({
+ 'url': caption_url,
+ 'ext': caption.get('format'),
+ })
+
+ subheading = info.get('subheading')
+ if subheading:
+ title += ' - %s' % subheading
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': info.get('long_description') or info.get(
+ 'short_description'),
+ 'duration': float_or_none(video.get('duration'), scale=1000),
+ 'timestamp': unified_timestamp(info.get('published')),
+ 'series': info.get('show_title'),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index fd1df92..889fa76 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -17,7 +17,7 @@ from ..utils import (
class RutubeIE(InfoExtractor):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
- _VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P<id>[\da-z]{32})'
+ _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
@@ -39,8 +39,17 @@ class RutubeIE(InfoExtractor):
}, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True,
+ }, {
+ 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
+ 'only_matching': True,
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return [mobj.group('url') for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
+ webpage)]
+
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py
index 20d0175..6c09df2 100644
--- a/youtube_dl/extractor/ruutu.py
+++ b/youtube_dl/extractor/ruutu.py
@@ -82,6 +82,9 @@ class RuutuIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id='hds', fatal=False))
elif ext == 'mpd':
+ # video-only and audio-only streams are of different
+ # duration resulting in out of sync issue
+ continue
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
else:
diff --git a/youtube_dl/extractor/scivee.py b/youtube_dl/extractor/scivee.py
deleted file mode 100644
index b1ca12f..0000000
--- a/youtube_dl/extractor/scivee.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class SciVeeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://www.scivee.tv/node/62352',
- 'md5': 'b16699b74c9e6a120f6772a44960304f',
- 'info_dict': {
- 'id': '62352',
- 'ext': 'mp4',
- 'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
- 'description': 'md5:81f1710638e11a481358fab1b11059d7',
- },
- 'skip': 'Not accessible from Travis CI server',
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- # annotations XML is malformed
- annotations = self._download_webpage(
- 'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations')
-
- title = self._html_search_regex(r'<title>([^<]+)</title>', annotations, 'title')
- description = self._html_search_regex(r'<abstract>([^<]+)</abstract>', annotations, 'abstract', fatal=False)
- filesize = int_or_none(self._html_search_regex(
- r'<filesize>([^<]+)</filesize>', annotations, 'filesize', fatal=False))
-
- formats = [
- {
- 'url': 'http://www.scivee.tv/assets/audio/%s' % video_id,
- 'ext': 'mp3',
- 'format_id': 'audio',
- },
- {
- 'url': 'http://www.scivee.tv/assets/video/%s' % video_id,
- 'ext': 'mp4',
- 'format_id': 'video',
- 'filesize': filesize,
- },
- ]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index b3aa4ce..0ee4a8f 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor):
},
]
- _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA'
+ _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod
diff --git a/youtube_dl/extractor/tunepk.py b/youtube_dl/extractor/tunepk.py
new file mode 100644
index 0000000..9d42651
--- /dev/null
+++ b/youtube_dl/extractor/tunepk.py
@@ -0,0 +1,90 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class TunePkIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?tune\.pk/(?:video/|player/embed_player.php?.*?\bvid=)|
+ embed\.tune\.pk/play/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins',
+ 'md5': '0c537163b7f6f97da3c5dd1e3ef6dd55',
+ 'info_dict': {
+ 'id': '6919541',
+ 'ext': 'mp4',
+ 'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins',
+ 'description': 'md5:eb5a04114fafef5cec90799a93a2d09c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1487327564,
+ 'upload_date': '20170217',
+ 'uploader': 'Movie Trailers',
+ 'duration': 107,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://tune.pk/video/%s' % video_id, video_id)
+
+ details = self._parse_json(
+ self._search_regex(
+ r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'),
+ video_id)['details']
+
+ video = details['video']
+ title = video.get('title') or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, 'title', fatal=True)
+
+ formats = self._parse_jwplayer_formats(
+ details['player']['sources'], video_id)
+ self._sort_formats(formats)
+
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage, 'description')
+
+ thumbnail = video.get('thumb') or self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'thumbnail', webpage, 'thumbnail')
+
+ timestamp = unified_timestamp(video.get('date_added'))
+ uploader = try_get(
+ video, lambda x: x['uploader']['name'],
+ compat_str) or self._html_search_meta('author', webpage, 'author')
+
+ duration = int_or_none(video.get('duration'))
+ view_count = int_or_none(video.get('views'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py
index f3817ab..3475ef4 100644
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -17,6 +17,9 @@ class TvigleIE(InfoExtractor):
IE_DESC = 'Интернет-телевидение Tvigle.ru'
_VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['RU']
+
_TESTS = [
{
'url': 'http://www.tvigle.ru/video/sokrat/',
@@ -72,8 +75,13 @@ class TvigleIE(InfoExtractor):
error_message = item.get('errorMessage')
if not videos and error_message:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+ if item.get('isGeoBlocked') is True:
+ self.raise_geo_restricted(
+ msg=error_message, countries=self._GEO_COUNTRIES)
+ else:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message),
+ expected=True)
title = item['title']
description = item.get('description')
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
index f3541b6..7af1165 100644
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
@@ -12,7 +14,7 @@ from ..utils import (
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
- _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.24video.net/video/view/1044982',
@@ -43,10 +45,12 @@ class TwentyFourVideoIE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
webpage = self._download_webpage(
- 'http://www.24video.sex/video/view/%s' % video_id, video_id)
+ 'http://%s/video/view/%s' % (host, video_id), video_id)
title = self._og_search_title(webpage)
description = self._html_search_regex(
@@ -72,11 +76,11 @@ class TwentyFourVideoIE(InfoExtractor):
# Sets some cookies
self._download_xml(
- r'http://www.24video.sex/video/xml/%s?mode=init' % video_id,
+ r'http://%s/video/xml/%s?mode=init' % (host, video_id),
video_id, 'Downloading init XML')
video_xml = self._download_xml(
- 'http://www.24video.sex/video/xml/%s?mode=play' % video_id,
+ 'http://%s/video/xml/%s?mode=play' % (host, video_id),
video_id, 'Downloading video XML')
video = xpath_element(video_xml, './/video', 'video', fatal=True)
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index bbba394..ed36336 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -12,7 +12,6 @@ from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
- compat_urlparse,
)
from ..utils import (
clean_html,
@@ -24,6 +23,7 @@ from ..utils import (
parse_iso8601,
update_url_query,
urlencode_postdata,
+ urljoin,
)
@@ -32,7 +32,7 @@ class TwitchBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net'
- _LOGIN_URL = 'http://www.twitch.tv/login'
+ _LOGIN_URL = 'https://www.twitch.tv/login'
_CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
_NETRC_MACHINE = 'twitch'
@@ -64,6 +64,35 @@ class TwitchBaseIE(InfoExtractor):
raise ExtractorError(
'Unable to login. Twitch said: %s' % message, expected=True)
+ def login_step(page, urlh, note, data):
+ form = self._hidden_inputs(page)
+ form.update(data)
+
+ page_url = urlh.geturl()
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
+ 'post url', default=page_url, group='url')
+ post_url = urljoin(page_url, post_url)
+
+ headers = {'Referer': page_url}
+
+ try:
+ response = self._download_json(
+ post_url, None, note,
+ data=urlencode_postdata(form),
+ headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ response = self._parse_json(
+ e.cause.read().decode('utf-8'), None)
+ fail(response['message'])
+ raise
+
+ redirect_url = urljoin(post_url, response['redirect'])
+ return self._download_webpage_handle(
+ redirect_url, None, 'Downloading login redirect page',
+ headers=headers)
+
login_page, handle = self._download_webpage_handle(
self._LOGIN_URL, None, 'Downloading login page')
@@ -71,40 +100,19 @@ class TwitchBaseIE(InfoExtractor):
if 'blacklist_message' in login_page:
fail(clean_html(login_page))
- login_form = self._hidden_inputs(login_page)
-
- login_form.update({
- 'username': username,
- 'password': password,
+ redirect_page, handle = login_step(
+ login_page, handle, 'Logging in as %s' % username, {
+ 'username': username,
+ 'password': password,
})
- redirect_url = handle.geturl()
-
- post_url = self._search_regex(
- r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
- 'post url', default=redirect_url, group='url')
-
- if not post_url.startswith('http'):
- post_url = compat_urlparse.urljoin(redirect_url, post_url)
-
- headers = {'Referer': redirect_url}
-
- try:
- response = self._download_json(
- post_url, None, 'Logging in as %s' % username,
- data=urlencode_postdata(login_form),
- headers=headers)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- response = self._parse_json(
- e.cause.read().decode('utf-8'), None)
- fail(response['message'])
- raise
-
- if response.get('redirect'):
- self._download_webpage(
- response['redirect'], None, 'Downloading login redirect page',
- headers=headers)
+ if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
+ # TODO: Add mechanism to request an SMS or phone call
+ tfa_token = self._get_tfa_info('two-factor authentication token')
+ login_step(redirect_page, handle, 'Submitting TFA token', {
+ 'authy_token': tfa_token,
+ 'remember_2fa': 'true',
+ })
def _prefer_source(self, formats):
try:
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index c4e37f6..9aa38bc 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -17,12 +17,12 @@ from ..utils import (
class VevoBaseIE(InfoExtractor):
- def _extract_json(self, webpage, video_id, item):
+ def _extract_json(self, webpage, video_id):
return self._parse_json(
self._search_regex(
r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
webpage, 'initial store'),
- video_id)['default'][item]
+ video_id)
class VevoIE(VevoBaseIE):
@@ -139,6 +139,11 @@ class VevoIE(VevoBaseIE):
# no genres available
'url': 'http://www.vevo.com/watch/INS171400764',
'only_matching': True,
+ }, {
+ # Another case available only via the webpage; using streams/streamsV3 formats
+ # Geo-restricted to Netherlands/Germany
+ 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
+ 'only_matching': True,
}]
_VERSIONS = {
0: 'youtube', # only in AuthenticateVideo videoVersions
@@ -193,7 +198,14 @@ class VevoIE(VevoBaseIE):
# https://github.com/rg3/youtube-dl/issues/9366)
if not video_versions:
webpage = self._download_webpage(url, video_id)
- video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
+ json_data = self._extract_json(webpage, video_id)
+ if 'streams' in json_data.get('default', {}):
+ video_versions = json_data['default']['streams'][video_id][0]
+ else:
+ video_versions = [
+ value
+ for key, value in json_data['apollo']['data'].items()
+ if key.startswith('%s.streams' % video_id)]
uploader = None
artist = None
@@ -207,7 +219,7 @@ class VevoIE(VevoBaseIE):
formats = []
for video_version in video_versions:
- version = self._VERSIONS.get(video_version['version'])
+ version = self._VERSIONS.get(video_version.get('version'), 'generic')
version_url = video_version.get('url')
if not version_url:
continue
@@ -339,7 +351,7 @@ class VevoPlaylistIE(VevoBaseIE):
if video_id:
return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
- playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind)
+ playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind]
playlist = (list(playlists.values())[0]
if playlist_kind == 'playlist' else playlists[playlist_id])
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
index d26fb49..5086f59 100644
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -9,7 +9,7 @@ from .common import InfoExtractor
class VierIE(InfoExtractor):
IE_NAME = 'vier'
- _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
_TESTS = [{
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
'info_dict': {
@@ -24,6 +24,19 @@ class VierIE(InfoExtractor):
'skip_download': True,
},
}, {
+ 'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
+ 'info_dict': {
+ 'id': '2561614',
+ 'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
+ 'ext': 'mp4',
+ 'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
+ 'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
'only_matching': True,
}, {
@@ -35,6 +48,7 @@ class VierIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
embed_id = mobj.group('embed_id')
display_id = mobj.group('display_id') or embed_id
+ site = mobj.group('site')
webpage = self._download_webpage(url, display_id)
@@ -43,7 +57,7 @@ class VierIE(InfoExtractor):
webpage, 'video id')
application = self._search_regex(
[r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
- webpage, 'application', default='vier_vod')
+ webpage, 'application', default=site + '_vod')
filename = self._search_regex(
[r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
webpage, 'filename')
@@ -68,7 +82,7 @@ class VierIE(InfoExtractor):
class VierVideosIE(InfoExtractor):
IE_NAME = 'vier:videos'
- _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
_TESTS = [{
'url': 'http://www.vier.be/demoestuin/videos',
'info_dict': {
@@ -76,6 +90,12 @@ class VierVideosIE(InfoExtractor):
},
'playlist_mincount': 153,
}, {
+ 'url': 'http://www.vijf.be/temptationisland/videos',
+ 'info_dict': {
+ 'id': 'temptationisland',
+ },
+ 'playlist_mincount': 159,
+ }, {
'url': 'http://www.vier.be/demoestuin/videos?page=6',
'info_dict': {
'id': 'demoestuin-page6',
@@ -92,6 +112,7 @@ class VierVideosIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
program = mobj.group('program')
+ site = mobj.group('site')
page_id = mobj.group('page')
if page_id:
@@ -105,13 +126,13 @@ class VierVideosIE(InfoExtractor):
entries = []
for current_page_id in itertools.count(start_page):
current_page = self._download_webpage(
- 'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
+ 'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
program,
'Downloading page %d' % (current_page_id + 1))
page_entries = [
- self.url_result('http://www.vier.be' + video_url, 'Vier')
+ self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
for video_url in re.findall(
- r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
+ r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
entries.extend(page_entries)
if page_id or '>Meer<' not in current_page:
break
diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py
new file mode 100644
index 0000000..daa247c
--- /dev/null
+++ b/youtube_dl/extractor/vrak.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ smuggle_url,
+ unescapeHTML,
+)
+
+
+class VrakIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?\btarget=(?P<id>[\d.]+)'
+ _TEST = {
+ 'url': 'http://www.vrak.tv/videos?target=1.2306782&filtre=emission&id=1.1806721',
+ 'info_dict': {
+ 'id': '5345661243001',
+ 'ext': 'mp4',
+ 'title': 'Obésité, film de hockey et Roseline Filion',
+ 'timestamp': 1488492126,
+ 'upload_date': '20170302',
+ 'uploader_id': '2890187628001',
+ 'creator': 'VRAK.TV',
+ 'age_limit': 8,
+ 'series': 'ALT (Actualité Légèrement Tordue)',
+ 'episode': 'Obésité, film de hockey et Roseline Filion',
+ 'tags': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2890187628001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h\d\b[^>]+\bclass=["\']videoTitle["\'][^>]*>([^<]+)',
+ webpage, 'title', default=None) or self._og_search_title(webpage)
+
+ content = self._parse_json(
+ self._search_regex(
+ r'data-player-options-content=(["\'])(?P<content>{.+?})\1',
+ webpage, 'content', default='{}', group='content'),
+ video_id, transform_source=unescapeHTML)
+
+ ref_id = content.get('refId') or self._search_regex(
+ r'refId&quot;:&quot;([^&]+)&quot;', webpage, 'ref id')
+
+ brightcove_id = self._search_regex(
+ r'''(?x)
+ java\.lang\.String\s+value\s*=\s*["']brightcove\.article\.\d+\.%s
+ [^>]*
+ java\.lang\.String\s+value\s*=\s*["'](\d+)
+ ''' % re.escape(ref_id), webpage, 'brightcove id')
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': BrightcoveNewIE.ie_key(),
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['CA']}),
+ 'id': brightcove_id,
+ 'description': content.get('description'),
+ 'creator': content.get('brand'),
+ 'age_limit': parse_age_limit(content.get('rating')),
+ 'series': content.get('showName') or content.get(
+ 'episodeName'), # this is intentional
+ 'season_number': int_or_none(content.get('seasonNumber')),
+ 'episode': title,
+ 'episode_number': int_or_none(content.get('episodeNumber')),
+ 'tags': content.get('tags', []),
+ }
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 36a8c98..7b67037 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
dict_get,
+ ExtractorError,
int_or_none,
parse_duration,
unified_strdate,
@@ -57,6 +58,10 @@ class XHamsterIE(InfoExtractor):
}, {
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
'only_matching': True,
+ }, {
+ # This video is visible for marcoalfa123456's friends only
+ 'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -78,6 +83,12 @@ class XHamsterIE(InfoExtractor):
mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)
+ error = self._html_search_regex(
+ r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
title = self._html_search_regex(
[r'<h1[^>]*>([^<]+)</h1>',
r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index dec0280..caa0482 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -47,7 +47,6 @@ from ..utils import (
unsmuggle_url,
uppercase_escape,
urlencode_postdata,
- ISO3166Utils,
)
@@ -371,6 +370,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
_SUBTITLE_FORMATS = ('ttml', 'vtt')
+ _GEO_BYPASS = False
+
IE_NAME = 'youtube'
_TESTS = [
{
@@ -917,7 +918,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# itag 212
'url': '1t24XAntNCY',
'only_matching': True,
- }
+ },
+ {
+ # geo restricted to JP
+ 'url': 'sJL6WA-aGkQ',
+ 'only_matching': True,
+ },
]
def __init__(self, *args, **kwargs):
@@ -1376,11 +1382,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'token' not in video_info:
if 'reason' in video_info:
if 'The uploader has not made this video available in your country.' in video_info['reason']:
- regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
- if regions_allowed:
- raise ExtractorError('YouTube said: This video is available in %s only' % (
- ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
- expected=True)
+ regions_allowed = self._html_search_meta(
+ 'regionsAllowed', video_webpage, default=None)
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(
+ msg=video_info['reason'][0], countries=countries)
raise ExtractorError(
'YouTube said: %s' % video_info['reason'][0],
expected=True, video_id=video_id)
@@ -1448,7 +1454,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
- raise ExtractorError('"rental" videos not supported')
+ raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
# Start extracting information
self.report_information_extraction(video_id)
@@ -1845,7 +1851,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(?:
youtube\.com/
(?:
- (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
+ (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
\? (?:.*?[&;])*? (?:p|a|list)=
| p/
)|
@@ -1919,6 +1925,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
}
}, {
+ 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'playlist_mincount': 485,
+ 'info_dict': {
+ 'title': '2017 華語最新單曲 (2/24更新)',
+ 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ }
+ }, {
'note': 'Embedded SWF player',
'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
'playlist_count': 4,
@@ -2066,7 +2079,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
# Check if it's a video-specific URL
query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = query_dict.get('v', [None])[0] or self._search_regex(
- r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
+ r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
'video id', default=None)
if video_id:
if self._downloader.params.get('noplaylist'):
@@ -2226,7 +2239,7 @@ class YoutubeUserIE(YoutubeChannelIE):
'url': 'https://www.youtube.com/gametrailers',
'only_matching': True,
}, {
- # This channel is not available.
+ # This channel is not available, geo restricted to JP
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
'only_matching': True,
}]
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 17b8379..d293c74 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -473,7 +473,8 @@ def timeconvert(timestr):
def sanitize_filename(s, restricted=False, is_id=False):
"""Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
- Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
+ Set is_id if this is not an arbitrary string, but an ID that should be kept
+ if possible.
"""
def replace_insane(char):
if restricted and char in ACCENT_CHARS:
@@ -1747,11 +1748,16 @@ def base_url(url):
def urljoin(base, path):
+ if isinstance(path, bytes):
+ path = path.decode('utf-8')
if not isinstance(path, compat_str) or not path:
return None
if re.match(r'^(?:https?:)?//', path):
return path
- if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
+ if isinstance(base, bytes):
+ base = base.decode('utf-8')
+ if not isinstance(base, compat_str) or not re.match(
+ r'^(?:https?:)?//', base):
return None
return compat_urlparse.urljoin(base, path)
@@ -3319,6 +3325,57 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
self, req, proxy, type)
+# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
+# released into Public Domain
+# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
+
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a non-negative long integer to a big-endian byte string.

    The result has no leading zero bytes, except that zero itself is
    encoded as a single zero byte.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.

    Raises ValueError if n is negative: a negative number has no unsigned
    representation and must not be silently encoded as zero.
    """
    n = int(n)
    if n < 0:
        raise ValueError('long_to_bytes() requires a non-negative integer')

    # Peel off 32 bits at a time, least significant word first
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    words.reverse()

    # Only the most significant word can carry leading zero bytes; n == 0
    # produces no words at all and is represented by a single zero byte
    s = b''.join(words).lstrip(b'\000') or b'\000'

    # Add front padding up to the next multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
+
+
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Interpret a byte string as a big-endian unsigned integer.

    This is (essentially) the inverse of long_to_bytes(). An empty string
    yields 0.
    """
    # Left-pad with zero bytes so the input splits evenly into 32-bit words
    remainder = len(s) % 4
    if remainder:
        s = b'\000' * (4 - remainder) + s

    result = 0
    offset = 0
    total = len(s)
    while offset < total:
        (word,) = compat_struct_unpack('>I', s[offset:offset + 4])
        result = (result << 32) + word
        offset += 4
    return result
+
+
def ohdave_rsa_encrypt(data, exponent, modulus):
'''
Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
@@ -3336,6 +3393,21 @@ def ohdave_rsa_encrypt(data, exponent, modulus):
return '%x' % encrypted
def pkcs1pad(data, length):
    """
    Pad input data with the PKCS#1 v1.5 encryption scheme (block type 2)

    The result is laid out as [0, 2] + filler + [0] + data, where filler is
    made of non-zero pseudo-random bytes and is at least 8 bytes long.

    @param {int[]}  data        input data
    @param {int}    length      target length
    @returns {int[]}            padded data
    @raises ValueError          if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Filler bytes must be non-zero: the first zero byte after the [0, 2]
    # header marks where the payload starts, so a zero inside the filler
    # would make the receiver split the message at the wrong place.
    # NOTE(review): the random module is not a cryptographically secure
    # source; confirm that is acceptable for the callers of this helper.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + list(data)
+
+
def encode_base_n(num, n, table=None):
FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
if not table:
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index fe7462e..bd451bf 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2017.02.24.1'
+__version__ = '2017.03.07'