-rw-r--r--  .github/ISSUE_TEMPLATE.md                 |   6
-rw-r--r--  .travis.yml                               |  13
-rw-r--r--  ChangeLog                                 |  62
-rwxr-xr-x  devscripts/install_jython.sh              |   5
-rw-r--r--  docs/supportedsites.md                    |  10
-rw-r--r--  test/test_InfoExtractor.py                |  11
-rwxr-xr-x  youtube_dl/YoutubeDL.py                   |  12
-rw-r--r--  youtube_dl/compat.py                      |  21
-rw-r--r--  youtube_dl/downloader/hls.py              |   3
-rw-r--r--  youtube_dl/extractor/abc.py               |  61
-rw-r--r--  youtube_dl/extractor/acast.py             |  23
-rw-r--r--  youtube_dl/extractor/aws.py               |   6
-rw-r--r--  youtube_dl/extractor/bilibili.py          |   8
-rw-r--r--  youtube_dl/extractor/canalplus.py         |  99
-rw-r--r--  youtube_dl/extractor/collegerama.py       |  93
-rw-r--r--  youtube_dl/extractor/common.py            |  21
-rw-r--r--  youtube_dl/extractor/extractors.py        |  12
-rw-r--r--  youtube_dl/extractor/filmweb.py           |  42
-rw-r--r--  youtube_dl/extractor/generic.py           |  29
-rw-r--r--  youtube_dl/extractor/internazionale.py    |  64
-rw-r--r--  youtube_dl/extractor/jwplatform.py        |  11
-rw-r--r--  youtube_dl/extractor/lynda.py             |  28
-rw-r--r--  youtube_dl/extractor/mediasite.py         | 214
-rw-r--r--  youtube_dl/extractor/mitele.py            |  30
-rw-r--r--  youtube_dl/extractor/motherless.py        |  86
-rw-r--r--  youtube_dl/extractor/odnoklassniki.py     |  26
-rw-r--r--  youtube_dl/extractor/openload.py          |  35
-rw-r--r--  youtube_dl/extractor/playtvak.py          |  20
-rw-r--r--  youtube_dl/extractor/pluralsight.py       |   4
-rw-r--r--  youtube_dl/extractor/rtve.py              |   3
-rw-r--r--  youtube_dl/extractor/sandia.py            |  65
-rw-r--r--  youtube_dl/extractor/slutload.py          |  14
-rw-r--r--  youtube_dl/extractor/soundcloud.py        |  21
-rw-r--r--  youtube_dl/extractor/twentythreevideo.py  |  77
-rw-r--r--  youtube_dl/extractor/twitch.py            |   9
-rw-r--r--  youtube_dl/extractor/ufctv.py             |  55
-rw-r--r--  youtube_dl/extractor/vimeo.py             |  11
-rw-r--r--  youtube_dl/extractor/youku.py             |  12
-rw-r--r--  youtube_dl/utils.py                       |  15
-rw-r--r--  youtube_dl/version.py                     |   2
40 files changed, 967 insertions(+), 372 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index d7a9123..ad52c89 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.23**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.07**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.12.23
+[debug] youtube-dl version 2018.01.07
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.travis.yml b/.travis.yml
index 5f4f392..92f3268 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,16 +7,21 @@ python:
- "3.4"
- "3.5"
- "3.6"
+ - "pypy"
+ - "pypy3"
sudo: false
env:
- YTDL_TEST_SET=core
- YTDL_TEST_SET=download
matrix:
+ include:
+ - env: JYTHON=true; YTDL_TEST_SET=core
+ - env: JYTHON=true; YTDL_TEST_SET=download
fast_finish: true
allow_failures:
- env: YTDL_TEST_SET=download
+ - env: JYTHON=true; YTDL_TEST_SET=core
+ - env: JYTHON=true; YTDL_TEST_SET=download
+before_install:
+ - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
script: ./devscripts/run_tests.sh
-notifications:
- email:
- - filippo.valsorda@gmail.com
- - yasoob.khld@gmail.com
diff --git a/ChangeLog b/ChangeLog
index 420a1bd..9d37cdc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,69 @@
version <unreleased>
Extractors
+* [bilibili] Fix extraction (#15188)
+
+
+version 2018.01.07
+
+Core
+* [utils] Fix youtube-dl under PyPy3 on Windows
+* [YoutubeDL] Output python implementation in debug header
+
+Extractors
++ [jwplatform] Add support for multiple embeds (#15192)
+* [mitele] Fix extraction (#15186)
++ [motherless] Add support for groups (#15124)
+* [lynda] Relax URL regular expression (#15185)
+* [soundcloud] Fallback to avatar picture for thumbnail (#12878)
+* [youku] Fix list extraction (#15135)
+* [openload] Fix extraction (#15166)
+* [lynda] Skip invalid subtitles (#15159)
+* [twitch] Pass video id to url_result when extracting playlist (#15139)
+* [rtve.es:alacarta] Fix extraction of some new URLs
+* [acast] Fix extraction (#15147)
+
+
+version 2017.12.31
+
+Core
++ [extractor/common] Add container meta field for formats extracted
+ in _parse_mpd_formats (#13616)
++ [downloader/hls] Use HTTP headers for key request
+* [common] Use AACL as the default fourcc when AudioTag is 255
+* [extractor/common] Fix extraction of DASH formats with the same
+ representation id (#15111)
+
+Extractors
++ [slutload] Add support for mobile URLs (#14806)
+* [abc:iview] Bypass geo restriction
+* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985,
+ #15035, #15057, #15061, #15071, #15095, #15106)
+* [openload] Fix extraction (#15118)
+- [sandia] Remove extractor
+- [collegerama] Remove extractor
++ [mediasite] Add support for sites based on Mediasite Video Platform (#5428,
+ #11185, #14343)
++ [ufctv] Add support for ufc.tv (#14520)
+* [pluralsight] Fix missing first line of subtitles (#11118)
+* [openload] Fallback on f-page extraction (#14665, #14879)
+* [vimeo] Improve password protected videos extraction (#15114)
+* [aws] Fix canonical/signed headers generation on python 2 (#15102)
+
+
+version 2017.12.28
+
+Extractors
++ [internazionale] Add support for internazionale.it (#14973)
+* [playtvak] Relax video regular expression and make description optional
+ (#15037)
++ [filmweb] Add support for filmweb.no (#8773, #10368)
++ [23video] Add support for 23video.com
++ [espn] Add support for fivethirtyeight.com (#6864)
++ [umg:de] Add support for universal-music.de (#11582, #11584)
++ [espn] Add support for espnfc and extract more formats (#8053)
* [youku] Update ccode (#14880)
++ [openload] Add support for oload.stream (#15070)
* [youku] Fix list extraction (#15065)
diff --git a/devscripts/install_jython.sh b/devscripts/install_jython.sh
new file mode 100755
index 0000000..bafca4d
--- /dev/null
+++ b/devscripts/install_jython.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
+java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
+$HOME/jython/bin/jython -m pip install nose
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index eac35e3..79b3430 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -3,6 +3,7 @@
- **1up.com**
- **20min**
- **220.ro**
+ - **23video**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
@@ -170,7 +171,6 @@
- **CNN**
- **CNNArticle**
- **CNNBlogs**
- - **CollegeRama**
- **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralFullEpisodes**
@@ -269,6 +269,8 @@
- **Fczenit**
- **filmon**
- **filmon:channel**
+ - **Filmweb**
+ - **FiveThirtyEight**
- **FiveTV**
- **Flickr**
- **Flipagram**
@@ -359,6 +361,7 @@
- **InfoQ**
- **Instagram**
- **instagram:user**: Instagram user profile
+ - **Internazionale**
- **InternetVideoArchive**
- **IPrima**
- **iqiyi**: 爱奇艺
@@ -445,6 +448,7 @@
- **media.ccc.de**
- **Medialaan**
- **Mediaset**
+ - **Mediasite**
- **Medici**
- **megaphone.fm**: megaphone.fm embedded players
- **Meipai**: 美拍
@@ -474,6 +478,7 @@
- **Moniker**: allmyvideos.net and vidspot.net
- **Morningstar**: morningstar.com
- **Motherless**
+ - **MotherlessGroup**
- **Motorsport**: motorsport.com
- **MovieClips**
- **MovieFap**
@@ -713,7 +718,6 @@
- **safari**: safaribooksonline.com online video
- **safari:api**
- **safari:course**: safaribooksonline.com online courses
- - **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
@@ -888,7 +892,9 @@
- **udemy**
- **udemy:course**
- **UDNEmbed**: 聯合影音
+ - **UFCTV**
- **UKTVPlay**
+ - **umg:de**: Universal Music Deutschland
- **Unistra**
- **Unity**
- **uol.com.br**
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 8a372d2..7b31d51 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -493,10 +493,21 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/13919
+ # Also tests duplicate representation ids, see
+ # https://github.com/rg3/youtube-dl/issues/15111
'float_duration',
'http://unknown/manifest.mpd',
[{
'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'm4a',
+ 'format_id': '318597',
+ 'format_note': 'DASH audio',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 61.587,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '318597',
'format_note': 'DASH video',
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ace80f1..97bd9c5 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -2233,8 +2233,16 @@ class YoutubeDL(object):
sys.exc_clear()
except Exception:
pass
- self._write_string('[debug] Python version %s - %s\n' % (
- platform.python_version(), platform_name()))
+
+ def python_implementation():
+ impl_name = platform.python_implementation()
+ if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+ return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
+ return impl_name
+
+ self._write_string('[debug] Python version %s (%s) - %s\n' % (
+ platform.python_version(), python_implementation(),
+ platform_name()))
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 2a62248..41ca9ad 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
import binascii
import collections
+import ctypes
import email
import getpass
import io
import itertools
import optparse
import os
+import platform
import re
import shlex
import shutil
@@ -2906,6 +2908,24 @@ except ImportError: # not 2.6+ or is 3.x
except ImportError:
compat_zip = zip
+if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
+ # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
+ # names, see the original PyPy issue [1] and the youtube-dl one [2].
+ # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
+ # 2. https://github.com/rg3/youtube-dl/pull/4392
+ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ real = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+ def resf(tpl, *args, **kwargs):
+ funcname, dll = tpl
+ return real((str(funcname), dll), *args, **kwargs)
+
+ return resf
+else:
+ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
__all__ = [
'compat_HTMLParseError',
'compat_HTMLParser',
@@ -2914,6 +2934,7 @@ __all__ = [
'compat_chr',
'compat_cookiejar',
'compat_cookies',
+ 'compat_ctypes_WINFUNCTYPE',
'compat_etree_fromstring',
'compat_etree_register_namespace',
'compat_expanduser',
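
Aside (illustrative, not part of the patch): a minimal sketch of how a caller would use the new shim in place of ctypes.WINFUNCTYPE. The WriteConsoleW prototype below is an assumption chosen for illustration (the real call sites are in youtube_dl/utils.py, whose hunk is further down this diff), and it only runs on Windows, where ctypes.windll exists.

    # Windows-only illustration: build the WriteConsoleW prototype through the
    # shim so that old PyPy2 (< 5.4.0) gets the byte-string function name it
    # expects, while CPython and newer PyPy take the plain ctypes path.
    import ctypes
    from youtube_dl.compat import compat_ctypes_WINFUNCTYPE

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.c_int,                  # BOOL return value
        ctypes.c_void_p,               # HANDLE hConsoleOutput
        ctypes.c_wchar_p,              # LPCWSTR lpBuffer
        ctypes.c_int,                  # DWORD nNumberOfCharsToWrite
        ctypes.POINTER(ctypes.c_int),  # LPDWORD lpNumberOfCharsWritten
        ctypes.c_void_p,               # LPVOID lpReserved
    )(('WriteConsoleW', ctypes.windll.kernel32))
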
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 1a6e226..4dc3ab4 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -163,7 +163,8 @@ class HlsFD(FragmentFD):
return False
if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
- decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read()
+ decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
+ self._prepare_url(info_dict, decrypt_info['URI'])).read()
frag_content = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
self._append_fragment(ctx, frag_content)
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index 60f753b..87017ed 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -1,6 +1,9 @@
from __future__ import unicode_literals
+import hashlib
+import hmac
import re
+import time
from .common import InfoExtractor
from ..compat import compat_str
@@ -10,6 +13,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
try_get,
+ update_url_query,
)
@@ -101,21 +105,24 @@ class ABCIE(InfoExtractor):
class ABCIViewIE(InfoExtractor):
IE_NAME = 'abc.net.au:iview'
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
+ _GEO_COUNTRIES = ['AU']
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
- 'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
+ 'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
'info_dict': {
- 'id': 'ZX9735A001S00',
+ 'id': 'ZW0898A003S00',
'ext': 'mp4',
- 'title': 'Diaries Of A Broken Mind',
- 'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
- 'upload_date': '20161010',
- 'uploader_id': 'abc2',
- 'timestamp': 1476064920,
+ 'title': 'Series 5 Ep 3',
+ 'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
+ 'upload_date': '20171228',
+ 'uploader_id': 'abc1',
+ 'timestamp': 1514499187,
+ },
+ 'params': {
+ 'skip_download': True,
},
- 'skip': 'Video gone',
}]
def _real_extract(self, url):
@@ -126,20 +133,30 @@ class ABCIViewIE(InfoExtractor):
title = video_params.get('title') or video_params['seriesTitle']
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
- format_urls = [
- try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
-
- # May have higher quality video
- sd_url = try_get(
- stream, lambda x: x['streams']['hds']['sd'], compat_str)
- if sd_url:
- format_urls.append(sd_url.replace('metered', 'um'))
-
- formats = []
- for format_url in format_urls:
- if format_url:
- formats.extend(
- self._extract_akamai_formats(format_url, video_id))
+ house_number = video_params.get('episodeHouseNumber')
+ path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
+ int(time.time()), house_number)
+ sig = hmac.new(
+ 'android.content.res.Resources'.encode('utf-8'),
+ path.encode('utf-8'), hashlib.sha256).hexdigest()
+ token = self._download_webpage(
+ 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
+
+ def tokenize_url(url, token):
+ return update_url_query(url, {
+ 'hdnea': token,
+ })
+
+ for sd in ('sd', 'sd-low'):
+ sd_url = try_get(
+ stream, lambda x: x['streams']['hls'][sd], compat_str)
+ if not sd_url:
+ continue
+ formats = self._extract_m3u8_formats(
+ tokenize_url(sd_url, token), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ if formats:
+ break
self._sort_formats(formats)
subtitles = {}
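
Aside (illustrative, not part of the patch): the token-request flow introduced above, pulled together as a standalone snippet. The house number is a placeholder; the HMAC key is the static one hard-coded in the hunk.

    import hashlib
    import hmac
    import time

    house_number = 'ZW0898A003S00'  # placeholder episode house number
    path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
        int(time.time()), house_number)
    # Sign the path with the static key, then fetch
    # http://iview.abc.net.au<path>&sig=<sig> to obtain the hdnea token that
    # gets appended to each HLS playlist URL via update_url_query.
    sig = hmac.new(
        'android.content.res.Resources'.encode('utf-8'),
        path.encode('utf-8'), hashlib.sha256).hexdigest()
    token_url = 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig)
    print(token_url)
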
diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py
index 6dace30..5871e72 100644
--- a/youtube_dl/extractor/acast.py
+++ b/youtube_dl/extractor/acast.py
@@ -8,7 +8,7 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
- parse_iso8601,
+ unified_timestamp,
OnDemandPagedList,
)
@@ -32,7 +32,7 @@ class ACastIE(InfoExtractor):
}, {
# test with multiple blings
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
- 'md5': '55c0097badd7095f494c99a172f86501',
+ 'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3',
@@ -40,23 +40,24 @@ class ACastIE(InfoExtractor):
'timestamp': 1477346700,
'upload_date': '20161024',
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
- 'duration': 2797,
+ 'duration': 2766,
}
}]
def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups()
cast_data = self._download_json(
- 'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
+ 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
+ e = cast_data['result']['episode']
return {
- 'id': compat_str(cast_data['id']),
+ 'id': compat_str(e['id']),
'display_id': display_id,
- 'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
- 'title': cast_data['name'],
- 'description': cast_data.get('description'),
- 'thumbnail': cast_data.get('image'),
- 'timestamp': parse_iso8601(cast_data.get('publishingDate')),
- 'duration': int_or_none(cast_data.get('duration')),
+ 'url': e['mediaUrl'],
+ 'title': e['name'],
+ 'description': e.get('description'),
+ 'thumbnail': e.get('image'),
+ 'timestamp': unified_timestamp(e.get('publishingDate')),
+ 'duration': int_or_none(e.get('duration')),
}
diff --git a/youtube_dl/extractor/aws.py b/youtube_dl/extractor/aws.py
index 670abce..dccfeaf 100644
--- a/youtube_dl/extractor/aws.py
+++ b/youtube_dl/extractor/aws.py
@@ -21,11 +21,11 @@ class AWSIE(InfoExtractor):
'Accept': 'application/json',
'Host': self._AWS_PROXY_HOST,
'X-Amz-Date': amz_date,
+ 'X-Api-Key': self._AWS_API_KEY
}
session_token = aws_dict.get('session_token')
if session_token:
headers['X-Amz-Security-Token'] = session_token
- headers['X-Api-Key'] = self._AWS_API_KEY
def aws_hash(s):
return hashlib.sha256(s.encode('utf-8')).hexdigest()
@@ -33,9 +33,9 @@ class AWSIE(InfoExtractor):
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
canonical_querystring = compat_urllib_parse_urlencode(query)
canonical_headers = ''
- for header_name, header_value in headers.items():
+ for header_name, header_value in sorted(headers.items()):
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
- signed_headers = ';'.join([header.lower() for header in headers.keys()])
+ signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
canonical_request = '\n'.join([
'GET',
aws_dict['uri'],
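
Aside (illustrative, not part of the patch): why the sorting matters. AWS Signature Version 4 requires the canonical headers in lexicographic order, and plain dicts on Python 2 iterate in arbitrary order, so the previous unsorted loop could produce a canonical request that no longer matches the computed signature. A minimal sketch with placeholder header values:

    headers = {
        'Accept': 'application/json',
        'Host': 'example.execute-api.us-east-1.amazonaws.com',  # placeholder
        'X-Amz-Date': '20180107T000000Z',
        'X-Api-Key': 'EXAMPLEKEY',
    }
    # Canonical headers: lowercase names, one "name:value\n" pair per line,
    # sorted by name; signed headers: the same names joined with ';'.
    canonical_headers = ''.join(
        '%s:%s\n' % (name.lower(), value)
        for name, value in sorted(headers.items()))
    signed_headers = ';'.join(name.lower() for name in sorted(headers))
    print(canonical_headers)
    print(signed_headers)
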
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 1e57310..beffcec 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -102,6 +102,7 @@ class BiliBiliIE(InfoExtractor):
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
headers = {
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': url
}
headers.update(self.geo_verification_headers())
@@ -116,10 +117,15 @@ class BiliBiliIE(InfoExtractor):
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
+ headers = {
+ 'Referer': url
+ }
+ headers.update(self.geo_verification_headers())
+
video_info = self._download_json(
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
video_id, note='Downloading video info page',
- headers=self.geo_verification_headers())
+ headers=headers)
if 'durl' not in video_info:
self._report_error(video_info)
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index d8bf073..51c11cb 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -4,59 +4,36 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlparse
from ..utils import (
- dict_get,
# ExtractorError,
# HEADRequest,
int_or_none,
qualities,
- remove_end,
unified_strdate,
)
class CanalplusIE(InfoExtractor):
- IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:
- (?:(?:www|m)\.)?canalplus\.fr|
- (?:www\.)?piwiplus\.fr|
- (?:www\.)?d8\.tv|
- (?:www\.)?c8\.fr|
- (?:www\.)?d17\.tv|
- (?:(?:football|www)\.)?cstar\.fr|
- (?:www\.)?itele\.fr
- )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
- player\.canalplus\.fr/#/(?P<id>\d+)
- )
-
- '''
+ IE_DESC = 'mycanal.fr and piwiplus.fr'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
_SITE_ID_MAP = {
- 'canalplus': 'cplus',
+ 'mycanal': 'cplus',
'piwiplus': 'teletoon',
- 'd8': 'd8',
- 'c8': 'd8',
- 'd17': 'd17',
- 'cstar': 'd17',
- 'itele': 'itele',
}
# Only works for direct mp4 URLs
_GEO_COUNTRIES = ['FR']
_TESTS = [{
- 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
+ 'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
'info_dict': {
- 'id': '1405510',
- 'display_id': 'pid1830-c-zapping',
+ 'id': '1397061',
+ 'display_id': 'lolywood',
'ext': 'mp4',
- 'title': 'Zapping - 02/07/2016',
- 'description': 'Le meilleur de toutes les chaînes, tous les jours',
- 'upload_date': '20160702',
+ 'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
+ 'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
+ 'upload_date': '20160602',
},
}, {
# geo restricted, bypassed
@@ -70,64 +47,12 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20140724',
},
'expected_warnings': ['HTTP Error 403: Forbidden'],
- }, {
- # geo restricted, bypassed
- 'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
- 'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
- 'info_dict': {
- 'id': '1443684',
- 'display_id': 'pid6318-videos-integrales',
- 'ext': 'mp4',
- 'title': 'Guess my iep ! - TPMP - 07/04/2017',
- 'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
- 'upload_date': '20170407',
- },
- 'expected_warnings': ['HTTP Error 403: Forbidden'],
- }, {
- 'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
- 'info_dict': {
- 'id': '1420176',
- 'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
- 'ext': 'mp4',
- 'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
- 'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
- 'upload_date': '20161014',
- },
- }, {
- 'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
- 'info_dict': {
- 'id': '1416769',
- 'display_id': 'pid7566-feminines-videos',
- 'ext': 'mp4',
- 'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
- 'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
- 'upload_date': '20160921',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://m.canalplus.fr/?vid=1398231',
- 'only_matching': True,
- }, {
- 'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
- 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
-
- # Beware, some subclasses do not define an id group
- display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
+ site, display_id, video_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(
- [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
- r'id=["\']canal_video_player(?P<id>\d+)',
- r'data-video=["\'](?P<id>\d+)'],
- webpage, 'video id', default=mobj.group('vid'), group='id')
+ site_id = self._SITE_ID_MAP[site]
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
@@ -161,7 +86,7 @@ class CanalplusIE(InfoExtractor):
format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
else:
formats.append({
- # the secret extracted ya function in http://player.canalplus.fr/common/js/canalPlayer.js
+ # the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
'format_id': format_id,
'preference': preference(format_id),
diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py
deleted file mode 100644
index 6a41db8..0000000
--- a/youtube_dl/extractor/collegerama.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
- float_or_none,
- int_or_none,
- sanitized_Request,
-)
-
-
-class CollegeRamaIE(InfoExtractor):
- _VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P<id>[\da-f]+)'
- _TESTS = [
- {
- 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
- 'md5': '481fda1c11f67588c0d9d8fbdced4e39',
- 'info_dict': {
- 'id': '585a43626e544bdd97aeb71a0ec907a01d',
- 'ext': 'mp4',
- 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
- 'description': '',
- 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
- 'duration': 7713.088,
- 'timestamp': 1413309600,
- 'upload_date': '20141014',
- },
- },
- {
- 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
- 'md5': 'ef1fdded95bdf19b12c5999949419c92',
- 'info_dict': {
- 'id': '86a9ea9f53e149079fbdb4202b521ed21d',
- 'ext': 'wmv',
- 'title': '64ste Vakantiecursus: Afvalwater',
- 'description': 'md5:7fd774865cc69d972f542b157c328305',
- 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
- 'duration': 10853,
- 'timestamp': 1326446400,
- 'upload_date': '20120113',
- },
- },
- ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- player_options_request = {
- 'getPlayerOptionsRequest': {
- 'ResourceId': video_id,
- 'QueryString': '',
- }
- }
-
- request = sanitized_Request(
- 'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
- json.dumps(player_options_request))
- request.add_header('Content-Type', 'application/json')
-
- player_options = self._download_json(request, video_id)
-
- presentation = player_options['d']['Presentation']
- title = presentation['Title']
- description = presentation.get('Description')
- thumbnail = None
- duration = float_or_none(presentation.get('Duration'), 1000)
- timestamp = int_or_none(presentation.get('UnixTime'), 1000)
-
- formats = []
- for stream in presentation['Streams']:
- for video in stream['VideoUrls']:
- thumbnail_url = stream.get('ThumbnailUrl')
- if thumbnail_url:
- thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url
- format_id = video['MediaType']
- if format_id == 'SS':
- continue
- formats.append({
- 'url': video['Location'],
- 'format_id': format_id,
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 3b79b8c..5b6a09c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1880,6 +1880,7 @@ class InfoExtractor(object):
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
+ 'container': mimetype2ext(mime_type) + '_dash',
}
f.update(parse_codecs(representation_attrib.get('codecs')))
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
@@ -2007,16 +2008,14 @@ class InfoExtractor(object):
f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
- try:
- existing_format = next(
- fo for fo in formats
- if fo['format_id'] == representation_id)
- except StopIteration:
- full_info = formats_dict.get(representation_id, {}).copy()
- full_info.update(f)
- formats.append(full_info)
- else:
- existing_format.update(f)
+ # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
+ # is not necessarily unique within a Period thus formats with
+ # the same `format_id` are quite possible. There are numerous examples
+ # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
+ # https://github.com/rg3/youtube-dl/issues/13919)
+ full_info = formats_dict.get(representation_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
@@ -2056,7 +2055,7 @@ class InfoExtractor(object):
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
for track in stream.findall('QualityLevel'):
- fourcc = track.get('FourCC')
+ fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
# TODO: add support for WVC1 and WMAP
if fourcc not in ('H264', 'AVC1', 'AACL'):
self.report_warning('%s is not a supported codec' % fourcc)
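
Aside (illustrative, not part of the patch): the FourCC fallback shown in isolation. Format tag 255 (0x00FF) is the registered wave-format tag for AAC, so an audio QualityLevel that carries AudioTag=255 but no FourCC can be treated as AACL; the helper below is an assumption-labelled toy, not code from the repository.

    def pick_fourcc(quality_level_attrs):
        # quality_level_attrs mimics the attribute dict of a <QualityLevel>
        # element from an IIS Smooth Streaming manifest.
        return quality_level_attrs.get(
            'FourCC',
            'AACL' if quality_level_attrs.get('AudioTag') == '255' else None)

    assert pick_fourcc({'AudioTag': '255'}) == 'AACL'   # AAC without FourCC
    assert pick_fourcc({'FourCC': 'H264'}) == 'H264'    # explicit codec wins
    assert pick_fourcc({'AudioTag': '65534'}) is None   # still unsupported
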
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e863f03..a3ad4df 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -205,7 +205,6 @@ from .cnn import (
CNNArticleIE,
)
from .coub import CoubIE
-from .collegerama import CollegeRamaIE
from .comedycentral import (
ComedyCentralFullEpisodesIE,
ComedyCentralIE,
@@ -345,6 +344,7 @@ from .filmon import (
FilmOnIE,
FilmOnChannelIE,
)
+from .filmweb import FilmwebIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
from .fivetv import FiveTVIE
@@ -465,6 +465,7 @@ from .indavideo import (
)
from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
+from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
from .iqiyi import IqiyiIE
@@ -574,6 +575,7 @@ from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .mediaset import MediasetIE
+from .mediasite import MediasiteIE
from .medici import MediciIE
from .megaphone import MegaphoneIE
from .meipai import MeipaiIE
@@ -607,7 +609,10 @@ from .mofosex import MofosexIE
from .mojvideo import MojvideoIE
from .moniker import MonikerIE
from .morningstar import MorningstarIE
-from .motherless import MotherlessIE
+from .motherless import (
+ MotherlessIE,
+ MotherlessGroupIE
+)
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
@@ -910,7 +915,6 @@ from .rutube import (
from .rutv import RUTVIE
from .ruutu import RuutuIE
from .ruv import RuvIE
-from .sandia import SandiaIE
from .safari import (
SafariIE,
SafariApiIE,
@@ -1120,6 +1124,7 @@ from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
+from .twentythreevideo import TwentyThreeVideoIE
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@@ -1142,6 +1147,7 @@ from .udemy import (
UdemyCourseIE
)
from .udn import UDNEmbedIE
+from .ufctv import UFCTVIE
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .umg import UMGDeIE
diff --git a/youtube_dl/extractor/filmweb.py b/youtube_dl/extractor/filmweb.py
new file mode 100644
index 0000000..56000bc
--- /dev/null
+++ b/youtube_dl/extractor/filmweb.py
@@ -0,0 +1,42 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class FilmwebIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?P<type>trailere|filmnytt)/article(?P<id>\d+)\.ece'
+ _TEST = {
+ 'url': 'http://www.filmweb.no/trailere/article1264921.ece',
+ 'md5': 'e353f47df98e557d67edaceda9dece89',
+ 'info_dict': {
+ 'id': '13033574',
+ 'ext': 'mp4',
+ 'title': 'Det som en gang var',
+ 'upload_date': '20160316',
+ 'timestamp': 1458140101,
+ 'uploader_id': '12639966',
+ 'uploader': 'Live Roaldset',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_type, article_id = re.match(self._VALID_URL, url).groups()
+ if article_type == 'filmnytt':
+ webpage = self._download_webpage(url, article_id)
+ article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id')
+ embed_code = self._download_json(
+ 'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp',
+ article_id, query={
+ 'articleId': article_id,
+ })['embedCode']
+ iframe_url = self._proto_relative_url(self._search_regex(
+ r'<iframe[^>]+src="([^"]+)', embed_code, 'iframe url'))
+
+ return {
+ '_type': 'url_transparent',
+ 'id': article_id,
+ 'url': iframe_url,
+ 'ie_key': 'TwentyThreeVideo',
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c7b6092..9b0cd00 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -100,6 +100,7 @@ from .megaphone import MegaphoneIE
from .vzaar import VzaarIE
from .channel9 import Channel9IE
from .vshare import VShareIE
+from .mediasite import MediasiteIE
class GenericIE(InfoExtractor):
@@ -1925,6 +1926,18 @@ class GenericIE(InfoExtractor):
'title': 'vl14062007715967',
'ext': 'mp4',
}
+ },
+ {
+ 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
+ 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
+ 'info_dict': {
+ 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
+ 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
+ 'timestamp': 1474354800,
+ 'upload_date': '20160920',
+ }
}
# {
# # TODO: find another test
@@ -2695,9 +2708,9 @@ class GenericIE(InfoExtractor):
return self.url_result(viewlift_url)
# Look for JWPlatform embeds
- jwplatform_url = JWPlatformIE._extract_url(webpage)
- if jwplatform_url:
- return self.url_result(jwplatform_url, 'JWPlatform')
+ jwplatform_urls = JWPlatformIE._extract_urls(webpage)
+ if jwplatform_urls:
+ return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
# Look for Digiteka embeds
digiteka_url = DigitekaIE._extract_url(webpage)
@@ -2883,6 +2896,16 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+ # Look for Mediasite embeds
+ mediasite_urls = MediasiteIE._extract_urls(webpage)
+ if mediasite_urls:
+ entries = [
+ self.url_result(smuggle_url(
+ compat_urlparse.urljoin(url, mediasite_url),
+ {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
+ for mediasite_url in mediasite_urls]
+ return self.playlist_result(entries, video_id, video_title)
+
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
diff --git a/youtube_dl/extractor/internazionale.py b/youtube_dl/extractor/internazionale.py
new file mode 100644
index 0000000..10ba1f6
--- /dev/null
+++ b/youtube_dl/extractor/internazionale.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_timestamp
+
+
+class InternazionaleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
+ 'md5': '3e39d32b66882c1218e305acbf8348ca',
+ 'info_dict': {
+ 'id': '265968',
+ 'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
+ 'ext': 'mp4',
+ 'title': 'Richard Linklater racconta una scena di Boyhood',
+ 'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
+ 'timestamp': 1424354635,
+ 'upload_date': '20150219',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ DATA_RE = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'
+
+ title = self._search_regex(
+ DATA_RE % 'video-title', webpage, 'title', default=None,
+ group='value') or self._og_search_title(webpage)
+
+ video_id = self._search_regex(
+ DATA_RE % 'job-id', webpage, 'video id', group='value')
+ video_path = self._search_regex(
+ DATA_RE % 'video-path', webpage, 'video path', group='value')
+
+ video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id)
+
+ formats = self._extract_m3u8_formats(
+ video_base + 'm3u8', display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index c9bcbb0..63d0dc9 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -23,11 +23,14 @@ class JWPlatformIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
- mobj = re.search(
- r'<(?:script|iframe)[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
+ urls = JWPlatformIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
webpage)
- if mobj:
- return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index 1b6f509..f5c7abc 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -94,7 +94,15 @@ class LyndaBaseIE(InfoExtractor):
class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
- _VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?:lynda\.com|educourse\.ga)/
+ (?:
+ (?:[^/]+/){2,3}(?P<course_id>\d+)|
+ player/embed
+ )/
+ (?P<id>\d+)
+ '''
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
@@ -113,6 +121,9 @@ class LyndaIE(LyndaBaseIE):
}, {
'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
+ 'only_matching': True,
}]
def _raise_unavailable(self, video_id):
@@ -244,8 +255,9 @@ class LyndaIE(LyndaBaseIE):
def _get_subtitles(self, video_id):
url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
subs = self._download_json(url, None, False)
- if subs:
- return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
+ fixed_subs = self._fix_subtitles(subs)
+ if fixed_subs:
+ return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
else:
return {}
@@ -256,7 +268,15 @@ class LyndaCourseIE(LyndaBaseIE):
# Course link equals to welcome/introduction video link of same course
# We will recognize it as course link
- _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
+ _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py
new file mode 100644
index 0000000..0e2645c
--- /dev/null
+++ b/youtube_dl/extractor/mediasite.py
@@ -0,0 +1,214 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ mimetype2ext,
+ unescapeHTML,
+ unsmuggle_url,
+ urljoin,
+)
+
+
+class MediasiteIE(InfoExtractor):
+ _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
+ _TESTS = [
+ {
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
+ 'info_dict': {
+ 'id': '2db6c271681e4f199af3c60d1f82869b1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
+ 'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
+ 'timestamp': 1474268400.0,
+ 'upload_date': '20160919',
+ },
+ },
+ {
+ 'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb',
+ 'info_dict': {
+ 'id': '90bb363295d945d6b548c867d01181361d',
+ 'ext': 'mp4',
+ 'upload_date': '20150429',
+ 'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity',
+ 'timestamp': 1430311380.0,
+ },
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
+ 'md5': '481fda1c11f67588c0d9d8fbdced4e39',
+ 'info_dict': {
+ 'id': '585a43626e544bdd97aeb71a0ec907a01d',
+ 'ext': 'mp4',
+ 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
+ 'duration': 7713.088,
+ 'timestamp': 1413309600,
+ 'upload_date': '20141014',
+ },
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
+ 'md5': 'ef1fdded95bdf19b12c5999949419c92',
+ 'info_dict': {
+ 'id': '86a9ea9f53e149079fbdb4202b521ed21d',
+ 'ext': 'wmv',
+ 'title': '64ste Vakantiecursus: Afvalwater',
+ 'description': 'md5:7fd774865cc69d972f542b157c328305',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
+ 'duration': 10853,
+ 'timestamp': 1326446400,
+ 'upload_date': '20120113',
+ },
+ },
+ {
+ 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
+ 'md5': '9422edc9b9a60151727e4b6d8bef393d',
+ 'info_dict': {
+ 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
+ 'ext': 'mp4',
+ 'title': 'Xyce Software Training - Section 1',
+ 'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
+ 'upload_date': '20120409',
+ 'timestamp': 1333983600,
+ 'duration': 7794,
+ }
+ }
+ ]
+
+ # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
+ _STREAM_TYPES = {
+ 0: 'video1', # the main video
+ 2: 'slide',
+ 3: 'presentation',
+ 4: 'video2', # screencast?
+ 5: 'video3',
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(
+ r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ url, data = unsmuggle_url(url, {})
+ mobj = re.match(self._VALID_URL, url)
+ resource_id = mobj.group('id')
+ query = mobj.group('query')
+
+ webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
+ redirect_url = compat_str(urlh.geturl())
+
+ # XXX: might have also extracted UrlReferrer and QueryString from the html
+ service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
+ r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id,
+ default='/Mediasite/PlayerService/PlayerService.svc/json'))
+
+ player_options = self._download_json(
+ '%s/GetPlayerOptions' % service_path, resource_id,
+ headers={
+ 'Content-type': 'application/json; charset=utf-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ },
+ data=json.dumps({
+ 'getPlayerOptionsRequest': {
+ 'ResourceId': resource_id,
+ 'QueryString': query,
+ 'UrlReferrer': data.get('UrlReferrer', ''),
+ 'UseScreenReader': False,
+ }
+ }).encode('utf-8'))['d']
+
+ presentation = player_options['Presentation']
+ if presentation is None:
+ raise ExtractorError(
+ 'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'],
+ expected=True)
+
+ title = presentation['Title']
+
+ thumbnails = []
+ formats = []
+ for snum, Stream in enumerate(presentation['Streams']):
+ stream_type = Stream.get('StreamType')
+ if stream_type is None:
+ continue
+
+ video_urls = Stream.get('VideoUrls')
+ if not isinstance(video_urls, list):
+ video_urls = []
+
+ stream_id = self._STREAM_TYPES.get(
+ stream_type, 'type%u' % stream_type)
+
+ stream_formats = []
+ for unum, VideoUrl in enumerate(video_urls):
+ video_url = VideoUrl.get('Location')
+ if not video_url or not isinstance(video_url, compat_str):
+ continue
+ # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
+
+ media_type = VideoUrl.get('MediaType')
+ if media_type == 'SS':
+ stream_formats.extend(self._extract_ism_formats(
+ video_url, resource_id,
+ ism_id='%s-%u.%u' % (stream_id, snum, unum),
+ fatal=False))
+ elif media_type == 'Dash':
+ stream_formats.extend(self._extract_mpd_formats(
+ video_url, resource_id,
+ mpd_id='%s-%u.%u' % (stream_id, snum, unum),
+ fatal=False))
+ else:
+ stream_formats.append({
+ 'format_id': '%s-%u.%u' % (stream_id, snum, unum),
+ 'url': video_url,
+ 'ext': mimetype2ext(VideoUrl.get('MimeType')),
+ })
+
+ # TODO: if Stream['HasSlideContent']:
+ # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
+ # from Stream['Slides']
+ # this will require writing a custom downloader...
+
+ # disprefer 'secondary' streams
+ if stream_type != 0:
+ for fmt in stream_formats:
+ fmt['preference'] = -1
+
+ thumbnail_url = Stream.get('ThumbnailUrl')
+ if thumbnail_url:
+ thumbnails.append({
+ 'id': '%s-%u' % (stream_id, snum),
+ 'url': urljoin(redirect_url, thumbnail_url),
+ 'preference': -1 if stream_type != 0 else 0,
+ })
+ formats.extend(stream_formats)
+
+ self._sort_formats(formats)
+
+ # XXX: Presentation['Presenters']
+ # XXX: Presentation['Transcript']
+
+ return {
+ 'id': resource_id,
+ 'title': title,
+ 'description': presentation.get('Description'),
+ 'duration': float_or_none(presentation.get('Duration'), 1000),
+ 'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
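
Aside (illustrative, not part of the patch): how the referring page URL travels from the generic extractor (see the generic.py hunk above) into MediasiteIE. smuggle_url and unsmuggle_url are existing helpers in youtube_dl/utils.py; the URLs below are placeholders.

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    embed_url = 'https://example.edu/Mediasite/Play/0123456789abcdef0123456789abcdef'
    referrer = 'https://example.edu/lectures/some-article/'

    # GenericIE smuggles the hosting page URL alongside the embed URL ...
    smuggled = smuggle_url(embed_url, {'UrlReferrer': referrer})
    # ... and MediasiteIE._real_extract unsmuggles it back, feeding it into
    # the GetPlayerOptions request body.
    url, data = unsmuggle_url(smuggled, {})
    assert url == embed_url and data['UrlReferrer'] == referrer
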
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 964dc54..42759ea 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -1,13 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import uuid
from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..compat import (
compat_str,
- compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
@@ -42,31 +42,33 @@ class MiTeleBaseIE(InfoExtractor):
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
- bas = location.get('bas')
- loc = location.get('loc')
+ gcp = location.get('gcp')
ogn = location.get('ogn')
- if None in (gat, bas, loc, ogn):
+ if None in (gat, gcp, ogn):
continue
token_data = {
- 'bas': bas,
- 'icd': loc,
+ 'gcp': gcp,
'ogn': ogn,
- 'sta': '0',
+ 'sta': 0,
}
media = self._download_json(
- '%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
- video_id, 'Downloading %s JSON' % location['loc'])
- file_ = media.get('file')
- if not file_:
+ gat, video_id, data=json.dumps(token_data).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json;charset=utf-8',
+ 'Referer': url,
+ })
+ stream = media.get('stream') or media.get('file')
+ if not stream:
continue
- ext = determine_ext(file_)
+ ext = determine_ext(stream)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
- file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
+ stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
- file_, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ stream, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 6fe3b60..e24396e 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -4,8 +4,11 @@ import datetime
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
+ InAdvancePagedList,
+ orderedSet,
str_to_int,
unified_strdate,
)
@@ -114,3 +117,86 @@ class MotherlessIE(InfoExtractor):
'age_limit': age_limit,
'url': video_url,
}
+
+
+class MotherlessGroupIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
+ _TESTS = [{
+ 'url': 'http://motherless.com/g/movie_scenes',
+ 'info_dict': {
+ 'id': 'movie_scenes',
+ 'title': 'Movie Scenes',
+ 'description': 'Hot and sexy scenes from "regular" movies... '
+ 'Beautiful actresses fully nude... A looot of '
+ 'skin! :)Enjoy!',
+ },
+ 'playlist_mincount': 662,
+ }, {
+ 'url': 'http://motherless.com/gv/sex_must_be_funny',
+ 'info_dict': {
+ 'id': 'sex_must_be_funny',
+ 'title': 'Sex must be funny',
+ 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
+ 'any kind!'
+ },
+ 'playlist_mincount': 9,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if MotherlessIE.suitable(url)
+ else super(MotherlessGroupIE, cls).suitable(url))
+
+ def _extract_entries(self, webpage, base):
+ entries = []
+ for mobj in re.finditer(
+ r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
+ webpage):
+ video_url = compat_urlparse.urljoin(base, mobj.group('href'))
+ if not MotherlessIE.suitable(video_url):
+ continue
+ video_id = MotherlessIE._match_id(video_url)
+ title = mobj.group('title')
+ entries.append(self.url_result(
+ video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
+ video_title=title))
+ # Alternative fallback
+ if not entries:
+ entries = [
+ self.url_result(
+ compat_urlparse.urljoin(base, '/' + video_id),
+ ie=MotherlessIE.ie_key(), video_id=video_id)
+ for video_id in orderedSet(re.findall(
+ r'data-codename=["\']([A-Z0-9]+)', webpage))]
+ return entries
+
+ def _real_extract(self, url):
+ group_id = self._match_id(url)
+ page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
+ webpage = self._download_webpage(page_url, group_id)
+ title = self._search_regex(
+ r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
+ description = self._html_search_meta(
+ 'description', webpage, fatal=False)
+ page_count = self._int(self._search_regex(
+ r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
+ webpage, 'page_count'), 'page_count')
+ PAGE_SIZE = 80
+
+ def _get_page(idx):
+ webpage = self._download_webpage(
+ page_url, group_id, query={'page': idx + 1},
+ note='Downloading page %d/%d' % (idx + 1, page_count)
+ )
+ for entry in self._extract_entries(webpage, url):
+ yield entry
+
+ playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
+
+ return {
+ '_type': 'playlist',
+ 'id': group_id,
+ 'title': title,
+ 'description': description,
+ 'entries': playlist
+ }
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
index 8e13bcf..5c8b37e 100644
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -19,11 +19,11 @@ from ..utils import (
class OdnoklassnikiIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
+ _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)'
_TESTS = [{
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
- 'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
+ 'md5': '0b62089b479e06681abaaca9d204f152',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
@@ -35,7 +35,6 @@ class OdnoklassnikiIE(InfoExtractor):
'like_count': int,
'age_limit': 0,
},
- 'skip': 'Video has been blocked',
}, {
# metadataUrl
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
@@ -99,6 +98,9 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
'url': 'http://mobile.ok.ru/video/20079905452',
'only_matching': True,
+ }, {
+ 'url': 'https://www.ok.ru/live/484531969818',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -184,6 +186,10 @@ class OdnoklassnikiIE(InfoExtractor):
})
return info
+ assert title
+ if provider == 'LIVE_TV_APP':
+ info['title'] = self._live_title(title)
+
quality = qualities(('4', '0', '1', '2', '3', '5'))
formats = [{
@@ -210,6 +216,20 @@ class OdnoklassnikiIE(InfoExtractor):
if fmt_type:
fmt['quality'] = quality(fmt_type)
+ # Live formats
+ m3u8_url = metadata.get('hlsMasterPlaylistUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
+ m3u8_id='hls', fatal=False))
+ rtmp_url = metadata.get('rtmpUrl')
+ if rtmp_url:
+ formats.append({
+ 'url': rtmp_url,
+ 'format_id': 'rtmp',
+ 'ext': 'flv',
+ })
+
self._sort_formats(formats)
info['formats'] = formats
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index d1eb3be..eaaaf8a 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -285,6 +285,11 @@ class OpenloadIE(InfoExtractor):
'url': 'https://openload.co/embed/Sxz5sADo82g/',
'only_matching': True,
}, {
+ # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
+ # via https://openload.co/f/e-Ixz9ZR5L0/
+ 'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
+ 'only_matching': True,
+ }, {
'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
'only_matching': True,
}, {
@@ -305,20 +310,34 @@ class OpenloadIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- url = 'https://openload.co/embed/%s/' % video_id
+ url_pattern = 'https://openload.co/%%s/%s/' % video_id
headers = {
'User-Agent': self._USER_AGENT,
}
- webpage = self._download_webpage(url, video_id, headers=headers)
-
- if 'File not found' in webpage or 'deleted by the owner' in webpage:
- raise ExtractorError('File not found', expected=True, video_id=video_id)
+ for path in ('embed', 'f'):
+ page_url = url_pattern % path
+ last = path == 'f'
+ webpage = self._download_webpage(
+ page_url, video_id, 'Downloading %s webpage' % path,
+ headers=headers, fatal=last)
+ if not webpage:
+ continue
+ if 'File not found' in webpage or 'deleted by the owner' in webpage:
+ if not last:
+ continue
+ raise ExtractorError('File not found', expected=True, video_id=video_id)
+ break
phantom = PhantomJSwrapper(self, required_version='2.0')
- webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
+ webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
+
- decoded_id = get_element_by_id('streamurl', webpage)
+ decoded_id = (get_element_by_id('streamurl', webpage) or
+ get_element_by_id('streamuri', webpage) or
+ get_element_by_id('streamurj', webpage))
+ if not decoded_id:
+ raise ExtractorError('Can\'t find stream URL', video_id=video_id)
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
@@ -327,7 +346,7 @@ class OpenloadIE(InfoExtractor):
'title', default=None) or self._html_search_meta(
'description', webpage, 'title', fatal=True)
- entries = self._parse_html5_media_entries(url, webpage, video_id)
+ entries = self._parse_html5_media_entries(page_url, webpage, video_id)
entry = entries[0] if entries else {}
subtitles = entry.get('subtitles')
diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py
index 391e1bd..4c5f579 100644
--- a/youtube_dl/extractor/playtvak.py
+++ b/youtube_dl/extractor/playtvak.py
@@ -24,7 +24,7 @@ class PlaytvakIE(InfoExtractor):
'id': 'A150730_150323_hodinovy-manzel_kuko',
'ext': 'mp4',
'title': 'Vyžeňte vosy a sršně ze zahrady',
- 'description': 'md5:f93d398691044d303bc4a3de62f3e976',
+ 'description': 'md5:4436e61b7df227a093778efb7e373571',
'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
'duration': 279,
'timestamp': 1438732860,
@@ -36,9 +36,19 @@ class PlaytvakIE(InfoExtractor):
'info_dict': {
'id': 'A150624_164934_planespotting_cat',
'ext': 'flv',
- 'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
- 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # requires rtmpdump
+ },
+ }, { # another live stream, this one without Misc.videoFLV
+ 'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap',
+ 'info_dict': {
+ 'id': 'A151218_145728_hlavni-nadrazi_plap',
+ 'ext': 'flv',
+ 'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True,
},
'params': {
@@ -95,7 +105,7 @@ class PlaytvakIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
info_url = self._html_search_regex(
- r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
+ r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
parsed_url = compat_urlparse.urlparse(info_url)
@@ -160,7 +170,7 @@ class PlaytvakIE(InfoExtractor):
if is_live:
title = self._live_title(title)
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
- 'description', webpage, 'description')
+ 'description', webpage, 'description', default=None)
timestamp = None
duration = None
if not is_live:
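
The only behavioural change for regular Playtvak videos is the relaxed info-URL pattern, which now accepts both the older Misc.videoFLV(...) call and the plain Misc.video(...) used by some live pages. A quick check with fabricated snippets:

    import re

    INFO_URL_RE = r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"'
    for snippet in ('Misc.videoFLV( { data : "http://old-style" ',
                    'Misc.video( { data : "http://live-style" '):
        print(re.search(INFO_URL_RE, snippet).group(1))  # http://old-style, http://live-style
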
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
index 597b112..aacc5d4 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -171,12 +171,12 @@ class PluralsightIE(PluralsightBaseIE):
for num, current in enumerate(subs):
current = subs[num]
start, text = (
- float_or_none(dict_get(current, TIME_OFFSET_KEYS)),
+ float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)),
dict_get(current, TEXT_KEYS))
if start is None or text is None:
continue
end = duration if num == len(subs) - 1 else float_or_none(
- dict_get(subs[num + 1], TIME_OFFSET_KEYS))
+ dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False))
if end is None:
continue
srt += os.linesep.join(
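
The Pluralsight fix matters for subtitle cues that start at offset 0: with the default skip_false_values=True, dict_get treats 0 as missing and the first cue is dropped. A local stand-in (not the library's implementation) showing the difference:

    # Simplified stand-in for youtube_dl.utils.dict_get, for illustration only.
    def dict_get(d, keys, default=None, skip_false_values=True):
        for key in keys:
            if key in d and d[key] is not None and (d[key] or not skip_false_values):
                return d[key]
        return default

    cue = {'DisplayTimeOffset': 0, 'Text': 'intro'}
    print(dict_get(cue, ('DisplayTimeOffset',)))                           # None - 0 is skipped
    print(dict_get(cue, ('DisplayTimeOffset',), skip_false_values=False))  # 0
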
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index d9edf9d..fa60ffd 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -31,6 +31,9 @@ def _decrypt_url(png):
hash_index = data.index('#')
alphabet_data = data[:hash_index]
url_data = data[hash_index + 1:]
+ if url_data[0] == 'H' and url_data[3] == '%':
+ # remove useless HQ%% at the start
+ url_data = url_data[4:]
alphabet = []
e = 0
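
The RTVE guard simply drops a literal four-character HQ%% marker before the alphabet-based URL decoding runs. A sketch with a fabricated payload:

    def strip_hq_marker(url_data):
        if url_data[0] == 'H' and url_data[3] == '%':
            # remove useless HQ%% at the start
            return url_data[4:]
        return url_data

    print(strip_hq_marker('HQ%%encoded-payload'))  # encoded-payload
    print(strip_hq_marker('encoded-payload'))      # unchanged
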
diff --git a/youtube_dl/extractor/sandia.py b/youtube_dl/extractor/sandia.py
deleted file mode 100644
index 96e43af..0000000
--- a/youtube_dl/extractor/sandia.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- mimetype2ext,
-)
-
-
-class SandiaIE(InfoExtractor):
- IE_DESC = 'Sandia National Laboratories'
- _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
- _TEST = {
- 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
- 'md5': '9422edc9b9a60151727e4b6d8bef393d',
- 'info_dict': {
- 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
- 'ext': 'mp4',
- 'title': 'Xyce Software Training - Section 1',
- 'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
- 'upload_date': '20120409',
- 'timestamp': 1333983600,
- 'duration': 7794,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- presentation_data = self._download_json(
- 'http://digitalops.sandia.gov/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
- video_id, data=json.dumps({
- 'getPlayerOptionsRequest': {
- 'ResourceId': video_id,
- 'QueryString': '',
- }
- }), headers={
- 'Content-Type': 'application/json; charset=utf-8',
- })['d']['Presentation']
-
- title = presentation_data['Title']
-
- formats = []
- for stream in presentation_data.get('Streams', []):
- for fd in stream.get('VideoUrls', []):
- formats.append({
- 'format_id': fd['MediaType'],
- 'format_note': fd['MimeType'].partition('/')[2],
- 'ext': mimetype2ext(fd['MimeType']),
- 'url': fd['Location'],
- 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': presentation_data.get('Description'),
- 'formats': formats,
- 'timestamp': int_or_none(presentation_data.get('UnixTime'), 1000),
- 'duration': int_or_none(presentation_data.get('Duration'), 1000),
- }
diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py
index 7145d28..6fc2ff6 100644
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -1,11 +1,13 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
class SlutloadIE(InfoExtractor):
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
'md5': '868309628ba00fd488cf516a113fd717',
'info_dict': {
@@ -15,11 +17,17 @@ class SlutloadIE(InfoExtractor):
'age_limit': 18,
'thumbnail': r're:https?://.*?\.jpg'
}
- }
+ }, {
+ # mobile site
+ 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+
+ desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
+ webpage = self._download_webpage(desktop_url, video_id)
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
webpage, 'title').strip()
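
The Slutload change rewrites mobile URLs to the desktop host before downloading, since the mobile page lacks the markup the extractor scrapes. The rewrite itself is a one-line re.sub (paths below are illustrative):

    import re

    def to_desktop_url(url):
        return re.sub(r'^(https?://)mobile\.', r'\1', url)

    print(to_desktop_url('http://mobile.slutload.com/video/some-title/fviFLmc6kzJ/'))
    print(to_desktop_url('http://www.slutload.com/video/some-title/TD73btpBqSxc/'))  # unchanged
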
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 8894f4b..6c9816e 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -136,6 +136,25 @@ class SoundcloudIE(InfoExtractor):
'license': 'all-rights-reserved',
},
},
+ # no album art, use avatar pic for thumbnail
+ {
+ 'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
+ 'md5': '59c7872bc44e5d99b7211891664760c2',
+ 'info_dict': {
+ 'id': '309699954',
+ 'ext': 'mp3',
+ 'title': 'Sideways (Prod. Mad Real)',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'uploader': 'garyvee',
+ 'upload_date': '20170226',
+ 'duration': 207,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
]
_CLIENT_ID = 'c6CU49JDMapyrQo06UxU9xouB9ZVzqCn'
@@ -160,7 +179,7 @@ class SoundcloudIE(InfoExtractor):
name = full_title or track_id
if quiet:
self.report_extraction(name)
- thumbnail = info.get('artwork_url')
+ thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500')
ext = 'mp3'
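
The SoundCloud thumbnail now falls back to the uploader's avatar when a track has no artwork, and the existing '-large' to '-t500x500' upscale still applies. A standalone sketch (str in place of compat_str, sample URL fabricated):

    def pick_thumbnail(info):
        thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
        if isinstance(thumbnail, str):
            thumbnail = thumbnail.replace('-large', '-t500x500')
        return thumbnail

    print(pick_thumbnail({'user': {'avatar_url': 'https://i1.sndcdn.com/avatars-000012345-large.jpg'}}))
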
diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py
new file mode 100644
index 0000000..aa0c6e9
--- /dev/null
+++ b/youtube_dl/extractor/twentythreevideo.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TwentyThreeVideoIE(InfoExtractor):
+ IE_NAME = '23video'
+ _VALID_URL = r'https?://video\.(?P<domain>twentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
+ _TEST = {
+ 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
+ 'md5': '75fcf216303eb1dae9920d651f85ced4',
+ 'info_dict': {
+ 'id': '20448876',
+ 'ext': 'mp4',
+ 'title': 'Video Marketing Minute: Personalized Video',
+ 'timestamp': 1513855354,
+ 'upload_date': '20171221',
+ 'uploader_id': '12258964',
+ 'uploader': 'Rasmus Bysted',
+ }
+ }
+
+ def _real_extract(self, url):
+ domain, query, photo_id = re.match(self._VALID_URL, url).groups()
+ base_url = 'https://video.%s' % domain
+ photo_data = self._download_json(
+ base_url + '/api/photo/list?' + query, photo_id, query={
+ 'format': 'json',
+ }, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo']
+ title = photo_data['title']
+
+ formats = []
+
+ audio_path = photo_data.get('audio_download')
+ if audio_path:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': base_url + audio_path,
+ 'filesize': int_or_none(photo_data.get('audio_size')),
+ 'vcodec': 'none',
+ })
+
+ def add_common_info_to_list(l, template, id_field, id_value):
+ f_base = template % id_value
+ f_path = photo_data.get(f_base + 'download')
+ if not f_path:
+ return
+ l.append({
+ id_field: id_value,
+ 'url': base_url + f_path,
+ 'width': int_or_none(photo_data.get(f_base + 'width')),
+ 'height': int_or_none(photo_data.get(f_base + 'height')),
+ 'filesize': int_or_none(photo_data.get(f_base + 'size')),
+ })
+
+ for f in ('mobile_high', 'medium', 'hd', '1080p', '4k'):
+ add_common_info_to_list(formats, 'video_%s_', 'format_id', f)
+
+ thumbnails = []
+ for t in ('quad16', 'quad50', 'quad75', 'quad100', 'small', 'portrait', 'standard', 'medium', 'large', 'original'):
+ add_common_info_to_list(thumbnails, '%s_', 'id', t)
+
+ return {
+ 'id': photo_id,
+ 'title': title,
+ 'timestamp': int_or_none(photo_data.get('creation_date_epoch')),
+ 'duration': int_or_none(photo_data.get('video_length')),
+ 'view_count': int_or_none(photo_data.get('view_count')),
+ 'comment_count': int_or_none(photo_data.get('number_of_comments')),
+ 'uploader_id': photo_data.get('user_id'),
+ 'uploader': photo_data.get('display_name'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
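
The new 23video extractor asks the photo/list API for JSON but still runs the response through a regex, presumably because the payload can arrive with JSONP-style padding. A minimal reproduction of that unwrapping step with a fabricated payload:

    import json
    import re

    padded = 'cb({"photo": {"title": "Demo clip", "video_length": "95"}});'
    photo = json.loads(re.search(r'(?s)({.+})', padded).group(1))['photo']
    print(photo['title'], int(photo['video_length']))  # Demo clip 95
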
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index bf57eac..f9164af 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -358,9 +358,16 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
break
offset += limit
return self.playlist_result(
- [self.url_result(entry) for entry in orderedSet(entries)],
+ [self._make_url_result(entry) for entry in orderedSet(entries)],
channel_id, channel_name)
+ def _make_url_result(self, url):
+ try:
+ video_id = 'v%s' % TwitchVodIE._match_id(url)
+ return self.url_result(url, TwitchVodIE.ie_key(), video_id=video_id)
+ except AssertionError:
+ return self.url_result(url)
+
def _extract_playlist_page(self, response):
videos = response.get('videos')
return [video['url'] for video in videos] if videos else []
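
_make_url_result lets Twitch playlist entries carry an explicit video id when the URL looks like a VOD, and quietly degrades to a plain url_result otherwise. A standalone version of the same idea, using a simplified stand-in for TwitchVodIE._VALID_URL:

    import re

    VOD_ID_RE = re.compile(r'twitch\.tv/(?:[^/]+/v|videos)/(?P<id>\d+)')

    def make_url_result(url):
        m = VOD_ID_RE.search(url)
        if m:
            return {'url': url, 'ie_key': 'TwitchVod', 'video_id': 'v%s' % m.group('id')}
        return {'url': url}

    print(make_url_result('https://www.twitch.tv/videos/6528877'))
    print(make_url_result('https://clips.twitch.tv/SomeRandomClip'))  # no id, plain result
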
diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py
new file mode 100644
index 0000000..ab82381
--- /dev/null
+++ b/youtube_dl/extractor/ufctv.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class UFCTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
+ 'info_dict': {
+ 'id': '34167',
+ 'ext': 'mp4',
+ 'title': 'UFC 219 Countdown: Full Episode',
+ 'description': 'md5:26d4e8bf4665ae5878842d7050c3c646',
+ 'timestamp': 1513962360,
+ 'upload_date': '20171222',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ video_data = self._download_json(url, display_id, query={
+ 'format': 'json',
+ })
+ video_id = str(video_data['id'])
+ title = video_data['name']
+ m3u8_url = self._download_json(
+ 'https://www.ufc.tv/service/publishpoint', video_id, query={
+ 'type': 'video',
+ 'format': 'json',
+ 'id': video_id,
+ }, headers={
+ 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
+ })['path']
+ m3u8_url = m3u8_url.replace('_iphone.', '.')
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'duration': parse_duration(video_data.get('runtime')),
+ 'timestamp': parse_iso8601(video_data.get('releaseDate')),
+ 'formats': formats,
+ }
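
The UFC.TV extractor requests the publishpoint with an iOS user agent and then strips the device marker from the returned playlist name to reach the generic master playlist. The rewrite is a plain string replace (URL below is fabricated):

    m3u8_url = 'https://example-cdn.ufc.tv/ufc219/countdown_iphone.m3u8'
    print(m3u8_url.replace('_iphone.', '.'))  # .../countdown.m3u8
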
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index cedb548..6af7056 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -468,11 +468,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
request = sanitized_Request(url, headers=headers)
try:
webpage, urlh = self._download_webpage_handle(request, video_id)
+ redirect_url = compat_str(urlh.geturl())
# Some URLs redirect to ondemand can't be extracted with
# this extractor right away thus should be passed through
# ondemand extractor (e.g. https://vimeo.com/73445910)
- if VimeoOndemandIE.suitable(urlh.geturl()):
- return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key())
+ if VimeoOndemandIE.suitable(redirect_url):
+ return self.url_result(redirect_url, VimeoOndemandIE.ie_key())
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read()
@@ -541,15 +542,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
if '_video_password_verified' in data:
raise ExtractorError('video password verification failed!')
- self._verify_video_password(url, video_id, webpage)
+ self._verify_video_password(redirect_url, video_id, webpage)
return self._real_extract(
- smuggle_url(url, {'_video_password_verified': 'verified'}))
+ smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
else:
raise ExtractorError('Unable to extract info section',
cause=e)
else:
if config.get('view') == 4:
- config = self._verify_player_video_password(url, video_id)
+ config = self._verify_player_video_password(redirect_url, video_id)
def is_rented():
if '>You rented this title.<' in webpage:
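
The Vimeo change records the post-redirect URL from the response handle and reuses it for the ondemand check and password verification, instead of the URL that was originally requested. The underlying idea in plain urllib terms:

    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2

    def fetch(url):
        handle = urlopen(url)
        redirect_url = handle.geturl()  # final URL after any 3xx hops
        return redirect_url, handle.read()
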
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index c7947d4..5b0b248 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -245,13 +245,19 @@ class YoukuShowIE(InfoExtractor):
# No data-id value.
'url': 'http://list.youku.com/show/id_zefbfbd61237fefbfbdef.html',
'only_matching': True,
+ }, {
+ # Wrong number of reload_id.
+ 'url': 'http://list.youku.com/show/id_z20eb4acaf5c211e3b2ad.html',
+ 'only_matching': True,
}]
def _extract_entries(self, playlist_data_url, show_id, note, query):
query['callback'] = 'cb'
playlist_data = self._download_json(
playlist_data_url, show_id, query=query, note=note,
- transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
+ transform_source=lambda s: js_to_json(strip_jsonp(s))).get('html')
+ if playlist_data is None:
+ return [None, None]
drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
get_element_by_class('p-drama-half-row', playlist_data))
if drama_list is None:
@@ -291,8 +297,8 @@ class YoukuShowIE(InfoExtractor):
'id': page_config['showid'],
'stage': reload_id,
})
- entries.extend(new_entries)
-
+ if new_entries is not None:
+ entries.extend(new_entries)
desc = self._html_search_meta('description', webpage, fatal=False)
playlist_title = desc.split(',')[0] if desc else None
detail_li = get_element_by_class('p-intro', webpage)
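
The Youku fix tolerates playlist pages that come back without an 'html' payload: the helper signals "nothing here" instead of raising KeyError, and the caller only extends on a real result. The accumulation pattern, with fabricated pages and a stand-in parser:

    def entries_from_page(page):
        html = page.get('html')
        if html is None:
            return None
        return ['entry-%d' % i for i in range(len(html))]  # stand-in for the real parsing

    entries = []
    for page in ({'html': 'ab'}, {}, {'html': 'abc'}):
        new_entries = entries_from_page(page)
        if new_entries is not None:
            entries.extend(new_entries)
    print(entries)  # ['entry-0', 'entry-1', 'entry-0', 'entry-1', 'entry-2']
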
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2843a3d..386897a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -39,6 +39,7 @@ from .compat import (
compat_HTMLParser,
compat_basestring,
compat_chr,
+ compat_ctypes_WINFUNCTYPE,
compat_etree_fromstring,
compat_expanduser,
compat_html_entities,
@@ -1330,24 +1331,24 @@ def _windows_write_string(s, out):
if fileno not in WIN_OUTPUT_IDS:
return False
- GetStdHandle = ctypes.WINFUNCTYPE(
+ GetStdHandle = compat_ctypes_WINFUNCTYPE(
ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
- (b'GetStdHandle', ctypes.windll.kernel32))
+ ('GetStdHandle', ctypes.windll.kernel32))
h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
- WriteConsoleW = ctypes.WINFUNCTYPE(
+ WriteConsoleW = compat_ctypes_WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
- ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32))
+ ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
written = ctypes.wintypes.DWORD(0)
- GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32))
+ GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
- GetConsoleMode = ctypes.WINFUNCTYPE(
+ GetConsoleMode = compat_ctypes_WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
ctypes.POINTER(ctypes.wintypes.DWORD))(
- (b'GetConsoleMode', ctypes.windll.kernel32))
+ ('GetConsoleMode', ctypes.windll.kernel32))
INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
def not_a_console(handle):
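
The console-writing helper now goes through compat_ctypes_WINFUNCTYPE and passes kernel32 function names as text rather than bytes. Assuming the shim's job is to keep the import harmless on platforms where ctypes.WINFUNCTYPE does not exist, a hedged sketch of that kind of guard (not the actual compat.py code):

    try:
        from ctypes import WINFUNCTYPE as compat_ctypes_WINFUNCTYPE  # Windows only
    except ImportError:
        def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
            def resolve(*a, **kw):
                raise OSError('ctypes.WINFUNCTYPE is only available on Windows')
            return resolve
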
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index f999584..9030e24 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2017.12.23'
+__version__ = '2018.01.07'