about summary refs log tree commit diff stats
path: root/youtube_dl/extractor/spiegel.py
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2015-01-14 20:27:14 +0600
committer: Sergey M․ <dstftw@gmail.com> 2015-01-14 20:27:14 +0600
commit: e92d4a11f570abb47215b13f88020f52479766e3 (patch)
tree: 0030e94431f5c949f5ad3206e3e6108755ff7c57 /youtube_dl/extractor/spiegel.py
parent: f2cbc96c3ea3f0c3b447b50b445de736e535d63f (diff)
download: youtube-dl-e92d4a11f570abb47215b13f88020f52479766e3.zip
youtube-dl-e92d4a11f570abb47215b13f88020f52479766e3.tar.gz
youtube-dl-e92d4a11f570abb47215b13f88020f52479766e3.tar.bz2
[spiegel] Test format video URLs for 404 (Closes #4579)
Diffstat (limited to 'youtube_dl/extractor/spiegel.py')
-rw-r--r-- youtube_dl/extractor/spiegel.py 49
1 files changed, 33 insertions, 16 deletions
diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py
index 1e55a9f..f345883 100644
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -4,7 +4,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_urlparse,
+ compat_HTTPError,
+)
+from ..utils import (
+ HEADRequest,
+ ExtractorError,
+)
from .spiegeltv import SpiegeltvIE
@@ -60,21 +67,31 @@ class SpiegelIE(InfoExtractor):
xml_url = base_url + video_id + '.xml'
idoc = self._download_xml(xml_url, video_id)
- formats = [
- {
- 'format_id': n.tag.rpartition('type')[2],
- 'url': base_url + n.find('./filename').text,
- 'width': int(n.find('./width').text),
- 'height': int(n.find('./height').text),
- 'abr': int(n.find('./audiobitrate').text),
- 'vbr': int(n.find('./videobitrate').text),
- 'vcodec': n.find('./codec').text,
- 'acodec': 'MP4A',
- }
- for n in list(idoc)
- # Blacklist type 6, it's extremely LQ and not available on the same server
- if n.tag.startswith('type') and n.tag != 'type6'
- ]
+ formats = []
+ for n in list(idoc):
+ if n.tag.startswith('type') and n.tag != 'type6':
+ format_id = n.tag.rpartition('type')[2]
+ video_url = base_url + n.find('./filename').text
+ # Test video URLs beforehand as some of them are invalid
+ try:
+ self._request_webpage(
+ HEADRequest(video_url), video_id,
+ 'Checking %s video URL' % format_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ self.report_warning(
+ '%s video URL is invalid, skipping' % format_id, video_id)
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'width': int(n.find('./width').text),
+ 'height': int(n.find('./height').text),
+ 'abr': int(n.find('./audiobitrate').text),
+ 'vbr': int(n.find('./videobitrate').text),
+ 'vcodec': n.find('./codec').text,
+ 'acodec': 'MP4A',
+ })
duration = float(idoc[0].findall('./duration')[0].text)
self._sort_formats(formats)