about | summary | refs | log | tree | commit | diff | stats
path: root/youtube_dl/downloader
diff options
context:
space:
mode:
author Rogério Brito <rbrito@ime.usp.br> 2016-02-24 17:23:49 -0300
committer Rogério Brito <rbrito@ime.usp.br> 2016-02-24 17:23:49 -0300
commit 9f2b33881274af98a9145c533a1d295fad71521a (patch)
tree b7e8f8ef288b2ec35a41261bb4774f9044cfce83 /youtube_dl/downloader
parent 9ed7fe4fe4c445eb7d9f3197bb300d0db8f1807a (diff)
download youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.zip
youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.tar.gz
youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.tar.bz2
Imported Upstream version 2016.02.22
Diffstat (limited to 'youtube_dl/downloader')
-rw-r--r-- youtube_dl/downloader/common.py 9
-rw-r--r-- youtube_dl/downloader/dash.py 79
-rw-r--r-- youtube_dl/downloader/f4m.py 24
-rw-r--r-- youtube_dl/downloader/fragment.py 51
-rw-r--r-- youtube_dl/downloader/hls.py 24
-rw-r--r-- youtube_dl/downloader/http.py 4
-rw-r--r-- youtube_dl/downloader/rtmp.py 16
7 files changed, 121 insertions, 86 deletions
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index b8bf8da..2d51540 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -5,9 +5,9 @@ import re
import sys
import time
-from ..compat import compat_str
from ..utils import (
encodeFilename,
+ error_to_compat_str,
decodeArgument,
format_bytes,
timeconvert,
@@ -45,6 +45,7 @@ class FileDownloader(object):
(experimental)
external_downloader_args: A list of additional command-line arguments for the
external downloader.
+ hls_use_mpegts: Use the mpegts container for HLS videos.
Subclasses of this one must re-define the real_download method.
"""
@@ -156,7 +157,7 @@ class FileDownloader(object):
def slow_down(self, start_time, now, byte_counter):
"""Sleep if the download speed is over the rate limit."""
- rate_limit = self.params.get('ratelimit', None)
+ rate_limit = self.params.get('ratelimit')
if rate_limit is None or byte_counter == 0:
return
if now is None:
@@ -186,7 +187,7 @@ class FileDownloader(object):
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err:
- self.report_error('unable to rename file: %s' % compat_str(err))
+ self.report_error('unable to rename file: %s' % error_to_compat_str(err))
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
@@ -295,7 +296,7 @@ class FileDownloader(object):
def report_retry(self, count, retries):
"""Report retry in case of HTTP error 5xx"""
- self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
+ self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' % (count, retries))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 535f2a7..8b1b17c 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -1,66 +1,59 @@
from __future__ import unicode_literals
+import os
import re
-from .common import FileDownloader
-from ..utils import sanitized_Request
+from .fragment import FragmentFD
+from ..utils import (
+ sanitize_open,
+ encodeFilename,
+)
-class DashSegmentsFD(FileDownloader):
+class DashSegmentsFD(FragmentFD):
"""
Download segments in a DASH manifest
"""
- def real_download(self, filename, info_dict):
- self.report_destination(filename)
- tmpfilename = self.temp_name(filename)
- base_url = info_dict['url']
- segment_urls = info_dict['segment_urls']
-
- is_test = self.params.get('test', False)
- remaining_bytes = self._TEST_FILE_SIZE if is_test else None
- byte_counter = 0
- def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
- self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
- req = sanitized_Request(target_url)
- if remaining_bytes is not None:
- req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
+ FD_NAME = 'dashsegments'
- data = self.ydl.urlopen(req).read()
+ def real_download(self, filename, info_dict):
+ base_url = info_dict['url']
+ segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
+ initialization_url = info_dict.get('initialization_url')
- if remaining_bytes is not None:
- data = data[:remaining_bytes]
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(segment_urls) + (1 if initialization_url else 0),
+ }
- outf.write(data)
- return len(data)
+ self._prepare_and_start_frag_download(ctx)
def combine_url(base_url, target_url):
if re.match(r'^https?://', target_url):
return target_url
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
- with open(tmpfilename, 'wb') as outf:
- append_url_to_file(
- outf, combine_url(base_url, info_dict['initialization_url']),
- 'initialization segment')
- for i, segment_url in enumerate(segment_urls):
- segment_len = append_url_to_file(
- outf, combine_url(base_url, segment_url),
- 'segment %d / %d' % (i + 1, len(segment_urls)),
- remaining_bytes)
- byte_counter += segment_len
- if remaining_bytes is not None:
- remaining_bytes -= segment_len
- if remaining_bytes <= 0:
- break
+ segments_filenames = []
- self.try_rename(tmpfilename, filename)
+ def append_url_to_file(target_url, target_filename):
+ success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
+ if not success:
+ return False
+ down, target_sanitized = sanitize_open(target_filename, 'rb')
+ ctx['dest_stream'].write(down.read())
+ down.close()
+ segments_filenames.append(target_sanitized)
- self._hook_progress({
- 'downloaded_bytes': byte_counter,
- 'total_bytes': byte_counter,
- 'filename': filename,
- 'status': 'finished',
- })
+ if initialization_url:
+ append_url_to_file(initialization_url, ctx['tmpfilename'] + '-Init')
+ for i, segment_url in enumerate(segment_urls):
+ segment_filename = '%s-Seg%d' % (ctx['tmpfilename'], i)
+ append_url_to_file(segment_url, segment_filename)
+
+ self._finish_frag_download(ctx)
+
+ for segment_file in segments_filenames:
+ os.remove(encodeFilename(segment_file))
return True
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 6170cc1..fc96429 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -15,6 +15,7 @@ from ..compat import (
)
from ..utils import (
encodeFilename,
+ fix_xml_ampersands,
sanitize_open,
struct_pack,
struct_unpack,
@@ -272,15 +273,21 @@ class F4mFD(FragmentFD):
return fragments_list
def _parse_bootstrap_node(self, node, base_url):
- if node.text is None:
+ # Sometimes non empty inline bootstrap info can be specified along
+ # with bootstrap url attribute (e.g. dummy inline bootstrap info
+ # contains whitespace characters in [1]). We will prefer bootstrap
+ # url over inline bootstrap info when present.
+ # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
+ bootstrap_url = node.get('url')
+ if bootstrap_url:
bootstrap_url = compat_urlparse.urljoin(
- base_url, node.attrib['url'])
+ base_url, bootstrap_url)
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
bootstrap = base64.b64decode(node.text.encode('ascii'))
boot_info = read_bootstrap_info(bootstrap)
- return (boot_info, bootstrap_url)
+ return boot_info, bootstrap_url
def real_download(self, filename, info_dict):
man_url = info_dict['url']
@@ -288,7 +295,10 @@ class F4mFD(FragmentFD):
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(man_url)
man_url = urlh.geturl()
- manifest = urlh.read()
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
+ # and https://github.com/rg3/youtube-dl/issues/7823)
+ manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
doc = compat_etree_fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
@@ -312,7 +322,8 @@ class F4mFD(FragmentFD):
metadata = None
fragments_list = build_fragments_list(boot_info)
- if self.params.get('test', False):
+ test = self.params.get('test', False)
+ if test:
# We only download the first fragment
fragments_list = fragments_list[:1]
total_frags = len(fragments_list)
@@ -322,6 +333,7 @@ class F4mFD(FragmentFD):
ctx = {
'filename': filename,
'total_frags': total_frags,
+ 'live': live,
}
self._prepare_frag_download(ctx)
@@ -376,7 +388,7 @@ class F4mFD(FragmentFD):
else:
raise
- if not fragments_list and live and bootstrap_url:
+ if not fragments_list and not test and live and bootstrap_url:
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
total_frags += len(fragments_list)
if fragments_list and (fragments_list[0][1] > frag_i + 1):
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 5a64b29..5bc9949 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -26,7 +26,11 @@ class FragmentFD(FileDownloader):
self._start_frag_download(ctx)
def _prepare_frag_download(self, ctx):
- self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
+ if 'live' not in ctx:
+ ctx['live'] = False
+ self.to_screen(
+ '[%s] Total fragments: %s'
+ % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
self.report_destination(ctx['filename'])
dl = HttpQuietDownloader(
self.ydl,
@@ -34,7 +38,7 @@ class FragmentFD(FileDownloader):
'continuedl': True,
'quiet': True,
'noprogress': True,
- 'ratelimit': self.params.get('ratelimit', None),
+ 'ratelimit': self.params.get('ratelimit'),
'retries': self.params.get('retries', 0),
'test': self.params.get('test', False),
}
@@ -59,37 +63,44 @@ class FragmentFD(FileDownloader):
'filename': ctx['filename'],
'tmpfilename': ctx['tmpfilename'],
}
+
start = time.time()
- ctx['started'] = start
+ ctx.update({
+ 'started': start,
+ # Total complete fragments downloaded so far in bytes
+ 'complete_frags_downloaded_bytes': 0,
+ # Amount of fragment's bytes downloaded by the time of the previous
+ # frag progress hook invocation
+ 'prev_frag_downloaded_bytes': 0,
+ })
def frag_progress_hook(s):
if s['status'] not in ('downloading', 'finished'):
return
- frag_total_bytes = s.get('total_bytes', 0)
- if s['status'] == 'finished':
- state['downloaded_bytes'] += frag_total_bytes
- state['frag_index'] += 1
-
- estimated_size = (
- (state['downloaded_bytes'] + frag_total_bytes) /
- (state['frag_index'] + 1) * total_frags)
time_now = time.time()
- state['total_bytes_estimate'] = estimated_size
state['elapsed'] = time_now - start
+ frag_total_bytes = s.get('total_bytes') or 0
+ if not ctx['live']:
+ estimated_size = (
+ (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
+ (state['frag_index'] + 1) * total_frags)
+ state['total_bytes_estimate'] = estimated_size
if s['status'] == 'finished':
- progress = self.calc_percent(state['frag_index'], total_frags)
+ state['frag_index'] += 1
+ state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
+ ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+ ctx['prev_frag_downloaded_bytes'] = 0
else:
frag_downloaded_bytes = s['downloaded_bytes']
- frag_progress = self.calc_percent(frag_downloaded_bytes,
- frag_total_bytes)
- progress = self.calc_percent(state['frag_index'], total_frags)
- progress += frag_progress / float(total_frags)
-
- state['eta'] = self.calc_eta(
- start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
+ state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+ if not ctx['live']:
+ state['eta'] = self.calc_eta(
+ start, time_now, estimated_size,
+ state['downloaded_bytes'])
state['speed'] = s.get('speed')
+ ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state)
ctx['dl'].add_progress_hook(frag_progress_hook)
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 92765a3..2a775bf 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import os
import re
import subprocess
+import sys
from .common import FileDownloader
from .fragment import FragmentFD
@@ -13,6 +14,7 @@ from ..utils import (
encodeArgument,
encodeFilename,
sanitize_open,
+ handle_youtubedl_headers,
)
@@ -33,18 +35,34 @@ class HlsFD(FileDownloader):
if info_dict['http_headers'] and re.match(r'^https?://', url):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+ headers = handle_youtubedl_headers(info_dict['http_headers'])
args += [
'-headers',
- ''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items() if key.lower() != 'accept-encoding')]
+ ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
- args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
+ args += ['-i', url, '-c', 'copy']
+ if self.params.get('hls_use_mpegts', False):
+ args += ['-f', 'mpegts']
+ else:
+ args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
args = [encodeArgument(opt) for opt in args]
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
- retval = subprocess.call(args)
+ proc = subprocess.Popen(args, stdin=subprocess.PIPE)
+ try:
+ retval = proc.wait()
+ except KeyboardInterrupt:
+ # subprocess.run would send the SIGKILL signal to ffmpeg and the
+ # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+ # produces a file that is playable (this is mostly useful for live
+ # streams). Note that Windows is not affected and produces playable
+ # files (see https://github.com/rg3/youtube-dl/issues/8300).
+ if sys.platform != 'win32':
+ proc.communicate(b'q')
+ raise
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 56840e0..f8b69d1 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -140,8 +140,8 @@ class HttpFD(FileDownloader):
if data_len is not None:
data_len = int(data_len) + resume_len
- min_data_len = self.params.get("min_filesize", None)
- max_data_len = self.params.get("max_filesize", None)
+ min_data_len = self.params.get('min_filesize')
+ max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index 14d56db..9de6e70 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -94,15 +94,15 @@ class RtmpFD(FileDownloader):
return proc.returncode
url = info_dict['url']
- player_url = info_dict.get('player_url', None)
- page_url = info_dict.get('page_url', None)
- app = info_dict.get('app', None)
- play_path = info_dict.get('play_path', None)
- tc_url = info_dict.get('tc_url', None)
- flash_version = info_dict.get('flash_version', None)
+ player_url = info_dict.get('player_url')
+ page_url = info_dict.get('page_url')
+ app = info_dict.get('app')
+ play_path = info_dict.get('play_path')
+ tc_url = info_dict.get('tc_url')
+ flash_version = info_dict.get('flash_version')
live = info_dict.get('rtmp_live', False)
- conn = info_dict.get('rtmp_conn', None)
- protocol = info_dict.get('rtmp_protocol', None)
+ conn = info_dict.get('rtmp_conn')
+ protocol = info_dict.get('rtmp_protocol')
real_time = info_dict.get('rtmp_real_time', False)
no_resume = info_dict.get('no_resume', False)
continue_dl = self.params.get('continuedl', True)