Imported Upstream version 2016.02.22

author: Rogério Brito <rbrito@ime.usp.br> 2016-02-24 17:23:49 -0300
committer: Rogério Brito <rbrito@ime.usp.br> 2016-02-24 17:23:49 -0300
commit: 9f2b33881274af98a9145c533a1d295fad71521a (patch)
tree: b7e8f8ef288b2ec35a41261bb4774f9044cfce83 /youtube_dl/downloader
parent: 9ed7fe4fe4c445eb7d9f3197bb300d0db8f1807a (diff)
download: youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.zip
youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.tar.gz
youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.tar.bz2
7 files changed, 121 insertions, 86 deletions
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index b8bf8da..2d51540 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -5,9 +5,9 @@ import re
 import sys
 import time
 
-from ..compat import compat_str
 from ..utils import (
     encodeFilename,
+    error_to_compat_str,
     decodeArgument,
     format_bytes,
     timeconvert,
@@ -45,6 +45,7 @@ class FileDownloader(object):
                         (experimental)
     external_downloader_args:  A list of additional command-line arguments for the
                         external downloader.
+    hls_use_mpegts:     Use the mpegts container for HLS videos.
 
     Subclasses of this one must re-define the real_download method.
     """
@@ -156,7 +157,7 @@ class FileDownloader(object):
 
     def slow_down(self, start_time, now, byte_counter):
         """Sleep if the download speed is over the rate limit."""
-        rate_limit = self.params.get('ratelimit', None)
+        rate_limit = self.params.get('ratelimit')
         if rate_limit is None or byte_counter == 0:
             return
         if now is None:
@@ -186,7 +187,7 @@ class FileDownloader(object):
                 return
             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
         except (IOError, OSError) as err:
-            self.report_error('unable to rename file: %s' % compat_str(err))
+            self.report_error('unable to rename file: %s' % error_to_compat_str(err))
 
     def try_utime(self, filename, last_modified_hdr):
         """Try to set the last-modified time of the given file."""
@@ -295,7 +296,7 @@ class FileDownloader(object):
 
     def report_retry(self, count, retries):
         """Report retry in case of HTTP error 5xx"""
-        self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
+        self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' % (count, retries))
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 535f2a7..8b1b17c 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -1,66 +1,59 @@
 from __future__ import unicode_literals
 
+import os
 import re
 
-from .common import FileDownloader
-from ..utils import sanitized_Request
+from .fragment import FragmentFD
+from ..utils import (
+    sanitize_open,
+    encodeFilename,
+)
 
 
-class DashSegmentsFD(FileDownloader):
+class DashSegmentsFD(FragmentFD):
     """
     Download segments in a DASH manifest
     """
-    def real_download(self, filename, info_dict):
-        self.report_destination(filename)
-        tmpfilename = self.temp_name(filename)
-        base_url = info_dict['url']
-        segment_urls = info_dict['segment_urls']
-
-        is_test = self.params.get('test', False)
-        remaining_bytes = self._TEST_FILE_SIZE if is_test else None
-        byte_counter = 0
 
-        def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
-            self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
-            req = sanitized_Request(target_url)
-            if remaining_bytes is not None:
-                req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
+    FD_NAME = 'dashsegments'
 
-            data = self.ydl.urlopen(req).read()
+    def real_download(self, filename, info_dict):
+        base_url = info_dict['url']
+        segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
+        initialization_url = info_dict.get('initialization_url')
 
-            if remaining_bytes is not None:
-                data = data[:remaining_bytes]
+        ctx = {
+            'filename': filename,
+            'total_frags': len(segment_urls) + (1 if initialization_url else 0),
+        }
 
-            outf.write(data)
-            return len(data)
+        self._prepare_and_start_frag_download(ctx)
 
         def combine_url(base_url, target_url):
             if re.match(r'^https?://', target_url):
                 return target_url
             return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
 
-        with open(tmpfilename, 'wb') as outf:
-            append_url_to_file(
-                outf, combine_url(base_url, info_dict['initialization_url']),
-                'initialization segment')
-            for i, segment_url in enumerate(segment_urls):
-                segment_len = append_url_to_file(
-                    outf, combine_url(base_url, segment_url),
-                    'segment %d / %d' % (i + 1, len(segment_urls)),
-                    remaining_bytes)
-                byte_counter += segment_len
-                if remaining_bytes is not None:
-                    remaining_bytes -= segment_len
-                    if remaining_bytes <= 0:
-                        break
+        segments_filenames = []
 
-        self.try_rename(tmpfilename, filename)
+        def append_url_to_file(target_url, target_filename):
+            success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
+            if not success:
+                return False
+            down, target_sanitized = sanitize_open(target_filename, 'rb')
+            ctx['dest_stream'].write(down.read())
+            down.close()
+            segments_filenames.append(target_sanitized)
 
-        self._hook_progress({
-            'downloaded_bytes': byte_counter,
-            'total_bytes': byte_counter,
-            'filename': filename,
-            'status': 'finished',
-        })
+        if initialization_url:
+            append_url_to_file(initialization_url, ctx['tmpfilename'] + '-Init')
+        for i, segment_url in enumerate(segment_urls):
+            segment_filename = '%s-Seg%d' % (ctx['tmpfilename'], i)
+            append_url_to_file(segment_url, segment_filename)
+
+        self._finish_frag_download(ctx)
+
+        for segment_file in segments_filenames:
+            os.remove(encodeFilename(segment_file))
 
         return True
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 6170cc1..fc96429 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -15,6 +15,7 @@ from ..compat import (
 )
 from ..utils import (
     encodeFilename,
+    fix_xml_ampersands,
     sanitize_open,
     struct_pack,
     struct_unpack,
@@ -272,15 +273,21 @@ class F4mFD(FragmentFD):
         return fragments_list
 
     def _parse_bootstrap_node(self, node, base_url):
-        if node.text is None:
+        # Sometimes non-empty inline bootstrap info can be specified along
+        # with bootstrap url attribute (e.g. dummy inline bootstrap info
+        # contains whitespace characters in [1]). We will prefer bootstrap
+        # url over inline bootstrap info when present.
+        # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
+        bootstrap_url = node.get('url')
+        if bootstrap_url:
             bootstrap_url = compat_urlparse.urljoin(
-                base_url, node.attrib['url'])
+                base_url, bootstrap_url)
             boot_info = self._get_bootstrap_from_url(bootstrap_url)
         else:
             bootstrap_url = None
             bootstrap = base64.b64decode(node.text.encode('ascii'))
             boot_info = read_bootstrap_info(bootstrap)
-        return (boot_info, bootstrap_url)
+        return boot_info, bootstrap_url
 
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
@@ -288,7 +295,10 @@ class F4mFD(FragmentFD):
         self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
         urlh = self.ydl.urlopen(man_url)
         man_url = urlh.geturl()
-        manifest = urlh.read()
+        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+        # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
+        # and https://github.com/rg3/youtube-dl/issues/7823)
+        manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
 
         doc = compat_etree_fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f)
@@ -312,7 +322,8 @@ class F4mFD(FragmentFD):
             metadata = None
 
         fragments_list = build_fragments_list(boot_info)
-        if self.params.get('test', False):
+        test = self.params.get('test', False)
+        if test:
             # We only download the first fragment
             fragments_list = fragments_list[:1]
         total_frags = len(fragments_list)
@@ -322,6 +333,7 @@ class F4mFD(FragmentFD):
         ctx = {
             'filename': filename,
             'total_frags': total_frags,
+            'live': live,
         }
 
         self._prepare_frag_download(ctx)
@@ -376,7 +388,7 @@ class F4mFD(FragmentFD):
                 else:
                     raise
 
-            if not fragments_list and live and bootstrap_url:
+            if not fragments_list and not test and live and bootstrap_url:
                 fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
                 total_frags += len(fragments_list)
                 if fragments_list and (fragments_list[0][1] > frag_i + 1):
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 5a64b29..5bc9949 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -26,7 +26,11 @@ class FragmentFD(FileDownloader):
         self._start_frag_download(ctx)
 
     def _prepare_frag_download(self, ctx):
-        self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
+        if 'live' not in ctx:
+            ctx['live'] = False
+        self.to_screen(
+            '[%s] Total fragments: %s'
+            % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
         self.report_destination(ctx['filename'])
         dl = HttpQuietDownloader(
             self.ydl,
@@ -34,7 +38,7 @@ class FragmentFD(FileDownloader):
                 'continuedl': True,
                 'quiet': True,
                 'noprogress': True,
-                'ratelimit': self.params.get('ratelimit', None),
+                'ratelimit': self.params.get('ratelimit'),
                 'retries': self.params.get('retries', 0),
                 'test': self.params.get('test', False),
             }
@@ -59,37 +63,44 @@ class FragmentFD(FileDownloader):
             'filename': ctx['filename'],
             'tmpfilename': ctx['tmpfilename'],
         }
+
         start = time.time()
-        ctx['started'] = start
+        ctx.update({
+            'started': start,
+            # Total complete fragments downloaded so far in bytes
+            'complete_frags_downloaded_bytes': 0,
+            # Amount of fragment's bytes downloaded by the time of the previous
+            # frag progress hook invocation
+            'prev_frag_downloaded_bytes': 0,
+        })
 
         def frag_progress_hook(s):
             if s['status'] not in ('downloading', 'finished'):
                 return
 
-            frag_total_bytes = s.get('total_bytes', 0)
-            if s['status'] == 'finished':
-                state['downloaded_bytes'] += frag_total_bytes
-                state['frag_index'] += 1
-
-            estimated_size = (
-                (state['downloaded_bytes'] + frag_total_bytes) /
-                (state['frag_index'] + 1) * total_frags)
             time_now = time.time()
-            state['total_bytes_estimate'] = estimated_size
             state['elapsed'] = time_now - start
+            frag_total_bytes = s.get('total_bytes') or 0
+            if not ctx['live']:
+                estimated_size = (
+                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
+                    (state['frag_index'] + 1) * total_frags)
+                state['total_bytes_estimate'] = estimated_size
 
             if s['status'] == 'finished':
-                progress = self.calc_percent(state['frag_index'], total_frags)
+                state['frag_index'] += 1
+                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
+                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+                ctx['prev_frag_downloaded_bytes'] = 0
             else:
                 frag_downloaded_bytes = s['downloaded_bytes']
-                frag_progress = self.calc_percent(frag_downloaded_bytes,
-                                                  frag_total_bytes)
-                progress = self.calc_percent(state['frag_index'], total_frags)
-                progress += frag_progress / float(total_frags)
-
-                state['eta'] = self.calc_eta(
-                    start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
+                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+                if not ctx['live']:
+                    state['eta'] = self.calc_eta(
+                        start, time_now, estimated_size,
+                        state['downloaded_bytes'])
                 state['speed'] = s.get('speed')
+                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
             self._hook_progress(state)
 
         ctx['dl'].add_progress_hook(frag_progress_hook)
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 92765a3..2a775bf 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import os
 import re
 import subprocess
+import sys
 
 from .common import FileDownloader
 from .fragment import FragmentFD
@@ -13,6 +14,7 @@ from ..utils import (
     encodeArgument,
     encodeFilename,
     sanitize_open,
+    handle_youtubedl_headers,
 )
 
 
@@ -33,18 +35,34 @@ class HlsFD(FileDownloader):
         if info_dict['http_headers'] and re.match(r'^https?://', url):
             # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
             # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+            headers = handle_youtubedl_headers(info_dict['http_headers'])
             args += [
                 '-headers',
-                ''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items() if key.lower() != 'accept-encoding')]
+                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
 
-        args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
+        args += ['-i', url, '-c', 'copy']
+        if self.params.get('hls_use_mpegts', False):
+            args += ['-f', 'mpegts']
+        else:
+            args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
 
         args = [encodeArgument(opt) for opt in args]
         args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
 
         self._debug_cmd(args)
 
-        retval = subprocess.call(args)
+        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
+        try:
+            retval = proc.wait()
+        except KeyboardInterrupt:
+            # subprocess.run would send the SIGKILL signal to ffmpeg and the
+            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+            # produces a file that is playable (this is mostly useful for live
+            # streams). Note that Windows is not affected and produces playable
+            # files (see https://github.com/rg3/youtube-dl/issues/8300).
+            if sys.platform != 'win32':
+                proc.communicate(b'q')
+            raise
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 56840e0..f8b69d1 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -140,8 +140,8 @@ class HttpFD(FileDownloader):
 
         if data_len is not None:
             data_len = int(data_len) + resume_len
-            min_data_len = self.params.get("min_filesize", None)
-            max_data_len = self.params.get("max_filesize", None)
+            min_data_len = self.params.get('min_filesize')
+            max_data_len = self.params.get('max_filesize')
             if min_data_len is not None and data_len < min_data_len:
                 self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                 return False
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index 14d56db..9de6e70 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -94,15 +94,15 @@ class RtmpFD(FileDownloader):
             return proc.returncode
 
         url = info_dict['url']
-        player_url = info_dict.get('player_url', None)
-        page_url = info_dict.get('page_url', None)
-        app = info_dict.get('app', None)
-        play_path = info_dict.get('play_path', None)
-        tc_url = info_dict.get('tc_url', None)
-        flash_version = info_dict.get('flash_version', None)
+        player_url = info_dict.get('player_url')
+        page_url = info_dict.get('page_url')
+        app = info_dict.get('app')
+        play_path = info_dict.get('play_path')
+        tc_url = info_dict.get('tc_url')
+        flash_version = info_dict.get('flash_version')
         live = info_dict.get('rtmp_live', False)
-        conn = info_dict.get('rtmp_conn', None)
-        protocol = info_dict.get('rtmp_protocol', None)
+        conn = info_dict.get('rtmp_conn')
+        protocol = info_dict.get('rtmp_protocol')
         real_time = info_dict.get('rtmp_real_time', False)
         no_resume = info_dict.get('no_resume', False)
         continue_dl = self.params.get('continuedl', True)
author	Rogério Brito <rbrito@ime.usp.br>	2016-02-24 17:23:49 -0300
committer	Rogério Brito <rbrito@ime.usp.br>	2016-02-24 17:23:49 -0300
commit	9f2b33881274af98a9145c533a1d295fad71521a (patch)
tree	b7e8f8ef288b2ec35a41261bb4774f9044cfce83 /youtube_dl/downloader
parent	9ed7fe4fe4c445eb7d9f3197bb300d0db8f1807a (diff)
download	youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.zip youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.tar.gz youtube-dl-9f2b33881274af98a9145c533a1d295fad71521a.tar.bz2