aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/utils.py')
-rw-r--r--youtube_dl/utils.py184
1 files changed, 170 insertions, 14 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b3b687a..9595bcf 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
@@ -42,6 +42,7 @@ from .compat import (
compat_html_entities_html5,
compat_http_client,
compat_kwargs,
+ compat_os_name,
compat_parse_qs,
compat_shlex_quote,
compat_socket_create_connection,
@@ -91,6 +92,13 @@ ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December']
+MONTH_NAMES = {
+ 'en': ENGLISH_MONTH_NAMES,
+ 'fr': [
+ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
+ 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+}
+
KNOWN_EXTENSIONS = (
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
'flv', 'f4v', 'f4a', 'f4b',
@@ -134,6 +142,8 @@ DATE_FORMATS = (
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
+ '%b %d %Y at %H:%M',
+ '%b %d %Y at %H:%M:%S',
)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -155,6 +165,8 @@ DATE_FORMATS_MONTH_FIRST.extend([
'%m/%d/%Y %H:%M:%S',
])
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+
def preferredencoding():
"""Get preferred encoding.
@@ -768,6 +780,26 @@ class ContentTooShortError(Exception):
self.expected = expected
+class XAttrMetadataError(Exception):
+ def __init__(self, code=None, msg='Unknown error'):
+ super(XAttrMetadataError, self).__init__(msg)
+ self.code = code
+ self.msg = msg
+
+ # Parsing code and msg
+ if (self.code in (errno.ENOSPC, errno.EDQUOT) or
+ 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+ self.reason = 'NO_SPACE'
+ elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+ self.reason = 'VALUE_TOO_LONG'
+ else:
+ self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(Exception):
+ pass
+
+
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
# expected HTTP responses to meet HTTP/1.0 or later (see also
@@ -1504,38 +1536,63 @@ def parse_filesize(s):
_UNIT_TABLE = {
'B': 1,
'b': 1,
+ 'bytes': 1,
'KiB': 1024,
'KB': 1000,
'kB': 1024,
'Kb': 1000,
+ 'kb': 1000,
+ 'kilobytes': 1000,
+ 'kibibytes': 1024,
'MiB': 1024 ** 2,
'MB': 1000 ** 2,
'mB': 1024 ** 2,
'Mb': 1000 ** 2,
+ 'mb': 1000 ** 2,
+ 'megabytes': 1000 ** 2,
+ 'mebibytes': 1024 ** 2,
'GiB': 1024 ** 3,
'GB': 1000 ** 3,
'gB': 1024 ** 3,
'Gb': 1000 ** 3,
+ 'gb': 1000 ** 3,
+ 'gigabytes': 1000 ** 3,
+ 'gibibytes': 1024 ** 3,
'TiB': 1024 ** 4,
'TB': 1000 ** 4,
'tB': 1024 ** 4,
'Tb': 1000 ** 4,
+ 'tb': 1000 ** 4,
+ 'terabytes': 1000 ** 4,
+ 'tebibytes': 1024 ** 4,
'PiB': 1024 ** 5,
'PB': 1000 ** 5,
'pB': 1024 ** 5,
'Pb': 1000 ** 5,
+ 'pb': 1000 ** 5,
+ 'petabytes': 1000 ** 5,
+ 'pebibytes': 1024 ** 5,
'EiB': 1024 ** 6,
'EB': 1000 ** 6,
'eB': 1024 ** 6,
'Eb': 1000 ** 6,
+ 'eb': 1000 ** 6,
+ 'exabytes': 1000 ** 6,
+ 'exbibytes': 1024 ** 6,
'ZiB': 1024 ** 7,
'ZB': 1000 ** 7,
'zB': 1024 ** 7,
'Zb': 1000 ** 7,
+ 'zb': 1000 ** 7,
+ 'zettabytes': 1000 ** 7,
+ 'zebibytes': 1024 ** 7,
'YiB': 1024 ** 8,
'YB': 1000 ** 8,
'yB': 1024 ** 8,
'Yb': 1000 ** 8,
+ 'yb': 1000 ** 8,
+ 'yottabytes': 1000 ** 8,
+ 'yobibytes': 1024 ** 8,
}
return lookup_unit_table(_UNIT_TABLE, s)
@@ -1562,11 +1619,13 @@ def parse_count(s):
return lookup_unit_table(_UNIT_TABLE, s)
-def month_by_name(name):
+def month_by_name(name, lang='en'):
""" Return the number of a month by (locale-independently) English name """
+ month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
+
try:
- return ENGLISH_MONTH_NAMES.index(name) + 1
+ return month_names.index(name) + 1
except ValueError:
return None
@@ -1632,6 +1691,10 @@ def url_basename(url):
return path.strip('/').split('/')[-1]
+def base_url(url):
+ return re.match(r'https?://[^?#&]+/', url).group()
+
+
class HEADRequest(compat_urllib_request.Request):
def get_method(self):
return 'HEAD'
@@ -1759,8 +1822,12 @@ def get_exe_version(exe, args=['--version'],
""" Returns the version of the specified executable,
or False if the executable is not present """
try:
+ # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+ # SIGTTOU if youtube-dl is run in the background.
+ # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
out, _ = subprocess.Popen(
[encodeArgument(exe)] + args,
+ stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
except OSError:
return False
@@ -2030,14 +2097,14 @@ def js_to_json(code):
}.get(m.group(0), m.group(0)), v[1:-1])
INTEGER_TABLE = (
- (r'^0[xX][0-9a-fA-F]+', 16),
- (r'^0+[0-7]+', 8),
+ (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
+ (r'^(0+[0-7]+)\s*:?$', 8),
)
for regex, base in INTEGER_TABLE:
im = re.match(regex, v)
if im:
- i = int(im.group(0), base)
+ i = int(im.group(1), base)
return '"%d":' % i if v.endswith(':') else '%d' % i
return '"%s"' % v
@@ -2123,7 +2190,7 @@ def mimetype2ext(mt):
return ext
_, _, res = mt.rpartition('/')
- res = res.lower()
+ res = res.split(';')[0].strip().lower()
return {
'3gpp': '3gp',
@@ -2143,6 +2210,7 @@ def mimetype2ext(mt):
'f4m+xml': 'f4m',
'hds+xml': 'f4m',
'vnd.ms-sstr+xml': 'ism',
+ 'quicktime': 'mov',
}.get(res, res)
@@ -2158,7 +2226,7 @@ def parse_codecs(codecs_str):
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
if not vcodec:
vcodec = full_codec
- elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'):
+ elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'):
if not acodec:
acodec = full_codec
else:
@@ -2281,11 +2349,18 @@ def _match_one(filter_part, dct):
m = operator_rex.search(filter_part)
if m:
op = COMPARISON_OPERATORS[m.group('op')]
- if m.group('strval') is not None:
+ actual_value = dct.get(m.group('key'))
+ if (m.group('strval') is not None or
+ # If the original field is a string and matching comparisonvalue is
+ # a number we should respect the origin of the original field
+ # and process comparison value as a string (see
+ # https://github.com/rg3/youtube-dl/issues/11082).
+ actual_value is not None and m.group('intval') is not None and
+ isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
- comparison_value = m.group('strval')
+ comparison_value = m.group('strval') or m.group('intval')
else:
try:
comparison_value = int(m.group('intval'))
@@ -2297,7 +2372,6 @@ def _match_one(filter_part, dct):
raise ValueError(
'Invalid integer value %r in filter part %r' % (
m.group('intval'), filter_part))
- actual_value = dct.get(m.group('key'))
if actual_value is None:
return m.group('none_inclusive')
return op(actual_value, comparison_value)
@@ -2959,9 +3033,7 @@ def encode_base_n(num, n, table=None):
def decode_packed_codes(code):
- mobj = re.search(
- r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
- code)
+ mobj = re.search(PACKED_CODES_RE, code)
obfucasted_code, base, count, symbols = mobj.groups()
base = int(base)
count = int(count)
@@ -3096,3 +3168,87 @@ def decode_png(png_data):
current_row.append(color)
return width, height, pixels
+
+
+def write_xattr(path, key, value):
+ # This mess below finds the best xattr tool for the job
+ try:
+ # try the pyxattr module...
+ import xattr
+
+ if hasattr(xattr, 'set'): # pyxattr
+ # Unicode arguments are not supported in python-pyxattr until
+ # version 0.5.0
+ # See https://github.com/rg3/youtube-dl/issues/5498
+ pyxattr_required_version = '0.5.0'
+ if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+ # TODO: fallback to CLI tools
+ raise XAttrUnavailableError(
+ 'python-pyxattr is detected but is too old. '
+ 'youtube-dl requires %s or above while your version is %s. '
+ 'Falling back to other xattr implementations' % (
+ pyxattr_required_version, xattr.__version__))
+
+ setxattr = xattr.set
+ else: # xattr
+ setxattr = xattr.setxattr
+
+ try:
+ setxattr(path, key, value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+
+ except ImportError:
+ if compat_os_name == 'nt':
+ # Write xattrs to NTFS Alternate Data Streams:
+ # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+ assert ':' not in key
+ assert os.path.exists(path)
+
+ ads_fn = path + ':' + key
+ try:
+ with open(ads_fn, 'wb') as f:
+ f.write(value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ else:
+ user_has_setfattr = check_executable('setfattr', ['--version'])
+ user_has_xattr = check_executable('xattr', ['-h'])
+
+ if user_has_setfattr or user_has_xattr:
+
+ value = value.decode('utf-8')
+ if user_has_setfattr:
+ executable = 'setfattr'
+ opts = ['-n', key, '-v', value]
+ elif user_has_xattr:
+ executable = 'xattr'
+ opts = ['-w', key, value]
+
+ cmd = ([encodeFilename(executable, True)] +
+ [encodeArgument(o) for o in opts] +
+ [encodeFilename(path, True)])
+
+ try:
+ p = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ stdout, stderr = p.communicate()
+ stderr = stderr.decode('utf-8', 'replace')
+ if p.returncode != 0:
+ raise XAttrMetadataError(p.returncode, stderr)
+
+ else:
+ # On Unix, and can't find pyxattr, setfattr, or xattr.
+ if sys.platform.startswith('linux'):
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'pyxattr' or 'xattr' "
+ "modules, or the GNU 'attr' package "
+ "(which contains the 'setfattr' tool).")
+ else:
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'xattr' module, "
+ "or the 'xattr' binary.")