aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor/arte.py
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2016-05-08 06:52:42 +0600
committerSergey M․ <dstftw@gmail.com>2016-05-08 06:52:42 +0600
commit9c072d38c6b0361d91e92c50cd0c753dc8ce3101 (patch)
treee8b6ab7160d20fbcd2597071661c990d4554ad0f /youtube_dl/extractor/arte.py
parent3e169233daf76cd7585ebac12504f8e624b7693b (diff)
downloadyoutube-dl-9c072d38c6b0361d91e92c50cd0c753dc8ce3101.zip
youtube-dl-9c072d38c6b0361d91e92c50cd0c753dc8ce3101.tar.gz
youtube-dl-9c072d38c6b0361d91e92c50cd0c753dc8ce3101.tar.bz2
[arte] Improve language preference (Closes #9401, closes #9162)
Diffstat (limited to 'youtube_dl/extractor/arte.py')
-rw-r--r--youtube_dl/extractor/arte.py58
1 files changed, 43 insertions, 15 deletions
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 881cacf..e37fdae 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -161,24 +161,53 @@ class ArteTVPlus7IE(InfoExtractor):
'es': 'E[ESP]',
}
+ langcode = LANGS.get(lang, lang)
+
formats = []
for format_id, format_dict in player_info['VSR'].items():
f = dict(format_dict)
versionCode = f.get('versionCode')
- langcode = LANGS.get(lang, lang)
- lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
- lang_pref = None
- if versionCode:
- matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
- lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
- source_pref = 0
- if versionCode is not None:
- # The original version with subtitles has lower relevance
- if re.match(r'VO-ST(F|A|E)', versionCode):
- source_pref -= 10
- # The version with sourds/mal subtitles has also lower relevance
- elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
- source_pref -= 9
+ l = re.escape(langcode)
+
+ # Language preference from most to least priority
+ # Reference: section 5.6.3 of
+ # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
+ PREFERENCES = (
+ # original version in requested language, without subtitles
+ r'VO{0}$'.format(l),
+ # original version in requested language, with partial subtitles in requested language
+ r'VO{0}-ST{0}$'.format(l),
+ # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+ r'VO{0}-STM{0}$'.format(l),
+ # non-original (dubbed) version in requested language, without subtitles
+ r'V{0}$'.format(l),
+ # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
+ r'V{0}-ST{0}$'.format(l),
+ # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+ r'V{0}-STM{0}$'.format(l),
+ # original version in requested language, with partial subtitles in different language
+ r'VO{0}-ST(?!{0}).+?$'.format(l),
+ # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
+ r'VO{0}-STM(?!{0}).+?$'.format(l),
+ # original version in different language, with partial subtitles in requested language
+ r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
+ # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
+ r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
+ # original version in different language, without subtitles
+ r'VO(?:(?!{0}))?$'.format(l),
+ # original version in different language, with partial subtitles in different language
+ r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
+ # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
+ r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
+ )
+
+ for pref, p in enumerate(PREFERENCES):
+ if re.match(p, versionCode):
+ lang_pref = len(PREFERENCES) - pref
+ break
+ else:
+ lang_pref = -1
+
format = {
'format_id': format_id,
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@@ -188,7 +217,6 @@ class ArteTVPlus7IE(InfoExtractor):
'height': int_or_none(f.get('height')),
'tbr': int_or_none(f.get('bitrate')),
'quality': qfunc(f.get('quality')),
- 'source_preference': source_pref,
}
if f.get('mediaType') == 'rtmp':