diff options
author | Ismaël Mejía <iemejia@gmail.com> | 2013-11-02 19:50:45 +0100 |
---|---|---|
committer | Ismaël Mejía <iemejia@gmail.com> | 2013-11-02 19:50:45 +0100 |
commit | 38db46794f3ccfef09094db9b411e55acd4c1a3d (patch) | |
tree | 8c184e42a5f9710c5bad78a080038a4885800ca8 /youtube_dl | |
parent | aa2484e390d8a5e74d740fda61b4062a4a8c1d0e (diff) | |
parent | a9a3876d55be943a7eaf505cbeb8fb862514db6c (diff) | |
download | youtube-dl-38db46794f3ccfef09094db9b411e55acd4c1a3d.zip youtube-dl-38db46794f3ccfef09094db9b411e55acd4c1a3d.tar.gz youtube-dl-38db46794f3ccfef09094db9b411e55acd4c1a3d.tar.bz2 |
Merge branch 'ted_subtitles'
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/dailymotion.py | 6 |
-rw-r--r-- | youtube_dl/extractor/subtitles.py | 12 |
-rw-r--r-- | youtube_dl/extractor/ted.py | 28 |
-rw-r--r-- | youtube_dl/extractor/youtube.py | 2 |
4 files changed, 34 insertions, 14 deletions
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 355b4ed..e87690f 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -141,9 +141,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): raise ExtractorError(u'Unable to extract video URL') # subtitles - video_subtitles = self.extract_subtitles(video_id) + video_subtitles = self.extract_subtitles(video_id, webpage) if self._downloader.params.get('listsubtitles', False): - self._list_available_subtitles(video_id) + self._list_available_subtitles(video_id, webpage) return return { @@ -157,7 +157,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): 'age_limit': age_limit, } - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py index 90de7de..4b4c523 100644 --- a/youtube_dl/extractor/subtitles.py +++ b/youtube_dl/extractor/subtitles.py @@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor): return any([self._downloader.params.get('writesubtitles', False), self._downloader.params.get('writeautomaticsub')]) - def _list_available_subtitles(self, video_id, webpage=None): + def _list_available_subtitles(self, video_id, webpage): """ outputs the available subtitles for the video """ - sub_lang_list = self._get_available_subtitles(video_id) + sub_lang_list = self._get_available_subtitles(video_id, webpage) auto_captions_list = self._get_available_automatic_caption(video_id, webpage) sub_lang = ",".join(list(sub_lang_list.keys())) self.to_screen(u'%s: Available subtitles for video: %s' % @@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor): self.to_screen(u'%s: Available automatic captions for video: %s' % (video_id, 
auto_lang)) - def extract_subtitles(self, video_id, video_webpage=None): + def extract_subtitles(self, video_id, webpage): """ returns {sub_lang: sub} ,{} if subtitles not found or None if the subtitles aren't requested. @@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor): return None available_subs_list = {} if self._downloader.params.get('writeautomaticsub', False): - available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) + available_subs_list.update(self._get_available_automatic_caption(video_id, webpage)) if self._downloader.params.get('writesubtitles', False): - available_subs_list.update(self._get_available_subtitles(video_id)) + available_subs_list.update(self._get_available_subtitles(video_id, webpage)) if not available_subs_list: # error, it didn't get the available subtitles return {} @@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor): return return sub - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): """ returns {sub_lang: url} or {} if not available Must be redefined by the subclasses diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index dfa1176..239e2a4 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -1,10 +1,9 @@ import json import re -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor - -class TEDIE(InfoExtractor): +class TEDIE(SubtitlesInfoExtractor): _VALID_URL=r'''http://www\.ted\.com/ ( ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist @@ -82,11 +81,21 @@ class TEDIE(InfoExtractor): 'url': stream['file'], 'format': stream['id'] } for stream in info['htmlStreams']] + + video_id = info['id'] + + # subtitles + video_subtitles = self.extract_subtitles(video_id, webpage) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, webpage) + return + info = { - 'id': info['id'], + 'id': video_id, 
'title': title, 'thumbnail': thumbnail, 'description': desc, + 'subtitles': video_subtitles, 'formats': formats, } @@ -94,3 +103,14 @@ class TEDIE(InfoExtractor): info.update(info['formats'][-1]) return info + + def _get_available_subtitles(self, video_id, webpage): + options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL) + languages = re.findall(r'(?:<option value=")(\S+)"', options) + if languages: + sub_lang_list = {} + for l in languages: + url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) + sub_lang_list[l] = url + return sub_lang_list + return {} diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index dc601de..9053f3e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1094,7 +1094,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, |