about summary refs log tree commit diff stats
path: root/youtube_dl/extractor/nrk.py
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2015-01-10 17:46:01 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2015-01-10 17:46:01 +0100
commit 76bfaf6daf73ac74f7a9068c51d14ab0a5ec5e52 (patch)
tree 9167b2cd207b0468af8448ec4b71603359bb1a69 /youtube_dl/extractor/nrk.py
parent d89c6e336a3a20085b4ea35f2afcfcecedab5c53 (diff)
download youtube-dl-76bfaf6daf73ac74f7a9068c51d14ab0a5ec5e52.zip
youtube-dl-76bfaf6daf73ac74f7a9068c51d14ab0a5ec5e52.tar.gz
youtube-dl-76bfaf6daf73ac74f7a9068c51d14ab0a5ec5e52.tar.bz2
[nrk] Improve subtitle support (#3092)
Diffstat (limited to 'youtube_dl/extractor/nrk.py')
-rw-r--r-- youtube_dl/extractor/nrk.py 42
1 files changed, 39 insertions, 3 deletions
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 321ce5c..f6de260 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -7,8 +7,10 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
+ parse_duration,
unified_strdate,
)
+from .subtitles import SubtitlesInfoExtractor
class NRKIE(InfoExtractor):
@@ -71,8 +73,8 @@ class NRKIE(InfoExtractor):
}
-class NRKTVIE(InfoExtractor):
- _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+class NRKTVIE(SubtitlesInfoExtractor):
+ _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [
{
@@ -147,6 +149,29 @@ class NRKTVIE(InfoExtractor):
}
]
+ def _seconds2str(self, s):
+ return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
+
+ def _debug_print(self, txt):
+ if self._downloader.params.get('verbose', False):
+ self.to_screen('[debug] %s' % txt)
+
def _extract_captions(self, subtitlesurl, video_id, baseurl):
    """Download a TTML caption document and convert it to SRT-style text.

    Each ``body/div/p`` element in the TTML namespace becomes one cue built
    from its ``begin`` and ``dur`` attributes and its text content.

    :param subtitlesurl: subtitle location, appended verbatim to *baseurl*.
    :param video_id: identifier used in the progress and debug messages.
    :param baseurl: URL prefix that *subtitlesurl* is appended to.
    :returns: a one-entry dict mapping the language code to the cue text.
    """
    url = "%s%s" % (baseurl, subtitlesurl)
    self._debug_print('%s: Subtitle url: %s' % (video_id, url))
    captions = self._download_xml(url, video_id, 'Downloading subtitles')
    # NOTE(review): ElementTree exposes ``xml:lang`` under its namespaced
    # name, so this plain 'lang' lookup may always fall back to 'no' --
    # confirm against a real caption document before changing the key.
    lang = captions.get('lang', 'no')
    ttml_ns = '{http://www.w3.org/ns/ttml}'
    paragraphs = captions.findall('./{0}body/{0}div/{0}p'.format(ttml_ns))
    # Collect cues in a list and join once instead of growing a string.
    cues = []
    for p in paragraphs:
        begin = parse_duration(p.get('begin'))
        duration = parse_duration(p.get('dur'))
        if begin is None or duration is None:
            # Presumably parse_duration yields None for a missing or
            # unparsable attribute; such a cue has no usable timing, so
            # skip it instead of failing on the arithmetic below.
            continue
        # Cues are numbered from 0, counting only the ones actually emitted.
        cues.append('%s\r\n%s --> %s\r\n%s\r\n\r\n' % (
            len(cues),
            self._seconds2str(begin),
            self._seconds2str(begin + duration),
            '\n'.join(p.itertext())))
    return {lang: ''.join(cues)}
+
def _extract_f4m(self, manifest_url, video_id):
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
@@ -154,6 +179,7 @@ class NRKTVIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
part_id = mobj.group('part_id')
+ baseurl = mobj.group('baseurl')
webpage = self._download_webpage(url, video_id)
@@ -210,9 +236,18 @@ class NRKTVIE(InfoExtractor):
m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
if m3u8_url:
formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
-
self._sort_formats(formats)
+ subtitles_url = self._html_search_regex(
+ r'data-subtitlesurl[ ]*=[ ]*"([^"]+)"',
+ webpage, 'subtitle URL', default=None)
+ subtitles = None
+ if subtitles_url:
+ subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
+ if self._downloader.params.get('listsubtitles', False):
+ self._list_available_subtitles(video_id, subtitles)
+ return
+
return {
'id': video_id,
'title': title,
@@ -221,4 +256,5 @@ class NRKTVIE(InfoExtractor):
'upload_date': upload_date,
'duration': duration,
'formats': formats,
+ 'subtitles': subtitles,
}