aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor/yesjapan.py
diff options
context:
space:
mode:
authorNaglis Jonaitis <njonaitis@gmail.com>2014-12-14 17:59:25 +0200
committerNaglis Jonaitis <njonaitis@gmail.com>2014-12-14 17:59:25 +0200
commit408b5839b186ffaea09a67e584b8da33525ed967 (patch)
tree2fe41ada3c41ce167ea876a5ea7b904a731cdfdb /youtube_dl/extractor/yesjapan.py
parent60620368d75346212e9eb3ec6e3e7f2c630a5b0e (diff)
downloadyoutube-dl-408b5839b186ffaea09a67e584b8da33525ed967.zip
youtube-dl-408b5839b186ffaea09a67e584b8da33525ed967.tar.gz
youtube-dl-408b5839b186ffaea09a67e584b8da33525ed967.tar.bz2
[yesjapan] Add new extractor (Closes #4466)
Diffstat (limited to 'youtube_dl/extractor/yesjapan.py')
-rw-r--r--youtube_dl/extractor/yesjapan.py62
1 files changed, 62 insertions, 0 deletions
diff --git a/youtube_dl/extractor/yesjapan.py b/youtube_dl/extractor/yesjapan.py
new file mode 100644
index 0000000..fec1ad1
--- /dev/null
+++ b/youtube_dl/extractor/yesjapan.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ HEADRequest,
+ get_element_by_attribute,
+ parse_iso8601,
+)
+
+
+class YesJapanIE(InfoExtractor):
+    """Extractor for video pages on yesjapan.com.
+
+    Title, video URL, description and thumbnail are read from the page's
+    Open Graph tags; the upload timestamp is looked up inside the
+    ``pm-submit-data`` element when the page contains one.
+    """
+
+    # URL shape: /video/<slug>_<id>.html -- only the ``id`` group is used.
+    _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html'
+    _TEST = {
+        'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html',
+        'md5': 'f0be416314e5be21a12b499b330c21cf',
+        'info_dict': {
+            'id': '726497834',
+            'title': 'Japanese in 5! #20 - WA And GA Particle Usages',
+            'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....',
+            'ext': 'mp4',
+            'timestamp': 1416391590,
+            'upload_date': '20141119',
+            # Raw string: '\.' is not a valid escape in a plain literal.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the video page at ``url``.
+
+        Returns a dict with the keys ``id``, ``title``, ``formats``,
+        ``description``, ``timestamp`` and ``thumbnail``. ``timestamp``
+        stays ``None`` when the page has no ``pm-submit-data`` element.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+        video_url = self._og_search_video_url(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        timestamp = None
+        submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage)
+        if submit_info:
+            # Search only the submit-data element, not the whole page, so an
+            # unrelated datetime="..." attribute elsewhere in the markup
+            # cannot be picked up as the upload date.
+            timestamp = parse_iso8601(self._search_regex(
+                r'datetime="([^"]+)"', submit_info, 'upload date', fatal=False, default=None))
+
+        # attempt to resolve the final URL in order to get a proper extension
+        redirect_req = HEADRequest(video_url)
+        req = self._request_webpage(
+            redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False)
+        if req:
+            # Non-fatal: on failure the Open Graph URL is used unchanged.
+            video_url = req.geturl()
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'timestamp': timestamp,
+            'thumbnail': thumbnail,
+        }