diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2013-06-23 22:14:22 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2013-06-23 22:14:22 +0200 |
commit | c3c77cec300dd05938dcf175ab5fec536184589a (patch) | |
tree | 43a32b4445fbc465b10cd0f57a6f4e993d200896 /youtube_dl | |
parent | 1183b85f50478f6e57e51d2af06e0f6730bb6cb1 (diff) | |
download | youtube-dl-c3c77cec300dd05938dcf175ab5fec536184589a.zip youtube-dl-c3c77cec300dd05938dcf175ab5fec536184589a.tar.gz youtube-dl-c3c77cec300dd05938dcf175ab5fec536184589a.tar.bz2 |
[youjizz] move into own file
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 41 | ||||
-rw-r--r-- | youtube_dl/extractor/youjizz.py | 45 |
2 files changed, 46 insertions, 40 deletions
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 73b3825..080563d 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -53,6 +53,7 @@ from .extractor.worldstarhiphop import WorldStarHipHopIE from .extractor.xnxx import XNXXIE from .extractor.xvideos import XVideosIE from .extractor.yahoo import YahooIE, YahooSearchIE +from .extractor.youjizz import YouJizzIE from .extractor.youku import YoukuIE from .extractor.youporn import YouPornIE from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE @@ -81,46 +82,6 @@ from .extractor.zdf import ZDFIE -class YouJizzIE(InfoExtractor): - """Information extractor for youjizz.com.""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - - video_id = mobj.group('videoid') - - # Get webpage content - webpage = self._download_webpage(url, video_id) - - # Get the video title - video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>', - webpage, u'title').strip() - - # Get the embed page - result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage) - if result is None: - raise ExtractorError(u'ERROR: unable to extract embed page') - - embed_page_url = result.group(0).strip() - video_id = result.group('videoid') - - webpage = self._download_webpage(embed_page_url, video_id) - - # Get the video URL - video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', - webpage, u'video URL') - - info = {'id': video_id, - 'url': video_url, - 'title': video_title, - 'ext': 'flv', - 'format': 'flv', - 'player_url': embed_page_url} - - return [info] class EightTracksIE(InfoExtractor): IE_NAME = '8tracks' diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py new file mode 100644 index 0000000..d9efac7 --- /dev/null +++ b/youtube_dl/extractor/youjizz.py @@ -0,0 +1,45 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class YouJizzIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('videoid') + + # Get webpage content + webpage = self._download_webpage(url, video_id) + + # Get the video title + video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>', + webpage, u'title').strip() + + # Get the embed page + result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage) + if result is None: + raise ExtractorError(u'ERROR: unable to extract embed page') + + embed_page_url = result.group(0).strip() + video_id = result.group('videoid') + + webpage = self._download_webpage(embed_page_url, video_id) + + # Get the video URL + video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', + webpage, u'video URL') + + info = {'id': video_id, + 'url': video_url, + 'title': video_title, + 'ext': 'flv', + 'format': 'flv', + 'player_url': embed_page_url} + + return [info] |