about | summary | refs | log | tree | commit | diff | stats
path: root/youtube_dl/extractor/pornhub.py
diff options
context:
space:
mode:
author Sergey M. <dstftw@gmail.com> 2017-08-13 07:53:02 +0700
committer Sergey M. <dstftw@gmail.com> 2017-08-13 07:53:02 +0700
commit 475bcb225f6046e38b47594c504da6ec15bac113 (patch)
tree a0162464a094b37ba7f44990fae4dcd70e370066 /youtube_dl/extractor/pornhub.py
parent b3c6515365ed415bbf813c0c2e6c12585824b77a (diff)
download youtube-dl-475bcb225f6046e38b47594c504da6ec15bac113.zip
youtube-dl-475bcb225f6046e38b47594c504da6ec15bac113.tar.gz
youtube-dl-475bcb225f6046e38b47594c504da6ec15bac113.tar.bz2
[pornhub:playlistbase] Skip videos from drop-down menu for all playlists (closes #12819, closes #13902)
Diffstat (limited to 'youtube_dl/extractor/pornhub.py')
-rw-r--r-- youtube_dl/extractor/pornhub.py 18
1 files changed, 9 insertions, 9 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index e032817..f6777cd 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -227,13 +227,20 @@ class PornHubIE(InfoExtractor):
class PornHubPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, webpage):
+ # Only process container div with main playlist content skipping
+ # drop-down menu that uses similar pattern for videos (see
+ # https://github.com/rg3/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
return [
self.url_result(
'http://www.pornhub.com/%s' % video_url,
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
- webpage))
+ container))
]
def _real_extract(self, url):
@@ -241,14 +248,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
- # Only process container div with main playlist content skipping
- # drop-down menu that uses similar pattern for videos (see
- # https://github.com/rg3/youtube-dl/issues/11594).
- container = self._search_regex(
- r'(?s)(<div[^>]+class=["\']container.+)', webpage,
- 'container', default=webpage)
-
- entries = self._extract_entries(container)
+ entries = self._extract_entries(webpage)
playlist = self._parse_json(
self._search_regex(