about summary refs log tree commit diff stats
path: root/youtube_dl/extractor/pornhub.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/pornhub.py')
-rw-r--r-- youtube_dl/extractor/pornhub.py 20
1 files changed, 10 insertions, 10 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index e032817..3428458 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -186,7 +186,7 @@ class PornHubIE(InfoExtractor):
title, thumbnail, duration = [None] * 3
video_uploader = self._html_search_regex(
- r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
+ r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
view_count = self._extract_count(
@@ -227,13 +227,20 @@ class PornHubIE(InfoExtractor):
class PornHubPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, webpage):
+ # Only process container div with main playlist content skipping
+ # drop-down menu that uses similar pattern for videos (see
+ # https://github.com/rg3/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
return [
self.url_result(
'http://www.pornhub.com/%s' % video_url,
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
- webpage))
+ container))
]
def _real_extract(self, url):
@@ -241,14 +248,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
- # Only process container div with main playlist content skipping
- # drop-down menu that uses similar pattern for videos (see
- # https://github.com/rg3/youtube-dl/issues/11594).
- container = self._search_regex(
- r'(?s)(<div[^>]+class=["\']container.+)', webpage,
- 'container', default=webpage)
-
- entries = self._extract_entries(container)
+ entries = self._extract_entries(webpage)
playlist = self._parse_json(
self._search_regex(