about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2018-01-07 00:31:53 +0700
committer: Sergey M․ <dstftw@gmail.com> 2018-01-07 00:31:53 +0700
commit: 0a5b1295b7c1aa6395b65ee137087c540b37b32b (patch)
tree: 296c9dd21a73afc359c5c6d6ceed9b43673d6b3e
parent: a133eb7764594b830cb975e3925972214e932704 (diff)
download: youtube-dl-0a5b1295b7c1aa6395b65ee137087c540b37b32b.zip
youtube-dl-0a5b1295b7c1aa6395b65ee137087c540b37b32b.tar.gz
youtube-dl-0a5b1295b7c1aa6395b65ee137087c540b37b32b.tar.bz2
[motherless:group] Relax entry extraction and add a fallback scenario
-rw-r--r-- youtube_dl/extractor/motherless.py 29
1 files changed, 21 insertions, 8 deletions
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 4adac69..e24396e 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -148,14 +148,27 @@ class MotherlessGroupIE(InfoExtractor):
else super(MotherlessGroupIE, cls).suitable(url))
def _extract_entries(self, webpage, base):
- return [
- self.url_result(
- compat_urlparse.urljoin(base, video_path),
- MotherlessIE.ie_key(), video_title=title)
- for video_path, title in orderedSet(re.findall(
- r'href="(/[^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"',
- webpage))
- ]
+ entries = []
+ for mobj in re.finditer(
+ r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
+ webpage):
+ video_url = compat_urlparse.urljoin(base, mobj.group('href'))
+ if not MotherlessIE.suitable(video_url):
+ continue
+ video_id = MotherlessIE._match_id(video_url)
+ title = mobj.group('title')
+ entries.append(self.url_result(
+ video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
+ video_title=title))
+ # Alternative fallback
+ if not entries:
+ entries = [
+ self.url_result(
+ compat_urlparse.urljoin(base, '/' + video_id),
+ ie=MotherlessIE.ie_key(), video_id=video_id)
+ for video_id in orderedSet(re.findall(
+ r'data-codename=["\']([A-Z0-9]+)', webpage))]
+ return entries
def _real_extract(self, url):
group_id = self._match_id(url)