[youtube] Separate methods for embeds extraction

author: Sergey M․ <dstftw@gmail.com> 2017-09-06 00:48:37 +0700
committer: Sergey M․ <dstftw@gmail.com> 2017-09-06 00:48:37 +0700
commit: 66c9fa36c10860b380806b9de48f38d628289e03 (patch)
tree: 7d9b1991b5ae9082c3d0ea8e683c7cc7af627442
parent: c5c9bf0c120d2c481124a0c3913b981cf061fb95 (diff)
download: youtube-dl-66c9fa36c10860b380806b9de48f38d628289e03.zip
youtube-dl-66c9fa36c10860b380806b9de48f38d628289e03.tar.gz
youtube-dl-66c9fa36c10860b380806b9de48f38d628289e03.tar.bz2
2 files changed, 41 insertions, 29 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c81efdc..b83c183 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor):
         if vid_me_embed_url is not None:
             return self.url_result(vid_me_embed_url, 'Vidme')
 
-        # Look for embedded YouTube player
-        matches = re.findall(r'''(?x)
-            (?:
-                <iframe[^>]+?src=|
-                data-video-url=|
-                <embed[^>]+?src=|
-                embedSWF\(?:\s*|
-                <object[^>]+data=|
-                new\s+SWFObject\(
-            )
-            (["\'])
-                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-                (?:embed|v|p)/.+?)
-            \1''', webpage)
-        if matches:
+        # Look for YouTube embeds
+        youtube_urls = YoutubeIE._extract_urls(webpage)
+        if youtube_urls:
             return self.playlist_from_matches(
-                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
-
-        # Look for lazyYT YouTube embed
-        matches = re.findall(
-            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
-
-        # Look for Wordpress "YouTube Video Importer" plugin
-        matches = re.findall(r'''(?x)<div[^>]+
-            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
-            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
+                youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
 
         matches = DailymotionIE._extract_urls(webpage)
         if matches:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 953e382..ad2e933 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             playback_url, video_id, 'Marking watched',
             'Unable to mark watched', fatal=False)
 
+    @staticmethod
+    def _extract_urls(webpage):
+        # Embedded YouTube player
+        entries = [
+            unescapeHTML(mobj.group('url'))
+            for mobj in re.finditer(r'''(?x)
+            (?:
+                <iframe[^>]+?src=|
+                data-video-url=|
+                <embed[^>]+?src=|
+                embedSWF\(?:\s*|
+                <object[^>]+data=|
+                new\s+SWFObject\(
+            )
+            (["\'])
+                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+                (?:embed|v|p)/.+?)
+            \1''', webpage)]
+
+        # lazyYT YouTube embed
+        entries.extend(list(map(
+            unescapeHTML,
+            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+        # Wordpress "YouTube Video Importer" plugin
+        matches = re.findall(r'''(?x)<div[^>]+
+            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+        entries.extend(m[-1] for m in matches)
+
+        return entries
+
+    @staticmethod
+    def _extract_url(webpage):
+        urls = YoutubeIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
     @classmethod
     def extract_id(cls, url):
         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
author	Sergey M․ <dstftw@gmail.com>	2017-09-06 00:48:37 +0700
committer	Sergey M․ <dstftw@gmail.com>	2017-09-06 00:48:37 +0700
commit	66c9fa36c10860b380806b9de48f38d628289e03 (patch)
tree	7d9b1991b5ae9082c3d0ea8e683c7cc7af627442
parent	c5c9bf0c120d2c481124a0c3913b981cf061fb95 (diff)
download	youtube-dl-66c9fa36c10860b380806b9de48f38d628289e03.zip youtube-dl-66c9fa36c10860b380806b9de48f38d628289e03.tar.gz youtube-dl-66c9fa36c10860b380806b9de48f38d628289e03.tar.bz2