aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2017-09-06 00:48:37 +0700
committerSergey M․ <dstftw@gmail.com>2017-09-06 00:48:37 +0700
commit66c9fa36c10860b380806b9de48f38d628289e03 (patch)
tree7d9b1991b5ae9082c3d0ea8e683c7cc7af627442 /youtube_dl/extractor/youtube.py
parentc5c9bf0c120d2c481124a0c3913b981cf061fb95 (diff)
downloadyoutube-dl-66c9fa36c10860b380806b9de48f38d628289e03.zip
youtube-dl-66c9fa36c10860b380806b9de48f38d628289e03.tar.gz
youtube-dl-66c9fa36c10860b380806b9de48f38d628289e03.tar.bz2
[youtube] Separate methods for embeds extraction
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r--youtube_dl/extractor/youtube.py37
1 files changed, 37 insertions, 0 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 953e382..ad2e933 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
playback_url, video_id, 'Marking watched',
'Unable to mark watched', fatal=False)
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embedded YouTube player
+ entries = [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/.+?)
+ \1''', webpage)]
+
+ # lazyYT YouTube embed
+ entries.extend(list(map(
+ unescapeHTML,
+ re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+ # Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+ entries.extend(m[-1] for m in matches)
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = YoutubeIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
@classmethod
def extract_id(cls, url):
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)