diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2016-04-17 17:07:57 +0200 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2016-04-17 17:07:57 +0200 |
commit | 1b6182d8f759546b54a675123901348361bad979 (patch) | |
tree | e3552f926d004d6bb22605cb2f34e05793540210 | |
parent | 7bab22a4029feb651d08f4b631359a0d1aaffd71 (diff) | |
download | youtube-dl-1b6182d8f759546b54a675123901348361bad979.zip youtube-dl-1b6182d8f759546b54a675123901348361bad979.tar.gz youtube-dl-1b6182d8f759546b54a675123901348361bad979.tar.bz2 |
[youtube:playlist] Fetch all the videos in a mix (fixes #3837)
Since there doesn't seem to be any indication, it stops when there aren't new videos in the webpage.
-rw-r--r-- | test/test_youtube_lists.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 28 |
2 files changed, 21 insertions, 9 deletions
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 47df0f3..af1c454 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase): ie = YoutubePlaylistIE(dl) result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') entries = result['entries'] - self.assertTrue(len(entries) >= 20) + self.assertTrue(len(entries) >= 50) original_video = entries[0] self.assertEqual(original_video['id'], 'OQpdSVF_k_w') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 44c1191..a4dd628 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1818,20 +1818,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): def _extract_mix(self, playlist_id): # The mixes are generated from a single video # the id of the playlist is just 'RD' + video_id - url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) - webpage = self._download_webpage( - url, playlist_id, 'Downloading Youtube mix') + ids = [] + last_id = playlist_id[-11:] + for n in itertools.count(1): + url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id) + webpage = self._download_webpage( + url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n)) + new_ids = orderedSet(re.findall( + r'''(?xs)data-video-username=".*?".*? + href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), + webpage)) + # Fetch new pages until all the videos are repeated, it seems that + # there are always 51 unique videos. + new_ids = [_id for _id in new_ids if _id not in ids] + if not new_ids: + break + ids.extend(new_ids) + last_id = ids[-1] + + url_results = self._ids_to_results(ids) + search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) title_span = ( search_title('playlist-title') or search_title('title long-title') or search_title('title')) title = clean_html(title_span) - ids = orderedSet(re.findall( - r'''(?xs)data-video-username=".*?".*? - href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), - webpage)) - url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, title) |