aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor/soundcloud.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/soundcloud.py')
-rw-r--r--youtube_dl/extractor/soundcloud.py176
1 files changed, 112 insertions, 64 deletions
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 3f1a46b..1c6799d 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,8 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
import itertools
+import re
from .common import (
InfoExtractor,
@@ -17,6 +17,7 @@ from ..utils import (
ExtractorError,
int_or_none,
unified_strdate,
+ update_url_query,
)
@@ -31,6 +32,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
+ (?!stations/track)
(?P<uploader>[\w\d-]+)/
(?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
@@ -119,9 +121,24 @@ class SoundcloudIE(InfoExtractor):
'license': 'cc-by-sa',
},
},
+ # private link, downloadable format
+ {
+ 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
+ 'md5': '64a60b16e617d41d0bef032b7f55441e',
+ 'info_dict': {
+ 'id': '340344461',
+ 'ext': 'wav',
+ 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
+ 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
+ 'uploader': 'Ori Uplift Music',
+ 'upload_date': '20170831',
+ 'duration': 7449,
+ 'license': 'all-rights-reserved',
+ },
+ },
]
- _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z'
+ _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod
@@ -159,11 +176,13 @@ class SoundcloudIE(InfoExtractor):
'license': info.get('license'),
}
formats = []
+ query = {'client_id': self._CLIENT_ID}
+ if secret_token is not None:
+ query['secret_token'] = secret_token
if info.get('downloadable', False):
# We can build a direct link to the song
- format_url = (
- 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
- track_id, self._CLIENT_ID))
+ format_url = update_url_query(
+ 'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
formats.append({
'format_id': 'download',
'ext': info.get('original_format', 'mp3'),
@@ -175,10 +194,7 @@ class SoundcloudIE(InfoExtractor):
# We have to retrieve the url
format_dict = self._download_json(
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
- track_id, 'Downloading track url', query={
- 'client_id': self._CLIENT_ID,
- 'secret_token': secret_token,
- })
+ track_id, 'Downloading track url', query=query)
for key, stream_url in format_dict.items():
abr = int_or_none(self._search_regex(
@@ -215,7 +231,7 @@ class SoundcloudIE(InfoExtractor):
# cannot be always used, sometimes it can give an HTTP 404 error
formats.append({
'format_id': 'fallback',
- 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+ 'url': update_url_query(info['stream_url'], query),
'ext': ext,
})
@@ -330,7 +346,63 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
}
-class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
+class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
+ _API_BASE = 'https://api.soundcloud.com'
+ _API_V2_BASE = 'https://api-v2.soundcloud.com'
+
+ def _extract_playlist(self, base_url, playlist_id, playlist_title):
+ COMMON_QUERY = {
+ 'limit': 50,
+ 'client_id': self._CLIENT_ID,
+ 'linked_partitioning': '1',
+ }
+
+ query = COMMON_QUERY.copy()
+ query['offset'] = 0
+
+ next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
+
+ entries = []
+ for i in itertools.count():
+ response = self._download_json(
+ next_href, playlist_id, 'Downloading track page %s' % (i + 1))
+
+ collection = response['collection']
+ if not collection:
+ break
+
+ def resolve_permalink_url(candidates):
+ for cand in candidates:
+ if isinstance(cand, dict):
+ permalink_url = cand.get('permalink_url')
+ entry_id = self._extract_id(cand)
+ if permalink_url and permalink_url.startswith('http'):
+ return permalink_url, entry_id
+
+ for e in collection:
+ permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+ if permalink_url:
+ entries.append(self.url_result(permalink_url, video_id=entry_id))
+
+ next_href = response.get('next_href')
+ if not next_href:
+ break
+
+ parsed_next_href = compat_urlparse.urlparse(response['next_href'])
+ qs = compat_urlparse.parse_qs(parsed_next_href.query)
+ qs.update(COMMON_QUERY)
+ next_href = compat_urlparse.urlunparse(
+ parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'entries': entries,
+ }
+
+
+class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:(?:www|m)\.)?soundcloud\.com/
@@ -385,16 +457,13 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
'playlist_mincount': 1,
}]
- _API_BASE = 'https://api.soundcloud.com'
- _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
_BASE_URL_MAP = {
- 'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
- 'tracks': '%s/users/%%s/tracks' % _API_BASE,
- 'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
- 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
- 'likes': '%s/users/%%s/likes' % _API_V2_BASE,
- 'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
+ 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
+ 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
}
_TITLE_MAP = {
@@ -416,57 +485,36 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
resolv_url, uploader, 'Downloading user info')
resource = mobj.group('rsrc') or 'all'
- base_url = self._BASE_URL_MAP[resource] % user['id']
- COMMON_QUERY = {
- 'limit': 50,
- 'client_id': self._CLIENT_ID,
- 'linked_partitioning': '1',
- }
+ return self._extract_playlist(
+ self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']),
+ '%s (%s)' % (user['username'], self._TITLE_MAP[resource]))
- query = COMMON_QUERY.copy()
- query['offset'] = 0
-
- next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
- entries = []
- for i in itertools.count():
- response = self._download_json(
- next_href, uploader, 'Downloading track page %s' % (i + 1))
-
- collection = response['collection']
- if not collection:
- break
-
- def resolve_permalink_url(candidates):
- for cand in candidates:
- if isinstance(cand, dict):
- permalink_url = cand.get('permalink_url')
- entry_id = self._extract_id(cand)
- if permalink_url and permalink_url.startswith('http'):
- return permalink_url, entry_id
+class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
+ IE_NAME = 'soundcloud:trackstation'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
+ 'info_dict': {
+ 'id': '286017854',
+ 'title': 'Track station: your-text',
+ },
+ 'playlist_mincount': 47,
+ }]
- for e in collection:
- permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
- if permalink_url:
- entries.append(self.url_result(permalink_url, video_id=entry_id))
+ def _real_extract(self, url):
+ track_name = self._match_id(url)
- next_href = response.get('next_href')
- if not next_href:
- break
+ webpage = self._download_webpage(url, track_name)
- parsed_next_href = compat_urlparse.urlparse(response['next_href'])
- qs = compat_urlparse.parse_qs(parsed_next_href.query)
- qs.update(COMMON_QUERY)
- next_href = compat_urlparse.urlunparse(
- parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+ track_id = self._search_regex(
+ r'soundcloud:track-stations:(\d+)', webpage, 'track id')
- return {
- '_type': 'playlist',
- 'id': compat_str(user['id']),
- 'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
- 'entries': entries,
- }
+ return self._extract_playlist(
+ '%s/stations/soundcloud:track-stations:%s/tracks'
+ % (self._API_V2_BASE, track_id),
+ track_id, 'Track station: %s' % track_name)
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):