diff options
author | Rogério Brito <rbrito@ime.usp.br> | 2017-03-08 22:53:09 -0300 |
---|---|---|
committer | Rogério Brito <rbrito@ime.usp.br> | 2017-03-08 22:53:09 -0300 |
commit | 1d3fd83f473663fce3e0a10303473a38d80cc3d0 (patch) | |
tree | 80d9943427637e0e06a725c5791e9fc9368d7f98 /youtube_dl/extractor/crunchyroll.py | |
parent | 4e090bc3ceacc4e3cd464d12ea97700e3acad37d (diff) | |
download | youtube-dl-1d3fd83f473663fce3e0a10303473a38d80cc3d0.zip youtube-dl-1d3fd83f473663fce3e0a10303473a38d80cc3d0.tar.gz youtube-dl-1d3fd83f473663fce3e0a10303473a38d80cc3d0.tar.bz2 |
New upstream version 2017.03.07
Diffstat (limited to 'youtube_dl/extractor/crunchyroll.py')
-rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 20 |
1 files changed, 18 insertions, 2 deletions
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index a1fc6a7..9c6cf00 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -207,6 +207,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # Just test metadata extraction
             'skip_download': True,
         },
+    }, {
+        # make sure we can extract an uploader name that's not a link
+        'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
+        'info_dict': {
+            'id': '606899',
+            'ext': 'mp4',
+            'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
+            'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
+            'uploader': 'Geneon Entertainment',
+            'upload_date': '20120717',
+        },
+        'params': {
+            # just test metadata extraction
+            'skip_download': True,
+        },
     }]
 
     _FORMAT_IDS = {
@@ -388,8 +403,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         if video_upload_date:
             video_upload_date = unified_strdate(video_upload_date)
         video_uploader = self._html_search_regex(
-            r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
-            'video_uploader', fatal=False)
+            # try looking for both an uploader that's a link and one that's not
+            [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
+            webpage, 'video_uploader', fatal=False)
 
         available_fmts = []
         for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):