aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-07-19 11:24:19 +0200
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-07-19 11:27:12 +0200
commit 32470bf619d31605dc9c51ad107839a097f829f4 (patch)
tree e62c08b669bf0bc6eea7263d1fa28ce0afd737f0
parent 8b61bfd6389b62f054cdf9dcb3436395c82a8e28 (diff)
download youtube-dl-32470bf619d31605dc9c51ad107839a097f829f4.zip
youtube-dl-32470bf619d31605dc9c51ad107839a097f829f4.tar.gz
youtube-dl-32470bf619d31605dc9c51ad107839a097f829f4.tar.bz2
[sportschau] Improve title extraction
The html '<title>' ends with '- sportschau.de', which shouldn't be part of the title.
-rw-r--r-- youtube_dl/extractor/sportschau.py 5
1 files changed, 3 insertions, 2 deletions
diff --git a/youtube_dl/extractor/sportschau.py b/youtube_dl/extractor/sportschau.py
index 2879726..bf9b075 100644
--- a/youtube_dl/extractor/sportschau.py
+++ b/youtube_dl/extractor/sportschau.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import get_element_by_attribute
class SportschauIE(InfoExtractor):
@@ -12,7 +13,7 @@ class SportschauIE(InfoExtractor):
'info_dict': {
'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
'ext': 'mp4',
- 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun" - Tour de France - sportschau.de',
+ 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
},
@@ -34,7 +35,7 @@ class SportschauIE(InfoExtractor):
m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, ext="mp4")
webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+ title = get_element_by_attribute('class', 'headline', webpage)
desc = self._html_search_meta('description', webpage)
return {