aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-xyoutube_dl/YoutubeDL.py11
-rw-r--r--youtube_dl/__init__.py10
-rw-r--r--youtube_dl/compat.py21
-rw-r--r--youtube_dl/downloader/external.py14
-rw-r--r--youtube_dl/downloader/fragment.py1
-rw-r--r--youtube_dl/downloader/hls.py3
-rw-r--r--youtube_dl/extractor/abcnews.py4
-rw-r--r--youtube_dl/extractor/abcotvs.py2
-rw-r--r--youtube_dl/extractor/acast.py26
-rw-r--r--youtube_dl/extractor/adobetv.py2
-rw-r--r--youtube_dl/extractor/aenetworks.py14
-rw-r--r--youtube_dl/extractor/afreecatv.py105
-rw-r--r--youtube_dl/extractor/airmozilla.py2
-rw-r--r--youtube_dl/extractor/allocine.py8
-rw-r--r--youtube_dl/extractor/alphaporno.py2
-rw-r--r--youtube_dl/extractor/aol.py31
-rw-r--r--youtube_dl/extractor/ard.py2
-rw-r--r--youtube_dl/extractor/arkena.py21
-rw-r--r--youtube_dl/extractor/atresplayer.py4
-rw-r--r--youtube_dl/extractor/atttechchannel.py2
-rw-r--r--youtube_dl/extractor/audioboom.py2
-rw-r--r--youtube_dl/extractor/azmedien.py172
-rw-r--r--youtube_dl/extractor/azubu.py4
-rw-r--r--youtube_dl/extractor/bandcamp.py19
-rw-r--r--youtube_dl/extractor/beampro.py73
-rw-r--r--youtube_dl/extractor/bet.py4
-rw-r--r--youtube_dl/extractor/bild.py2
-rw-r--r--youtube_dl/extractor/bilibili.py141
-rw-r--r--youtube_dl/extractor/biobiochiletv.py4
-rw-r--r--youtube_dl/extractor/bloomberg.py3
-rw-r--r--youtube_dl/extractor/breakcom.py125
-rw-r--r--youtube_dl/extractor/brightcove.py19
-rw-r--r--youtube_dl/extractor/byutv.py2
-rw-r--r--youtube_dl/extractor/camdemy.py4
-rw-r--r--youtube_dl/extractor/canalplus.py20
-rw-r--r--youtube_dl/extractor/canvas.py9
-rw-r--r--youtube_dl/extractor/carambatv.py4
-rw-r--r--youtube_dl/extractor/cbc.py64
-rw-r--r--youtube_dl/extractor/cbsnews.py2
-rw-r--r--youtube_dl/extractor/ccc.py4
-rw-r--r--youtube_dl/extractor/ccma.py99
-rw-r--r--youtube_dl/extractor/cctv.py196
-rwxr-xr-xyoutube_dl/extractor/cda.py4
-rw-r--r--youtube_dl/extractor/ceskatelevize.py8
-rw-r--r--youtube_dl/extractor/channel9.py6
-rw-r--r--youtube_dl/extractor/charlierose.py2
-rw-r--r--youtube_dl/extractor/chaturbate.py21
-rw-r--r--youtube_dl/extractor/chirbit.py5
-rw-r--r--youtube_dl/extractor/cliphunter.py4
-rw-r--r--youtube_dl/extractor/clipsyndicate.py2
-rw-r--r--youtube_dl/extractor/clubic.py2
-rw-r--r--youtube_dl/extractor/cmt.py31
-rw-r--r--youtube_dl/extractor/collegerama.py2
-rw-r--r--youtube_dl/extractor/comedycentral.py18
-rw-r--r--youtube_dl/extractor/common.py145
-rw-r--r--youtube_dl/extractor/coub.py2
-rw-r--r--youtube_dl/extractor/crackle.py55
-rw-r--r--youtube_dl/extractor/criterion.py2
-rw-r--r--youtube_dl/extractor/crooksandliars.py2
-rw-r--r--youtube_dl/extractor/crunchyroll.py45
-rw-r--r--youtube_dl/extractor/cspan.py29
-rw-r--r--youtube_dl/extractor/ctsnews.py4
-rw-r--r--youtube_dl/extractor/ctvnews.py5
-rw-r--r--youtube_dl/extractor/cultureunplugged.py2
-rw-r--r--youtube_dl/extractor/dailymotion.py2
-rw-r--r--youtube_dl/extractor/daum.py8
-rw-r--r--youtube_dl/extractor/dbtv.py2
-rw-r--r--youtube_dl/extractor/dctp.py2
-rw-r--r--youtube_dl/extractor/deezer.py2
-rw-r--r--youtube_dl/extractor/dhm.py4
-rw-r--r--youtube_dl/extractor/digiteka.py4
-rw-r--r--youtube_dl/extractor/discoverygo.py3
-rw-r--r--youtube_dl/extractor/disney.py115
-rw-r--r--youtube_dl/extractor/douyutv.py18
-rw-r--r--youtube_dl/extractor/dplay.py11
-rw-r--r--youtube_dl/extractor/dramafever.py11
-rw-r--r--youtube_dl/extractor/drbonanza.py4
-rw-r--r--youtube_dl/extractor/dreisat.py170
-rw-r--r--youtube_dl/extractor/drtuber.py2
-rw-r--r--youtube_dl/extractor/drtv.py74
-rw-r--r--youtube_dl/extractor/dumpert.py2
-rw-r--r--youtube_dl/extractor/eagleplatform.py4
-rw-r--r--youtube_dl/extractor/egghead.py39
-rw-r--r--youtube_dl/extractor/einthusan.py4
-rw-r--r--youtube_dl/extractor/elpais.py23
-rw-r--r--youtube_dl/extractor/eroprofile.py4
-rw-r--r--youtube_dl/extractor/escapist.py4
-rw-r--r--youtube_dl/extractor/esri.py2
-rw-r--r--youtube_dl/extractor/europa.py2
-rw-r--r--youtube_dl/extractor/expotv.py2
-rw-r--r--youtube_dl/extractor/extractors.py79
-rw-r--r--youtube_dl/extractor/facebook.py53
-rw-r--r--youtube_dl/extractor/fc2.py2
-rw-r--r--youtube_dl/extractor/filmon.py178
-rw-r--r--youtube_dl/extractor/firsttv.py134
-rw-r--r--youtube_dl/extractor/fivetv.py6
-rw-r--r--youtube_dl/extractor/fktv.py2
-rw-r--r--youtube_dl/extractor/flipagram.py2
-rw-r--r--youtube_dl/extractor/foxgay.py2
-rw-r--r--youtube_dl/extractor/foxnews.py6
-rw-r--r--youtube_dl/extractor/franceculture.py2
-rw-r--r--youtube_dl/extractor/francetv.py4
-rw-r--r--youtube_dl/extractor/freesound.py66
-rw-r--r--youtube_dl/extractor/freevideo.py38
-rw-r--r--youtube_dl/extractor/funimation.py6
-rw-r--r--youtube_dl/extractor/funnyordie.py4
-rw-r--r--youtube_dl/extractor/fusion.py2
-rw-r--r--youtube_dl/extractor/gamersyde.py2
-rw-r--r--youtube_dl/extractor/gamespot.py2
-rw-r--r--youtube_dl/extractor/gamestar.py2
-rw-r--r--youtube_dl/extractor/gaskrank.py123
-rw-r--r--youtube_dl/extractor/gazeta.py2
-rw-r--r--youtube_dl/extractor/generic.py225
-rw-r--r--youtube_dl/extractor/giantbomb.py2
-rw-r--r--youtube_dl/extractor/giga.py2
-rw-r--r--youtube_dl/extractor/glide.py2
-rw-r--r--youtube_dl/extractor/go.py5
-rw-r--r--youtube_dl/extractor/godtube.py2
-rw-r--r--youtube_dl/extractor/googledrive.py9
-rw-r--r--youtube_dl/extractor/goshgay.py2
-rw-r--r--youtube_dl/extractor/hbo.py4
-rw-r--r--youtube_dl/extractor/hearthisat.py4
-rw-r--r--youtube_dl/extractor/heise.py2
-rw-r--r--youtube_dl/extractor/hellporno.py2
-rw-r--r--youtube_dl/extractor/historicfilms.py2
-rw-r--r--youtube_dl/extractor/hitbox.py4
-rw-r--r--youtube_dl/extractor/hitrecord.py68
-rw-r--r--youtube_dl/extractor/hornbunny.py2
-rw-r--r--youtube_dl/extractor/howstuffworks.py6
-rw-r--r--youtube_dl/extractor/huajiao.py2
-rw-r--r--youtube_dl/extractor/huffpost.py2
-rw-r--r--youtube_dl/extractor/imdb.py5
-rw-r--r--youtube_dl/extractor/inc.py41
-rw-r--r--youtube_dl/extractor/indavideo.py4
-rw-r--r--youtube_dl/extractor/infoq.py63
-rw-r--r--youtube_dl/extractor/instagram.py6
-rw-r--r--youtube_dl/extractor/iprima.py2
-rw-r--r--youtube_dl/extractor/ir90tv.py2
-rw-r--r--youtube_dl/extractor/itv.py196
-rw-r--r--youtube_dl/extractor/ivi.py6
-rw-r--r--youtube_dl/extractor/iwara.py42
-rw-r--r--youtube_dl/extractor/izlesene.py4
-rw-r--r--youtube_dl/extractor/jamendo.py77
-rw-r--r--youtube_dl/extractor/jove.py4
-rw-r--r--youtube_dl/extractor/jwplatform.py15
-rw-r--r--youtube_dl/extractor/kaltura.py10
-rw-r--r--youtube_dl/extractor/karrierevideos.py4
-rw-r--r--youtube_dl/extractor/keezmovies.py2
-rw-r--r--youtube_dl/extractor/ketnet.py2
-rw-r--r--youtube_dl/extractor/konserthusetplay.py36
-rw-r--r--youtube_dl/extractor/krasview.py2
-rw-r--r--youtube_dl/extractor/kusi.py2
-rw-r--r--youtube_dl/extractor/laola1tv.py183
-rw-r--r--youtube_dl/extractor/leeco.py4
-rw-r--r--youtube_dl/extractor/lemonde.py2
-rw-r--r--youtube_dl/extractor/libraryofcongress.py2
-rw-r--r--youtube_dl/extractor/libsyn.py2
-rw-r--r--youtube_dl/extractor/lifenews.py2
-rw-r--r--youtube_dl/extractor/limelight.py32
-rw-r--r--youtube_dl/extractor/litv.py4
-rw-r--r--youtube_dl/extractor/liveleak.py6
-rw-r--r--youtube_dl/extractor/livestream.py2
-rw-r--r--youtube_dl/extractor/lnkgo.py4
-rw-r--r--youtube_dl/extractor/lynda.py2
-rw-r--r--youtube_dl/extractor/matchtv.py2
-rw-r--r--youtube_dl/extractor/mdr.py2
-rw-r--r--youtube_dl/extractor/meipai.py104
-rw-r--r--youtube_dl/extractor/melonvod.py72
-rw-r--r--youtube_dl/extractor/metacafe.py2
-rw-r--r--youtube_dl/extractor/mgoon.py2
-rw-r--r--youtube_dl/extractor/mgtv.py2
-rw-r--r--youtube_dl/extractor/minhateca.py2
-rw-r--r--youtube_dl/extractor/ministrygrid.py2
-rw-r--r--youtube_dl/extractor/mitele.py6
-rw-r--r--youtube_dl/extractor/mixcloud.py29
-rw-r--r--youtube_dl/extractor/mlb.py8
-rw-r--r--youtube_dl/extractor/mnet.py2
-rw-r--r--youtube_dl/extractor/moevideo.py4
-rw-r--r--youtube_dl/extractor/mofosex.py2
-rw-r--r--youtube_dl/extractor/mojvideo.py2
-rw-r--r--youtube_dl/extractor/motherless.py6
-rw-r--r--youtube_dl/extractor/movieclips.py2
-rw-r--r--youtube_dl/extractor/moviezine.py2
-rw-r--r--youtube_dl/extractor/movingimage.py2
-rw-r--r--youtube_dl/extractor/msn.py5
-rw-r--r--youtube_dl/extractor/mtv.py136
-rw-r--r--youtube_dl/extractor/muenchentv.py2
-rw-r--r--youtube_dl/extractor/mwave.py4
-rw-r--r--youtube_dl/extractor/myspace.py108
-rw-r--r--youtube_dl/extractor/myvi.py2
-rw-r--r--youtube_dl/extractor/myvideo.py2
-rw-r--r--youtube_dl/extractor/naver.py9
-rw-r--r--youtube_dl/extractor/nbc.py45
-rw-r--r--youtube_dl/extractor/ndr.py4
-rw-r--r--youtube_dl/extractor/ndtv.py2
-rw-r--r--youtube_dl/extractor/netzkino.py2
-rw-r--r--youtube_dl/extractor/nextmedia.py79
-rw-r--r--youtube_dl/extractor/nfl.py4
-rw-r--r--youtube_dl/extractor/nick.py5
-rw-r--r--youtube_dl/extractor/niconico.py43
-rw-r--r--youtube_dl/extractor/nosvideo.py4
-rw-r--r--youtube_dl/extractor/nova.py10
-rw-r--r--youtube_dl/extractor/novamov.py12
-rw-r--r--youtube_dl/extractor/nowness.py6
-rw-r--r--youtube_dl/extractor/nowtv.py12
-rw-r--r--youtube_dl/extractor/noz.py2
-rw-r--r--youtube_dl/extractor/npo.py2
-rw-r--r--youtube_dl/extractor/nrk.py252
-rw-r--r--youtube_dl/extractor/ntvde.py2
-rw-r--r--youtube_dl/extractor/ntvru.py10
-rw-r--r--youtube_dl/extractor/oktoberfesttv.py2
-rw-r--r--youtube_dl/extractor/ondemandkorea.py60
-rw-r--r--youtube_dl/extractor/onionstudios.py2
-rw-r--r--youtube_dl/extractor/ooyala.py14
-rw-r--r--youtube_dl/extractor/openload.py104
-rw-r--r--youtube_dl/extractor/orf.py2
-rw-r--r--youtube_dl/extractor/pandoratv.py19
-rw-r--r--youtube_dl/extractor/pbs.py28
-rw-r--r--youtube_dl/extractor/people.py2
-rw-r--r--youtube_dl/extractor/phoenix.py4
-rw-r--r--youtube_dl/extractor/piksel.py123
-rw-r--r--youtube_dl/extractor/pinkbike.py2
-rw-r--r--youtube_dl/extractor/pladform.py2
-rw-r--r--youtube_dl/extractor/playtvak.py10
-rw-r--r--youtube_dl/extractor/playvid.py2
-rw-r--r--youtube_dl/extractor/playwire.py2
-rw-r--r--youtube_dl/extractor/pluralsight.py9
-rw-r--r--youtube_dl/extractor/polskieradio.py2
-rw-r--r--youtube_dl/extractor/porncom.py2
-rw-r--r--youtube_dl/extractor/pornflip.py92
-rw-r--r--youtube_dl/extractor/pornhd.py4
-rw-r--r--youtube_dl/extractor/pornhub.py24
-rw-r--r--youtube_dl/extractor/pornotube.py2
-rw-r--r--youtube_dl/extractor/pornovoisines.py2
-rw-r--r--youtube_dl/extractor/pornoxo.py2
-rw-r--r--youtube_dl/extractor/presstv.py2
-rw-r--r--youtube_dl/extractor/promptfile.py2
-rw-r--r--youtube_dl/extractor/prosiebensat1.py22
-rw-r--r--youtube_dl/extractor/qqmusic.py6
-rw-r--r--youtube_dl/extractor/r7.py2
-rw-r--r--youtube_dl/extractor/radiobremen.py2
-rw-r--r--youtube_dl/extractor/radiocanada.py14
-rw-r--r--youtube_dl/extractor/radiode.py2
-rw-r--r--youtube_dl/extractor/radiojavan.py2
-rw-r--r--youtube_dl/extractor/rai.py6
-rw-r--r--youtube_dl/extractor/rbmaradio.py2
-rw-r--r--youtube_dl/extractor/reuters.py2
-rw-r--r--youtube_dl/extractor/reverbnation.py2
-rw-r--r--youtube_dl/extractor/ro220.py2
-rw-r--r--youtube_dl/extractor/rockstargames.py2
-rw-r--r--youtube_dl/extractor/roosterteeth.py2
-rw-r--r--youtube_dl/extractor/rottentomatoes.py2
-rw-r--r--youtube_dl/extractor/rte.py181
-rw-r--r--youtube_dl/extractor/rtl2.py57
-rw-r--r--youtube_dl/extractor/rtlnl.py4
-rw-r--r--youtube_dl/extractor/rtp.py2
-rw-r--r--youtube_dl/extractor/rts.py159
-rw-r--r--youtube_dl/extractor/rtve.py5
-rw-r--r--youtube_dl/extractor/rtvnh.py2
-rw-r--r--youtube_dl/extractor/rudo.py2
-rw-r--r--youtube_dl/extractor/ruhd.py2
-rw-r--r--youtube_dl/extractor/ruutu.py9
-rw-r--r--youtube_dl/extractor/savefrom.py2
-rw-r--r--youtube_dl/extractor/sbs.py2
-rw-r--r--youtube_dl/extractor/screencast.py8
-rw-r--r--youtube_dl/extractor/screencastomatic.py2
-rw-r--r--youtube_dl/extractor/screenjunkies.py138
-rw-r--r--youtube_dl/extractor/senateisvp.py2
-rw-r--r--youtube_dl/extractor/sendtonews.py38
-rw-r--r--youtube_dl/extractor/sexu.py2
-rw-r--r--youtube_dl/extractor/sharesix.py91
-rw-r--r--youtube_dl/extractor/showroomlive.py84
-rw-r--r--youtube_dl/extractor/skysports.py11
-rw-r--r--youtube_dl/extractor/slutload.py2
-rw-r--r--youtube_dl/extractor/smotri.py4
-rw-r--r--youtube_dl/extractor/snotr.py4
-rw-r--r--youtube_dl/extractor/soundcloud.py60
-rw-r--r--youtube_dl/extractor/soundgasm.py2
-rw-r--r--youtube_dl/extractor/southpark.py4
-rw-r--r--youtube_dl/extractor/spankbang.py2
-rw-r--r--youtube_dl/extractor/spankwire.py2
-rw-r--r--youtube_dl/extractor/spiegeltv.py2
-rw-r--r--youtube_dl/extractor/spike.py2
-rw-r--r--youtube_dl/extractor/sport5.py2
-rw-r--r--youtube_dl/extractor/sportbox.py62
-rw-r--r--youtube_dl/extractor/sportdeutschland.py6
-rw-r--r--youtube_dl/extractor/srgssr.py38
-rw-r--r--youtube_dl/extractor/srmediathek.py2
-rw-r--r--youtube_dl/extractor/stanfordoc.py4
-rw-r--r--youtube_dl/extractor/stitcher.py4
-rw-r--r--youtube_dl/extractor/streamable.py4
-rw-r--r--youtube_dl/extractor/streetvoice.py2
-rw-r--r--youtube_dl/extractor/sunporno.py2
-rw-r--r--youtube_dl/extractor/svt.py2
-rw-r--r--youtube_dl/extractor/swrmediathek.py89
-rw-r--r--youtube_dl/extractor/tagesschau.py12
-rw-r--r--youtube_dl/extractor/tass.py2
-rw-r--r--youtube_dl/extractor/tdslifeway.py2
-rw-r--r--youtube_dl/extractor/teachertube.py4
-rw-r--r--youtube_dl/extractor/ted.py4
-rw-r--r--youtube_dl/extractor/telebruxelles.py45
-rw-r--r--youtube_dl/extractor/telegraaf.py2
-rw-r--r--youtube_dl/extractor/telemb.py4
-rw-r--r--youtube_dl/extractor/telewebion.py2
-rw-r--r--youtube_dl/extractor/theplatform.py8
-rw-r--r--youtube_dl/extractor/thisamericanlife.py2
-rw-r--r--youtube_dl/extractor/thisoldhouse.py7
-rw-r--r--youtube_dl/extractor/tinypic.py2
-rw-r--r--youtube_dl/extractor/tnaflix.py14
-rw-r--r--youtube_dl/extractor/tudou.py4
-rw-r--r--youtube_dl/extractor/tumblr.py8
-rw-r--r--youtube_dl/extractor/tunein.py107
-rw-r--r--youtube_dl/extractor/turbo.py2
-rw-r--r--youtube_dl/extractor/turner.py8
-rw-r--r--youtube_dl/extractor/tv2.py2
-rw-r--r--youtube_dl/extractor/tv4.py53
-rw-r--r--youtube_dl/extractor/tva.py54
-rw-r--r--youtube_dl/extractor/tvc.py8
-rw-r--r--youtube_dl/extractor/tweakers.py2
-rw-r--r--youtube_dl/extractor/twentyfourvideo.py10
-rw-r--r--youtube_dl/extractor/twentymin.py109
-rw-r--r--youtube_dl/extractor/twitch.py127
-rw-r--r--youtube_dl/extractor/twitter.py12
-rw-r--r--youtube_dl/extractor/udn.py2
-rw-r--r--youtube_dl/extractor/uktvplay.py33
-rw-r--r--youtube_dl/extractor/uol.py27
-rw-r--r--youtube_dl/extractor/uplynk.py4
-rw-r--r--youtube_dl/extractor/urort.py2
-rw-r--r--youtube_dl/extractor/ustream.py7
-rw-r--r--youtube_dl/extractor/ustudio.py2
-rw-r--r--youtube_dl/extractor/varzesh3.py4
-rw-r--r--youtube_dl/extractor/vbox7.py78
-rw-r--r--youtube_dl/extractor/vessel.py2
-rw-r--r--youtube_dl/extractor/vevo.py269
-rw-r--r--youtube_dl/extractor/vgtv.py10
-rw-r--r--youtube_dl/extractor/vidbit.py2
-rw-r--r--youtube_dl/extractor/viddler.py2
-rw-r--r--youtube_dl/extractor/videa.py97
-rw-r--r--youtube_dl/extractor/videomega.py2
-rw-r--r--youtube_dl/extractor/videomore.py10
-rw-r--r--youtube_dl/extractor/videopress.py99
-rw-r--r--youtube_dl/extractor/videott.py65
-rw-r--r--youtube_dl/extractor/vidio.py2
-rw-r--r--youtube_dl/extractor/vidme.py10
-rw-r--r--youtube_dl/extractor/viewlift.py8
-rw-r--r--youtube_dl/extractor/viewster.py2
-rw-r--r--youtube_dl/extractor/viidea.py12
-rw-r--r--youtube_dl/extractor/vimeo.py146
-rw-r--r--youtube_dl/extractor/vimple.py2
-rw-r--r--youtube_dl/extractor/vine.py107
-rw-r--r--youtube_dl/extractor/viu.py249
-rw-r--r--youtube_dl/extractor/vk.py42
-rw-r--r--youtube_dl/extractor/vlive.py159
-rw-r--r--youtube_dl/extractor/vodlocker.py2
-rw-r--r--youtube_dl/extractor/voicerepublic.py2
-rw-r--r--youtube_dl/extractor/vporn.py12
-rw-r--r--youtube_dl/extractor/vube.py8
-rw-r--r--youtube_dl/extractor/vvvvid.py140
-rw-r--r--youtube_dl/extractor/walla.py2
-rw-r--r--youtube_dl/extractor/watchindianporn.py2
-rw-r--r--youtube_dl/extractor/webcaster.py2
-rw-r--r--youtube_dl/extractor/webofstories.py8
-rw-r--r--youtube_dl/extractor/weiqitv.py4
-rw-r--r--youtube_dl/extractor/xbef.py2
-rw-r--r--youtube_dl/extractor/xfileshare.py6
-rw-r--r--youtube_dl/extractor/xhamster.py14
-rw-r--r--youtube_dl/extractor/xiami.py53
-rw-r--r--youtube_dl/extractor/xuite.py6
-rw-r--r--youtube_dl/extractor/yesjapan.py2
-rw-r--r--youtube_dl/extractor/yinyuetai.py2
-rw-r--r--youtube_dl/extractor/ynet.py4
-rw-r--r--youtube_dl/extractor/youporn.py4
-rw-r--r--youtube_dl/extractor/yourupload.py49
-rw-r--r--youtube_dl/extractor/youtube.py184
-rw-r--r--youtube_dl/extractor/zapiks.py2
-rw-r--r--youtube_dl/extractor/zdf.py466
-rw-r--r--youtube_dl/extractor/zingmp3.py2
-rw-r--r--youtube_dl/jsinterp.py2
-rw-r--r--youtube_dl/options.py66
-rw-r--r--youtube_dl/postprocessor/metadatafromtitle.py2
-rw-r--r--youtube_dl/socks.py6
-rw-r--r--youtube_dl/utils.py54
-rw-r--r--youtube_dl/version.py2
383 files changed, 7041 insertions, 2860 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 53f20ac..a7bf5a1 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -24,6 +24,7 @@ import sys
import time
import tokenize
import traceback
+import random
from .compat import (
compat_basestring,
@@ -159,6 +160,7 @@ class YoutubeDL(object):
playlistend: Playlist item to end at.
playlist_items: Specific indices of playlist to download.
playlistreverse: Download playlist items in reverse order.
+ playlistrandom: Download playlist items in random order.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
@@ -584,7 +586,7 @@ class YoutubeDL(object):
if autonumber_size is None:
autonumber_size = 5
autonumber_templ = '%0' + str(autonumber_size) + 'd'
- template_dict['autonumber'] = autonumber_templ % self._num_downloads
+ template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
if template_dict.get('playlist_index') is not None:
template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
if template_dict.get('resolution') is None:
@@ -842,6 +844,9 @@ class YoutubeDL(object):
if self.params.get('playlistreverse', False):
entries = entries[::-1]
+ if self.params.get('playlistrandom', False):
+ random.shuffle(entries)
+
for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
extra = {
@@ -1339,7 +1344,7 @@ class YoutubeDL(object):
format['format_id'] = compat_str(i)
else:
# Sanitize format_id from characters used in format selector expression
- format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
+ format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
format_id = format['format_id']
if format_id not in formats_dict:
formats_dict[format_id] = []
@@ -1363,7 +1368,7 @@ class YoutubeDL(object):
format['ext'] = determine_ext(format['url']).lower()
# Automatically determine protocol if missing (useful for format
# selection purposes)
- if 'protocol' not in format:
+ if format.get('protocol') is None:
format['protocol'] = determine_protocol(format)
# Add HTTP headers, so that external programs can use them from the
# json output
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 6850d95..5c5b809 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -133,6 +133,12 @@ def _real_main(argv=None):
parser.error('TV Provider account username missing\n')
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
parser.error('using output template conflicts with using title, video ID or auto number')
+ if opts.autonumber_size is not None:
+ if opts.autonumber_size <= 0:
+ parser.error('auto number size must be positive')
+ if opts.autonumber_start is not None:
+ if opts.autonumber_start < 0:
+ parser.error('auto number start must be positive or 0')
if opts.usetitle and opts.useid:
parser.error('using title conflicts with using video ID')
if opts.username is not None and opts.password is None:
@@ -321,6 +327,7 @@ def _real_main(argv=None):
'listformats': opts.listformats,
'outtmpl': outtmpl,
'autonumber_size': opts.autonumber_size,
+ 'autonumber_start': opts.autonumber_start,
'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors,
'force_generic_extractor': opts.force_generic_extractor,
@@ -337,6 +344,7 @@ def _real_main(argv=None):
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
'playlistreverse': opts.playlist_reverse,
+ 'playlistrandom': opts.playlist_random,
'noplaylist': opts.noplaylist,
'logtostderr': opts.outtmpl == '-',
'consoletitle': opts.consoletitle,
@@ -405,7 +413,7 @@ def _real_main(argv=None):
'postprocessor_args': postprocessor_args,
'cn_verification_proxy': opts.cn_verification_proxy,
'geo_verification_proxy': opts.geo_verification_proxy,
-
+ 'config_location': opts.config_location,
}
with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 83ee7e2..7189020 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2344,7 +2344,7 @@ try:
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError: # Python 2
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
- else re.compile('([\x00-\x7f]+)'))
+ else re.compile(r'([\x00-\x7f]+)'))
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
# implementations from cpython 3.4.3's stdlib. Python 2's version
@@ -2529,6 +2529,24 @@ else:
el.text = el.text.decode('utf-8')
return doc
+if hasattr(etree, 'register_namespace'):
+ compat_etree_register_namespace = etree.register_namespace
+else:
+ def compat_etree_register_namespace(prefix, uri):
+ """Register a namespace prefix.
+ The registry is global, and any existing mapping for either the
+ given prefix or the namespace URI will be removed.
+ *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
+ attributes in this namespace will be serialized with prefix if possible.
+ ValueError is raised if prefix is reserved or is invalid.
+ """
+ if re.match(r"ns\d+$", prefix):
+ raise ValueError("Prefix format reserved for internal use")
+ for k, v in list(etree._namespace_map.items()):
+ if k == uri or v == prefix:
+ del etree._namespace_map[k]
+ etree._namespace_map[uri] = prefix
+
if sys.version_info < (2, 7):
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . !
@@ -2865,6 +2883,7 @@ __all__ = [
'compat_cookiejar',
'compat_cookies',
'compat_etree_fromstring',
+ 'compat_etree_register_namespace',
'compat_expanduser',
'compat_get_terminal_size',
'compat_getenv',
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 5d3e5d8..41e3726 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -17,6 +17,7 @@ from ..utils import (
encodeArgument,
handle_youtubedl_headers,
check_executable,
+ is_outdated_version,
)
@@ -198,6 +199,15 @@ class FFmpegFD(ExternalFD):
args = [ffpp.executable, '-y']
+ seekable = info_dict.get('_seekable')
+ if seekable is not None:
+ # setting -seekable prevents ffmpeg from guessing if the server
+ # supports seeking(by adding the header `Range: bytes=0-`), which
+ # can cause problems in some cases
+ # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
+ # http://trac.ffmpeg.org/ticket/6125#comment:10
+ args += ['-seekable', '1' if seekable else '0']
+
args += self._configuration_args()
# start_time = info_dict.get('start_time') or 0
@@ -264,7 +274,9 @@ class FFmpegFD(ExternalFD):
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
args += ['-f', 'mpegts']
else:
- args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+ args += ['-f', 'mp4']
+ if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+ args += ['-bsf:a', 'aac_adtstoasc']
elif protocol == 'rtmp':
args += ['-f', 'flv']
else:
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 60df627..56f9752 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -61,6 +61,7 @@ class FragmentFD(FileDownloader):
'noprogress': True,
'ratelimit': self.params.get('ratelimit'),
'retries': self.params.get('retries', 0),
+ 'nopart': self.params.get('nopart', False),
'test': self.params.get('test', False),
}
)
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 7373ec0..4989abc 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -65,6 +65,9 @@ class HlsFD(FragmentFD):
s = manifest.decode('utf-8', 'ignore')
if not self.can_download(s, info_dict):
+ if info_dict.get('extra_param_to_segment_url'):
+ self.report_error('pycrypto not found. Please install it.')
+ return False
self.report_warning(
'hlsnative has detected features it does not support, '
'extraction will be delegated to ffmpeg')
diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py
index 6ae5d9a..4f56c4c 100644
--- a/youtube_dl/extractor/abcnews.py
+++ b/youtube_dl/extractor/abcnews.py
@@ -23,7 +23,7 @@ class AbcNewsVideoIE(AMPIE):
'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
'duration': 180,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
@@ -59,7 +59,7 @@ class AbcNewsIE(InfoExtractor):
'display_id': 'dramatic-video-rare-death-job-america',
'title': 'Occupational Hazards',
'description': 'Nightline investigates the dangers that lurk at various jobs.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20100428',
'timestamp': 1272412800,
},
diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py
index 054bb05..76e9813 100644
--- a/youtube_dl/extractor/abcotvs.py
+++ b/youtube_dl/extractor/abcotvs.py
@@ -23,7 +23,7 @@ class ABCOTVSIE(InfoExtractor):
'ext': 'mp4',
'title': 'East Bay museum celebrates vintage synthesizers',
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421123075,
'upload_date': '20150113',
'uploader': 'Jonathan Bloom',
diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py
index 94ce88c..6dace30 100644
--- a/youtube_dl/extractor/acast.py
+++ b/youtube_dl/extractor/acast.py
@@ -8,6 +8,7 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
+ parse_iso8601,
OnDemandPagedList,
)
@@ -15,18 +16,33 @@ from ..utils import (
class ACastIE(InfoExtractor):
IE_NAME = 'acast'
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
- _TEST = {
+ _TESTS = [{
+ # test with one bling
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
'md5': 'ada3de5a1e3a2a381327d749854788bb',
'info_dict': {
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
'ext': 'mp3',
'title': '"Where Are You?": Taipei 101, Taiwan',
- 'timestamp': 1196172000000,
+ 'timestamp': 1196172000,
+ 'upload_date': '20071127',
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
'duration': 211,
}
- }
+ }, {
+ # test with multiple blings
+ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
+ 'md5': '55c0097badd7095f494c99a172f86501',
+ 'info_dict': {
+ 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
+ 'ext': 'mp3',
+ 'title': '2. Raggarmordet - Röster ur det förflutna',
+ 'timestamp': 1477346700,
+ 'upload_date': '20161024',
+ 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
+ 'duration': 2797,
+ }
+ }]
def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups()
@@ -35,11 +51,11 @@ class ACastIE(InfoExtractor):
return {
'id': compat_str(cast_data['id']),
'display_id': display_id,
- 'url': cast_data['blings'][0]['audio'],
+ 'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
'title': cast_data['name'],
'description': cast_data.get('description'),
'thumbnail': cast_data.get('image'),
- 'timestamp': int_or_none(cast_data.get('publishingDate')),
+ 'timestamp': parse_iso8601(cast_data.get('publishingDate')),
'duration': int_or_none(cast_data.get('duration')),
}
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
index 5ae16fa..008c98e 100644
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@@ -30,7 +30,7 @@ class AdobeTVIE(AdobeTVBaseIE):
'ext': 'mp4',
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'upload_date': '20110914',
'duration': 60,
'view_count': int,
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index 6adb6d8..c973174 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -26,7 +26,7 @@ class AENetworksIE(AENetworksBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
- 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
+ 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
'info_dict': {
'id': '22253814',
'ext': 'mp4',
@@ -87,7 +87,7 @@ class AENetworksIE(AENetworksBaseIE):
self._html_search_meta('aetn:SeriesTitle', webpage))
elif url_parts_len == 2:
entries = []
- for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
+ for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
episode_attributes = extract_attributes(episode_item)
episode_url = compat_urlparse.urljoin(
url, episode_attributes['data-canonical'])
@@ -99,7 +99,7 @@ class AENetworksIE(AENetworksBaseIE):
query = {
'mbr': 'true',
- 'assetTypes': 'medium_video_s3'
+ 'assetTypes': 'high_video_s3'
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
@@ -155,7 +155,7 @@ class HistoryTopicIE(AENetworksBaseIE):
'id': 'world-war-i-history',
'title': 'World War I History',
},
- 'playlist_mincount': 24,
+ 'playlist_mincount': 23,
}, {
'url': 'http://www.history.com/topics/world-war-i-history/videos',
'only_matching': True,
@@ -193,7 +193,8 @@ class HistoryTopicIE(AENetworksBaseIE):
return self.theplatform_url_result(
release_url, video_id, {
'mbr': 'true',
- 'switch': 'hls'
+ 'switch': 'hls',
+ 'assetTypes': 'high_video_ak',
})
else:
webpage = self._download_webpage(url, topic_id)
@@ -203,6 +204,7 @@ class HistoryTopicIE(AENetworksBaseIE):
entries.append(self.theplatform_url_result(
video_attributes['data-release-url'], video_attributes['data-id'], {
'mbr': 'true',
- 'switch': 'hls'
+ 'switch': 'hls',
+ 'assetTypes': 'high_video_ak',
}))
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py
index 75b3669..e0a0f7c 100644
--- a/youtube_dl/extractor/afreecatv.py
+++ b/youtube_dl/extractor/afreecatv.py
@@ -18,6 +18,7 @@ from ..utils import (
class AfreecaTVIE(InfoExtractor):
+ IE_NAME = 'afreecatv'
IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x)
https?://
@@ -143,3 +144,107 @@ class AfreecaTVIE(InfoExtractor):
expected=True)
return info
+
+
+class AfreecaTVGlobalIE(AfreecaTVIE):
+ IE_NAME = 'afreecatv:global'
+ _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
+ _TESTS = [{
+ 'url': 'http://afreeca.tv/36853014/v/58301',
+ 'info_dict': {
+ 'id': '58301',
+ 'title': 'tryhard top100',
+ 'uploader_id': '36853014',
+ 'uploader': 'makgi Hearthstone Live!',
+ },
+ 'playlist_count': 3,
+ }]
+
+ def _real_extract(self, url):
+ channel_id, video_id = re.match(self._VALID_URL, url).groups()
+ video_type = 'video' if video_id else 'live'
+ query = {
+ 'pt': 'view',
+ 'bid': channel_id,
+ }
+ if video_id:
+ query['vno'] = video_id
+ video_data = self._download_json(
+ 'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
+ video_id or channel_id, query=query)['channel']
+
+ if video_data.get('result') != 1:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
+
+ title = video_data['title']
+
+ info = {
+ 'thumbnail': video_data.get('thumb'),
+ 'view_count': int_or_none(video_data.get('vcnt')),
+ 'age_limit': int_or_none(video_data.get('grade')),
+ 'uploader_id': channel_id,
+ 'uploader': video_data.get('cname'),
+ }
+
+ if video_id:
+ entries = []
+ for i, f in enumerate(video_data.get('flist', [])):
+ video_key = self.parse_video_key(f.get('key', ''))
+ f_url = f.get('file')
+ if not video_key or not f_url:
+ continue
+ entries.append({
+ 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
+ 'title': title,
+ 'upload_date': video_key.get('upload_date'),
+ 'duration': int_or_none(f.get('length')),
+ 'url': f_url,
+ 'protocol': 'm3u8_native',
+ 'ext': 'mp4',
+ })
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': int_or_none(video_data.get('length')),
+ })
+ if len(entries) > 1:
+ info['_type'] = 'multi_video'
+ info['entries'] = entries
+ elif len(entries) == 1:
+ i = entries[0].copy()
+ i.update(info)
+ info = i
+ else:
+ formats = []
+ for s in video_data.get('strm', []):
+ s_url = s.get('purl')
+ if not s_url:
+ continue
+ stype = s.get('stype')
+ if stype == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
+ elif stype == 'RTMP':
+ format_id = [stype]
+ label = s.get('label')
+ if label:
+ format_id.append(label)
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': s_url,
+ 'tbr': int_or_none(s.get('bps')),
+ 'height': int_or_none(s.get('brt')),
+ 'ext': 'flv',
+ 'rtmp_live': True,
+ })
+ self._sort_formats(formats)
+
+ info.update({
+ 'id': channel_id,
+ 'title': self._live_title(title),
+ 'is_live': True,
+ 'formats': formats,
+ })
+
+ return info
diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dl/extractor/airmozilla.py
index f8e70f4..0e06918 100644
--- a/youtube_dl/extractor/airmozilla.py
+++ b/youtube_dl/extractor/airmozilla.py
@@ -20,7 +20,7 @@ class AirMozillaIE(InfoExtractor):
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
- 'thumbnail': 're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
+ 'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py
index 517b06d..90f11d3 100644
--- a/youtube_dl/extractor/allocine.py
+++ b/youtube_dl/extractor/allocine.py
@@ -21,7 +21,7 @@ class AllocineIE(InfoExtractor):
'ext': 'mp4',
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
@@ -32,7 +32,7 @@ class AllocineIE(InfoExtractor):
'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF',
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
@@ -43,7 +43,7 @@ class AllocineIE(InfoExtractor):
'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/video/video-19550147/',
@@ -53,7 +53,7 @@ class AllocineIE(InfoExtractor):
'ext': 'mp4',
'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}]
diff --git a/youtube_dl/extractor/alphaporno.py b/youtube_dl/extractor/alphaporno.py
index c34719d..3a6d99f 100644
--- a/youtube_dl/extractor/alphaporno.py
+++ b/youtube_dl/extractor/alphaporno.py
@@ -19,7 +19,7 @@ class AlphaPornoIE(InfoExtractor):
'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
'ext': 'mp4',
'title': 'Sensual striptease porn with Samantha Alexandra',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'timestamp': 1418694611,
'upload_date': '20141216',
'duration': 387,
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py
index 2cdee33..b50f454 100644
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@@ -12,7 +12,7 @@ from ..utils import (
class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com'
- _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'
+ _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'
_TESTS = [{
# video with 5min ID
@@ -33,7 +33,7 @@ class AolIE(InfoExtractor):
}
}, {
# video with vidible ID
- 'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183',
+ 'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
'info_dict': {
'id': '5707d6b8e4b090497b04f706',
'ext': 'mp4',
@@ -108,30 +108,3 @@ class AolIE(InfoExtractor):
'uploader': video_data.get('videoOwner'),
'formats': formats,
}
-
-
-class AolFeaturesIE(InfoExtractor):
- IE_NAME = 'features.aol.com'
- _VALID_URL = r'https?://features\.aol\.com/video/(?P<id>[^/?#]+)'
-
- _TESTS = [{
- 'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
- 'md5': '7db483bb0c09c85e241f84a34238cc75',
- 'info_dict': {
- 'id': '519507715',
- 'ext': 'mp4',
- 'title': 'What To Watch - February 17, 2016',
- },
- 'add_ie': ['FiveMin'],
- 'params': {
- # encrypted m3u8 download
- 'skip_download': True,
- },
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- return self.url_result(self._search_regex(
- r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
- webpage, '5min embed url'), 'FiveMin')
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 35f3656..2d55994 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -253,7 +253,7 @@ class ARDIE(InfoExtractor):
'duration': 2600,
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
'upload_date': '20140804',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'HTTP Error 404: Not Found',
}
diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py
index d45cae3..50ffb44 100644
--- a/youtube_dl/extractor/arkena.py
+++ b/youtube_dl/extractor/arkena.py
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
determine_ext,
+ ExtractorError,
float_or_none,
int_or_none,
mimetype2ext,
@@ -15,7 +17,13 @@ from ..utils import (
class ArkenaIE(InfoExtractor):
- _VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ video\.arkena\.com/play2/embed/player\?|
+ play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
+ )
+ '''
_TESTS = [{
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
@@ -37,6 +45,9 @@ class ArkenaIE(InfoExtractor):
}, {
'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
'only_matching': True,
+ }, {
+ 'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled',
+ 'only_matching': True,
}]
@staticmethod
@@ -53,6 +64,14 @@ class ArkenaIE(InfoExtractor):
video_id = mobj.group('id')
account_id = mobj.group('account_id')
+ # Handle http://video.arkena.com/play2/embed/player URL
+ if not video_id:
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = qs.get('mediaId', [None])[0]
+ account_id = qs.get('accountId', [None])[0]
+ if not video_id or not account_id:
+ raise ExtractorError('Invalid URL', expected=True)
+
playlist = self._download_json(
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
% (video_id, account_id),
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index d2f3889..e3c6698 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -30,7 +30,7 @@ class AtresPlayerIE(InfoExtractor):
'title': 'Especial Solidario de Nochebuena',
'description': 'md5:e2d52ff12214fa937107d21064075bf1',
'duration': 5527.6,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'This video is only available for registered users'
},
@@ -43,7 +43,7 @@ class AtresPlayerIE(InfoExtractor):
'title': 'David Bustamante',
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
'duration': 1439.0,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
diff --git a/youtube_dl/extractor/atttechchannel.py b/youtube_dl/extractor/atttechchannel.py
index b01d35b..8f93fb3 100644
--- a/youtube_dl/extractor/atttechchannel.py
+++ b/youtube_dl/extractor/atttechchannel.py
@@ -14,7 +14,7 @@ class ATTTechChannelIE(InfoExtractor):
'ext': 'flv',
'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140127',
},
'params': {
diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py
index d7d1c63..8fc5f65 100644
--- a/youtube_dl/extractor/audioboom.py
+++ b/youtube_dl/extractor/audioboom.py
@@ -17,7 +17,7 @@ class AudioBoomIE(InfoExtractor):
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
'duration': 2245.72,
'uploader': 'Steve Czaban',
- 'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
+ 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
}
}, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py
new file mode 100644
index 0000000..cbc3ed5
--- /dev/null
+++ b/youtube_dl/extractor/azmedien.py
@@ -0,0 +1,172 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+ get_element_by_id,
+ strip_or_none,
+ urljoin,
+)
+
+
+class AZMedienBaseIE(InfoExtractor):
+ def _kaltura_video(self, partner_id, entry_id):
+ return self.url_result(
+ 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
+ video_id=entry_id)
+
+
+class AZMedienIE(AZMedienBaseIE):
+ IE_DESC = 'AZ Medien videos'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ telezueri\.ch|
+ telebaern\.tv|
+ telem1\.ch
+ )/
+ [0-9]+-show-[^/\#]+
+ (?:
+ /[0-9]+-episode-[^/\#]+
+ (?:
+ /[0-9]+-segment-(?:[^/\#]+\#)?|
+ \#
+ )|
+ \#
+ )
+ (?P<id>[^\#]+)
+ '''
+
+ _TESTS = [{
+ # URL with 'segment'
+ 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
+ 'info_dict': {
+ 'id': '1_2444peh4',
+ 'ext': 'mov',
+ 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
+ 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
+ 'uploader_id': 'TeleZ?ri',
+ 'upload_date': '20161218',
+ 'timestamp': 1482084490,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # URL with 'segment' and fragment:
+ 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
+ 'only_matching': True
+ }, {
+ # URL with 'episode' and fragment:
+ 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
+ 'only_matching': True
+ }, {
+ # URL with 'show' and fragment:
+ 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ partner_id = self._search_regex(
+ r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
+ webpage, 'kaltura partner id')
+ entry_id = self._html_search_regex(
+ r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
+ % re.escape(video_id), webpage, 'kaltura entry id', group='id')
+
+ return self._kaltura_video(partner_id, entry_id)
+
+
+class AZMedienPlaylistIE(AZMedienBaseIE):
+ IE_DESC = 'AZ Medien playlists'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ telezueri\.ch|
+ telebaern\.tv|
+ telem1\.ch
+ )/
+ (?P<id>[0-9]+-
+ (?:
+ show|
+ topic|
+ themen
+ )-[^/\#]+
+ (?:
+ /[0-9]+-episode-[^/\#]+
+ )?
+ )$
+ '''
+
+ _TESTS = [{
+ # URL with 'episode'
+ 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
+ 'info_dict': {
+ 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
+ 'title': 'News - Donnerstag, 15. Dezember 2016',
+ },
+ 'playlist_count': 9,
+ }, {
+ # URL with 'themen'
+ 'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
+ 'info_dict': {
+ 'id': '258-themen-tele-m1-classics',
+ 'title': 'Tele M1 Classics',
+ },
+ 'playlist_mincount': 15,
+ }, {
+ # URL with 'topic', contains nested playlists
+ 'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
+ 'only_matching': True,
+ }, {
+ # URL with 'show' only
+ 'url': 'http://www.telezueri.ch/86-show-talktaeglich',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ webpage = self._download_webpage(url, show_id)
+
+ entries = []
+
+ partner_id = self._search_regex(
+ r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
+ webpage, 'kaltura partner id', default=None)
+
+ if partner_id:
+ entries = [
+ self._kaltura_video(partner_id, m.group('id'))
+ for m in re.finditer(
+ r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
+
+ if not entries:
+ entries = [
+ self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
+ for m in re.finditer(
+ r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
+
+ if not entries:
+ entries = [
+ # May contain nested playlists (e.g. [1]) thus no explicit
+ # ie_key
+ # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
+ self.url_result(urljoin(url, m.group('url')))
+ for m in re.finditer(
+ r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
+
+ title = self._search_regex(
+ r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ webpage, 'title',
+ default=strip_or_none(get_element_by_id(
+ 'video-title', webpage)), group='title')
+
+ return self.playlist_result(entries, show_id, title)
diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py
index 1eebf5d..3ba2f00 100644
--- a/youtube_dl/extractor/azubu.py
+++ b/youtube_dl/extractor/azubu.py
@@ -21,7 +21,7 @@ class AzubuIE(InfoExtractor):
'ext': 'mp4',
'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
- 'thumbnail': 're:^https?://.*\.jpe?g',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
'timestamp': 1417523507.334,
'upload_date': '20141202',
'duration': 9988.7,
@@ -38,7 +38,7 @@ class AzubuIE(InfoExtractor):
'ext': 'mp4',
'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
- 'thumbnail': 're:^https?://.*\.jpe?g',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
'timestamp': 1410530893.320,
'upload_date': '20140912',
'duration': 172.385,
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 88c590e..056e063 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor):
'id': 'entropy-ep',
},
'playlist_mincount': 3,
+ }, {
+ # not all tracks have songs
+ 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
+ 'info_dict': {
+ 'id': 'we-are-the-plague',
+ 'title': 'WE ARE THE PLAGUE',
+ 'uploader_id': 'insulters',
+ },
+ 'playlist_count': 2,
}]
def _real_extract(self, url):
@@ -217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor):
album_id = mobj.group('album_id')
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
- tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
- if not tracks_paths:
+ track_elements = re.findall(
+ r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
+ if not track_elements:
raise ExtractorError('The page doesn\'t contain any tracks')
+ # Only tracks with duration info have songs
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
- for t_path in tracks_paths]
+ for elem_content, t_path in track_elements
+ if self._html_search_meta('duration', elem_content, default=None)]
+
title = self._html_search_regex(
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
webpage, 'title', fatal=False)
diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py
new file mode 100644
index 0000000..f3a9e32
--- /dev/null
+++ b/youtube_dl/extractor/beampro.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ compat_str,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class BeamProLiveIE(InfoExtractor):
+ IE_NAME = 'Beam:live'
+ _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
+ _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
+ _TEST = {
+ 'url': 'http://www.beam.pro/niterhayven',
+ 'info_dict': {
+ 'id': '261562',
+ 'ext': 'mp4',
+ 'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
+ 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
+ 'thumbnail': r're:https://.*\.jpg$',
+ 'timestamp': 1483477281,
+ 'upload_date': '20170103',
+ 'uploader': 'niterhayven',
+ 'uploader_id': '373396',
+ 'age_limit': 18,
+ 'is_live': True,
+ 'view_count': int,
+ },
+ 'skip': 'niterhayven is offline',
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+
+ chan = self._download_json(
+ 'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)
+
+ if chan.get('online') is False:
+ raise ExtractorError(
+ '{0} is offline'.format(channel_name), expected=True)
+
+ channel_id = chan['id']
+
+ formats = self._extract_m3u8_formats(
+ 'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id,
+ channel_name, ext='mp4', m3u8_id='hls', fatal=False)
+ self._sort_formats(formats)
+
+ user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
+
+ return {
+ 'id': compat_str(chan.get('id') or channel_name),
+ 'title': self._live_title(chan.get('name') or channel_name),
+ 'description': clean_html(chan.get('description')),
+ 'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str),
+ 'timestamp': parse_iso8601(chan.get('updatedAt')),
+ 'uploader': chan.get('token') or try_get(
+ chan, lambda x: x['user']['username'], compat_str),
+ 'uploader_id': compat_str(user_id) if user_id else None,
+ 'age_limit': self._RATINGS.get(chan.get('audience')),
+ 'is_live': True,
+ 'view_count': int_or_none(chan.get('viewersTotal')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py
index 1f8ef03..d7ceaa8 100644
--- a/youtube_dl/extractor/bet.py
+++ b/youtube_dl/extractor/bet.py
@@ -17,7 +17,7 @@ class BetIE(MTVServicesInfoExtractor):
'description': 'President Obama urges persistence in confronting racism and bias.',
'duration': 1534,
'upload_date': '20141208',
- 'thumbnail': 're:(?i)^https?://.*\.jpg$',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
'subtitles': {
'en': 'mincount:2',
}
@@ -37,7 +37,7 @@ class BetIE(MTVServicesInfoExtractor):
'description': 'A BET News special.',
'duration': 1696,
'upload_date': '20141125',
- 'thumbnail': 're:(?i)^https?://.*\.jpg$',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
'subtitles': {
'en': 'mincount:2',
}
diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py
index 1a01848..b8dfbd4 100644
--- a/youtube_dl/extractor/bild.py
+++ b/youtube_dl/extractor/bild.py
@@ -19,7 +19,7 @@ class BildIE(InfoExtractor):
'ext': 'mp4',
'title': 'Das können die neuen iPads',
'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 196,
}
}
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 2d174e6..80dd838 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -5,19 +5,27 @@ import hashlib
import re
from .common import InfoExtractor
-from ..compat import compat_parse_qs
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
from ..utils import (
+ ExtractorError,
int_or_none,
float_or_none,
+ parse_iso8601,
+ smuggle_url,
+ strip_jsonp,
unified_timestamp,
+ unsmuggle_url,
urlencode_postdata,
)
class BiliBiliIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/',
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
'info_dict': {
@@ -28,29 +36,65 @@ class BiliBiliIE(InfoExtractor):
'duration': 308.315,
'timestamp': 1398012660,
'upload_date': '20140420',
- 'thumbnail': 're:^https?://.+\.jpg',
+ 'thumbnail': r're:^https?://.+\.jpg',
'uploader': '菊子桑',
'uploader_id': '156160',
},
- }
+ }, {
+ # Tested in BiliBiliBangumiIE
+ 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
+ 'md5': '3f721ad1e75030cc06faf73587cfec57',
+ 'info_dict': {
+ 'id': '100643',
+ 'ext': 'mp4',
+ 'title': 'CHAOS;CHILD',
+ 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
+ },
+ 'skip': 'Geo-restricted to China',
+ }]
+
+ _APP_KEY = '84956560bc028eb7'
+ _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
- _APP_KEY = '6f90a59ac58a4123'
- _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
+ def _report_error(self, result):
+ if 'message' in result:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
+ elif 'code' in result:
+ raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
+ else:
+ raise ExtractorError('Can\'t extract Bangumi episode ID')
def _real_extract(self, url):
- video_id = self._match_id(url)
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ anime_id = mobj.group('anime_id')
webpage = self._download_webpage(url, video_id)
- if 'anime/v' not in url:
+ if 'anime/' not in url:
cid = compat_parse_qs(self._search_regex(
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
webpage, 'player parameters'))['cid'][0]
else:
+ if 'no_bangumi_tip' not in smuggled_data:
+ self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
+ video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
+ headers = {
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ }
+ headers.update(self.geo_verification_headers())
+
js = self._download_json(
'http://bangumi.bilibili.com/web_api/get_source', video_id,
data=urlencode_postdata({'episode_id': video_id}),
- headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
+ headers=headers)
+ if 'result' not in js:
+ self._report_error(js)
cid = js['result']['cid']
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
@@ -58,7 +102,11 @@ class BiliBiliIE(InfoExtractor):
video_info = self._download_json(
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
- video_id, note='Downloading video info page')
+ video_id, note='Downloading video info page',
+ headers=self.geo_verification_headers())
+
+ if 'durl' not in video_info:
+ self._report_error(video_info)
entries = []
@@ -85,7 +133,7 @@ class BiliBiliIE(InfoExtractor):
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex(
- r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
+ r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
# TODO 'view_count' requires deobfuscating Javascript
@@ -99,7 +147,7 @@ class BiliBiliIE(InfoExtractor):
}
uploader_mobj = re.search(
- r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
+ r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
webpage)
if uploader_mobj:
info.update({
@@ -123,3 +171,70 @@ class BiliBiliIE(InfoExtractor):
'description': description,
'entries': entries,
}
+
+
+class BiliBiliBangumiIE(InfoExtractor):
+ _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
+
+ IE_NAME = 'bangumi.bilibili.com'
+ IE_DESC = 'BiliBili番剧'
+
+ _TESTS = [{
+ 'url': 'http://bangumi.bilibili.com/anime/1869',
+ 'info_dict': {
+ 'id': '1869',
+ 'title': '混沌武士',
+ 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
+ },
+ 'playlist_count': 26,
+ }, {
+ 'url': 'http://bangumi.bilibili.com/anime/1869',
+ 'info_dict': {
+ 'id': '1869',
+ 'title': '混沌武士',
+ 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
+ },
+ 'playlist': [{
+ 'md5': '91da8621454dd58316851c27c68b0c13',
+ 'info_dict': {
+ 'id': '40062',
+ 'ext': 'mp4',
+ 'title': '混沌武士',
+ 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
+ 'timestamp': 1414538739,
+ 'upload_date': '20141028',
+ 'episode': '疾风怒涛 Tempestuous Temperaments',
+ 'episode_number': 1,
+ },
+ }],
+ 'params': {
+ 'playlist_items': '1',
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ bangumi_id = self._match_id(url)
+
+ # Sometimes this API returns a JSONP response
+ season_info = self._download_json(
+ 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
+ bangumi_id, transform_source=strip_jsonp)['result']
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
+ 'ie_key': BiliBiliIE.ie_key(),
+ 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
+ 'episode': episode.get('index_title'),
+ 'episode_number': int_or_none(episode.get('index')),
+ } for episode in season_info['episodes']]
+
+ entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
+
+ return self.playlist_result(
+ entries, bangumi_id,
+ season_info.get('bangumi_title'), season_info.get('evaluate'))
diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py
index 7608c0a..b92031c 100644
--- a/youtube_dl/extractor/biobiochiletv.py
+++ b/youtube_dl/extractor/biobiochiletv.py
@@ -19,7 +19,7 @@ class BioBioChileTVIE(InfoExtractor):
'id': 'sobre-camaras-y-camarillas-parlamentarias',
'ext': 'mp4',
'title': 'Sobre Cámaras y camarillas parlamentarias',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Fernando Atria',
},
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
@@ -31,7 +31,7 @@ class BioBioChileTVIE(InfoExtractor):
'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
'ext': 'mp4',
'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Piangella Obrador',
},
'params': {
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index 2a8cd64..c5e11e8 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -45,7 +45,8 @@ class BloombergIE(InfoExtractor):
name = self._match_id(url)
webpage = self._download_webpage(url, name)
video_id = self._search_regex(
- r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
+ (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
webpage, 'id', group='url', default=None)
if not video_id:
bplayer_data = self._parse_json(self._search_regex(
diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py
index 725859b..5a87c26 100644
--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@@ -1,9 +1,9 @@
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
int_or_none,
parse_age_limit,
@@ -11,7 +11,7 @@ from ..utils import (
class BreakIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>break|screenjunkies)\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
_TESTS = [{
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
'info_dict': {
@@ -21,44 +21,123 @@ class BreakIE(InfoExtractor):
'age_limit': 13,
}
}, {
+ 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
+ 'md5': '5c2b686bec3d43de42bde9ec047536b0',
+ 'info_dict': {
+ 'id': '2841915',
+ 'display_id': 'best-quentin-tarantino-movie',
+ 'ext': 'mp4',
+ 'title': 'Best Quentin Tarantino Movie',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 3671,
+ 'age_limit': 13,
+ 'tags': list,
+ },
+ }, {
+ 'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight',
+ 'info_dict': {
+ 'id': '2348808',
+ 'display_id': 'honest-trailers-the-dark-knight',
+ 'ext': 'mp4',
+ 'title': 'Honest Trailers - The Dark Knight',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'age_limit': 10,
+ 'tags': list,
+ },
+ }, {
+ # requires subscription but worked around
+ 'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285',
+ 'info_dict': {
+ 'id': '3003285',
+ 'display_id': 'knocking-dead-ep-1-the-show-so-far',
+ 'ext': 'mp4',
+ 'title': 'State of The Dead Recap: Knocking Dead Pilot',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 3307,
+ 'age_limit': 13,
+ 'tags': list,
+ },
+ }, {
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
'only_matching': True,
}]
+ _DEFAULT_BITRATES = (48, 150, 320, 496, 864, 2240, 3264)
+
def _real_extract(self, url):
- video_id = self._match_id(url)
+ site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ (r'src=["\']/embed/(\d+)', r'data-video-content-id=["\'](\d+)'),
+ webpage, 'video id')
+
webpage = self._download_webpage(
- 'http://www.break.com/embed/%s' % video_id, video_id)
- info = json.loads(self._search_regex(
- r'var embedVars = ({.*})\s*?</script>',
- webpage, 'info json', flags=re.DOTALL))
+ 'http://www.%s.com/embed/%s' % (site, video_id),
+ display_id, 'Downloading video embed page')
+ embed_vars = self._parse_json(
+ self._search_regex(
+ r'(?s)embedVars\s*=\s*({.+?})\s*</script>', webpage, 'embed vars'),
+ display_id)
- youtube_id = info.get('youtubeId')
+ youtube_id = embed_vars.get('youtubeId')
if youtube_id:
return self.url_result(youtube_id, 'Youtube')
- formats = [{
- 'url': media['uri'] + '?' + info['AuthToken'],
- 'tbr': media['bitRate'],
- 'width': media['width'],
- 'height': media['height'],
- } for media in info['media'] if media.get('mediaPurpose') == 'play']
+ title = embed_vars['contentName']
- if not formats:
+ formats = []
+ bitrates = []
+ for f in embed_vars.get('media', []):
+ if not f.get('uri') or f.get('mediaPurpose') != 'play':
+ continue
+ bitrate = int_or_none(f.get('bitRate'))
+ if bitrate:
+ bitrates.append(bitrate)
formats.append({
- 'url': info['videoUri']
+ 'url': f['uri'],
+ 'format_id': 'http-%d' % bitrate if bitrate else 'http',
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'tbr': bitrate,
+ 'format': 'mp4',
})
- self._sort_formats(formats)
+ if not bitrates:
+ # When subscriptionLevel > 0, i.e. plus subscription is required
+ # media list will be empty. However, hds and hls uris are still
+ # available. We can grab them assuming bitrates to be default.
+ bitrates = self._DEFAULT_BITRATES
+
+ auth_token = embed_vars.get('AuthToken')
- duration = int_or_none(info.get('videoLengthInSeconds'))
- age_limit = parse_age_limit(info.get('audienceRating'))
+ def construct_manifest_url(base_url, ext):
+ pieces = [base_url]
+ pieces.extend([compat_str(b) for b in bitrates])
+ pieces.append('_kbps.mp4.%s?%s' % (ext, auth_token))
+ return ','.join(pieces)
+
+ if bitrates and auth_token:
+ hds_url = embed_vars.get('hdsUri')
+ if hds_url:
+ formats.extend(self._extract_f4m_formats(
+ construct_manifest_url(hds_url, 'f4m'),
+ display_id, f4m_id='hds', fatal=False))
+ hls_url = embed_vars.get('hlsUri')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ construct_manifest_url(hls_url, 'm3u8'),
+ display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
return {
'id': video_id,
- 'title': info['contentName'],
- 'thumbnail': info['thumbUri'],
- 'duration': duration,
- 'age_limit': age_limit,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': embed_vars.get('thumbUri'),
+ 'duration': int_or_none(embed_vars.get('videoLengthInSeconds')) or None,
+ 'age_limit': parse_age_limit(embed_vars.get('audienceRating')),
+ 'tags': embed_vars.get('tags', '').split(','),
'formats': formats,
}
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 945cf19..5c6e99d 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor):
params = {}
- playerID = find_param('playerID')
+ playerID = find_param('playerID') or find_param('playerId')
if playerID is None:
raise ExtractorError('Cannot find player ID')
params['playerID'] = playerID
@@ -204,7 +204,7 @@ class BrightcoveLegacyIE(InfoExtractor):
# // build Brightcove <object /> XML
# }
m = re.search(
- r'''(?x)customBC.\createVideo\(
+ r'''(?x)customBC\.createVideo\(
.*? # skipping width and height
["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
@@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(
- r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
- webpage)
+ r'''(?x)
+ <meta\s+
+ (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
+ content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
+ ''', webpage)
if url_m:
- url = unescapeHTML(url_m.group(1))
+ url = unescapeHTML(url_m.group('url'))
# Some sites don't add it, we can't download with this url, for example:
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
- if 'playerKey' in url or 'videoId' in url:
+ if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
return [url]
matches = re.findall(
@@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor):
url, smuggled_data = unsmuggle_url(url, {})
# Change the 'videoId' and others field to '@videoPlayer'
- url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
+ url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
# Change bckey (used by bcove.me urls) to playerKey
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = re.match(self._VALID_URL, url)
@@ -548,7 +551,7 @@ class BrightcoveNewIE(InfoExtractor):
container = source.get('container')
ext = mimetype2ext(source.get('type'))
src = source.get('src')
- if ext == 'ism':
+ if ext == 'ism' or container == 'WVM':
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src:
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index 4be175d..8ef0896 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor):
'ext': 'mp4',
'title': 'Season 5 Episode 5',
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1486.486,
},
'params': {
diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py
index d4e6fbd..8f0c6c5 100644
--- a/youtube_dl/extractor/camdemy.py
+++ b/youtube_dl/extractor/camdemy.py
@@ -26,7 +26,7 @@ class CamdemyIE(InfoExtractor):
'id': '5181',
'ext': 'mp4',
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'creator': 'ss11spring',
'duration': 1591,
'upload_date': '20130114',
@@ -41,7 +41,7 @@ class CamdemyIE(InfoExtractor):
'id': '13885',
'ext': 'mp4',
'title': 'EverCam + Camdemy QuickStart',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
'creator': 'evercam',
'duration': 318,
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 1c3c41d..4b9fa2d 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -27,6 +27,7 @@ class CanalplusIE(InfoExtractor):
(?:www\.)?d8\.tv|
(?:www\.)?c8\.fr|
(?:www\.)?d17\.tv|
+ (?:(?:football|www)\.)?cstar\.fr|
(?:www\.)?itele\.fr
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
player\.canalplus\.fr/#/(?P<id>\d+)
@@ -40,6 +41,7 @@ class CanalplusIE(InfoExtractor):
'd8': 'd8',
'c8': 'd8',
'd17': 'd17',
+ 'cstar': 'd17',
'itele': 'itele',
}
@@ -87,6 +89,19 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20161014',
},
}, {
+ 'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
+ 'info_dict': {
+ 'id': '1416769',
+ 'display_id': 'pid7566-feminines-videos',
+ 'ext': 'mp4',
+ 'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
+ 'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
+ 'upload_date': '20160921',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://m.canalplus.fr/?vid=1398231',
'only_matching': True,
}, {
@@ -105,8 +120,9 @@ class CanalplusIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
- r'id=["\']canal_video_player(?P<id>\d+)'],
- webpage, 'video id', group='id')
+ r'id=["\']canal_video_player(?P<id>\d+)',
+ r'data-video=["\'](?P<id>\d+)'],
+ webpage, 'video id', default=mobj.group('vid'), group='id')
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py
index d183d5d..544c665 100644
--- a/youtube_dl/extractor/canvas.py
+++ b/youtube_dl/extractor/canvas.py
@@ -17,7 +17,7 @@ class CanvasIE(InfoExtractor):
'ext': 'mp4',
'title': 'De afspraak veilt voor de Warmste Week',
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 49.02,
}
}, {
@@ -29,7 +29,7 @@ class CanvasIE(InfoExtractor):
'ext': 'mp4',
'title': 'Pieter 0167',
'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2553.08,
'subtitles': {
'nl': [{
@@ -48,7 +48,7 @@ class CanvasIE(InfoExtractor):
'ext': 'mp4',
'title': 'Herbekijk Sorry voor alles',
'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3788.06,
},
'params': {
@@ -89,6 +89,9 @@ class CanvasIE(InfoExtractor):
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, display_id, f4m_id=format_type, fatal=False))
+ elif format_type == 'MPEG_DASH':
+ formats.extend(self._extract_mpd_formats(
+ format_url, display_id, mpd_id=format_type, fatal=False))
else:
formats.append({
'format_id': format_type,
diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py
index 66c0f90..9ba909a 100644
--- a/youtube_dl/extractor/carambatv.py
+++ b/youtube_dl/extractor/carambatv.py
@@ -21,7 +21,7 @@ class CarambaTVIE(InfoExtractor):
'id': '191910501',
'ext': 'mp4',
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 2678.31,
},
}, {
@@ -69,7 +69,7 @@ class CarambaTVPageIE(InfoExtractor):
'id': '475222',
'ext': 'flv',
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
# duration reported by videomore is incorrect
'duration': int,
},
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index d71fddf..cf678e7 100644
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -90,36 +90,49 @@ class CBCIE(InfoExtractor):
},
}],
'skip': 'Geo-restricted to Canada',
+ }, {
+ # multiple CBC.APP.Caffeine.initInstance(...)
+ 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
+ 'info_dict': {
+ 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
+ 'id': 'dog-indoor-exercise-winter-1.3928238',
+ },
+ 'playlist_mincount': 6,
}]
@classmethod
def suitable(cls, url):
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
+ def _extract_player_init(self, player_init, display_id):
+ player_info = self._parse_json(player_init, display_id, js_to_json)
+ media_id = player_info.get('mediaId')
+ if not media_id:
+ clip_id = player_info['clipId']
+ feed = self._download_json(
+ 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
+ clip_id, fatal=False)
+ if feed:
+ media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
+ if not media_id:
+ media_id = self._download_json(
+ 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+ clip_id)['entries'][0]['id'].split('/')[-1]
+ return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- player_init = self._search_regex(
- r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
- default=None)
- if player_init:
- player_info = self._parse_json(player_init, display_id, js_to_json)
- media_id = player_info.get('mediaId')
- if not media_id:
- clip_id = player_info['clipId']
- feed = self._download_json(
- 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
- clip_id, fatal=False)
- if feed:
- media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
- if not media_id:
- media_id = self._download_json(
- 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
- clip_id)['entries'][0]['id'].split('/')[-1]
- return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
- else:
- entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
- return self.playlist_result(entries)
+ entries = [
+ self._extract_player_init(player_init, display_id)
+ for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+ entries.extend([
+ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+ for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+ return self.playlist_result(
+ entries, display_id,
+ self._og_search_title(webpage, fatal=False),
+ self._og_search_description(webpage))
class CBCPlayerIE(InfoExtractor):
@@ -283,11 +296,12 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
if len(formats) < 2:
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
- # Despite metadata in m3u8 all video+audio formats are
- # actually video-only (no audio)
for f in formats:
- if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
- f['acodec'] = 'none'
+ format_id = f.get('format_id')
+ if format_id.startswith('AAC'):
+ f['acodec'] = 'aac'
+ elif format_id.startswith('AC3'):
+ f['acodec'] = 'ac-3'
self._sort_formats(formats)
info = {
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index 91b0f5f..17bb9af 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -39,7 +39,7 @@ class CBSNewsIE(CBSIE):
'upload_date': '20140404',
'timestamp': 1396650660,
'uploader': 'CBSI-NEW',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 205,
'subtitles': {
'en': [{
diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py
index 8f7f09e..7347021 100644
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -19,7 +19,7 @@ class CCCIE(InfoExtractor):
'ext': 'mp4',
'title': 'Introduction to Processor Design',
'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20131228',
'timestamp': 1388188800,
'duration': 3710,
@@ -32,7 +32,7 @@ class CCCIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
+ event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id')
event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
formats = []
diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py
new file mode 100644
index 0000000..39938c9
--- /dev/null
+++ b/youtube_dl/extractor/ccma.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ clean_html,
+)
+
+
+class CCMAIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
+ 'md5': '7296ca43977c8ea4469e719c609b0871',
+ 'info_dict': {
+ 'id': '5630208',
+ 'ext': 'mp4',
+ 'title': 'L\'espot de La Marató de TV3',
+ 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
+ 'timestamp': 1470918540,
+ 'upload_date': '20160811',
+ }
+ }, {
+ 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
+ 'md5': 'fa3e38f269329a278271276330261425',
+ 'info_dict': {
+ 'id': '943685',
+ 'ext': 'mp3',
+ 'title': 'El Consell de Savis analitza el derbi',
+ 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
+ 'upload_date': '20171205',
+ 'timestamp': 1512507300,
+ }
+ }]
+
+ def _real_extract(self, url):
+ media_type, media_id = re.match(self._VALID_URL, url).groups()
+ media_data = {}
+ formats = []
+ profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
+ for i, profile in enumerate(profiles):
+ md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
+ 'media': media_type,
+ 'idint': media_id,
+ 'profile': profile,
+ }, fatal=False)
+ if md:
+ media_data = md
+ media_url = media_data.get('media', {}).get('url')
+ if media_url:
+ formats.append({
+ 'format_id': profile,
+ 'url': media_url,
+ 'quality': i,
+ })
+ self._sort_formats(formats)
+
+ informacio = media_data['informacio']
+ title = informacio['titol']
+ durada = informacio.get('durada', {})
+ duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
+ timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
+
+ subtitles = {}
+ subtitols = media_data.get('subtitols', {})
+ if subtitols:
+ sub_url = subtitols.get('url')
+ if sub_url:
+ subtitles.setdefault(
+ subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
+ 'url': sub_url,
+ })
+
+ thumbnails = []
+ imatges = media_data.get('imatges', {})
+ if imatges:
+ thumbnail_url = imatges.get('url')
+ if thumbnail_url:
+ thumbnails = [{
+ 'url': thumbnail_url,
+ 'width': int_or_none(imatges.get('amplada')),
+ 'height': int_or_none(imatges.get('alcada')),
+ }]
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': clean_html(informacio.get('descripcio')),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py
index 72a72cb..c76f361 100644
--- a/youtube_dl/extractor/cctv.py
+++ b/youtube_dl/extractor/cctv.py
@@ -4,50 +4,188 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import float_or_none
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ try_get,
+ unified_timestamp,
+)
class CCTVIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:.+?\.)?
- (?:
- cctv\.(?:com|cn)|
- cntv\.cn
- )/
- (?:
- video/[^/]+/(?P<id>[0-9a-f]{32})|
- \d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+)
- )'''
+ IE_DESC = '央视网'
+ _VALID_URL = r'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P<id>[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)'
_TESTS = [{
- 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
- 'md5': '819c7b49fc3927d529fb4cd555621823',
+ # fo.addVariable("videoCenterId","id")
+ 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml',
+ 'md5': 'd61ec00a493e09da810bf406a078f691',
'info_dict': {
- 'id': '454368eb19ad44a1925bf1eb96140a61',
+ 'id': '5ecdbeab623f4973b40ff25f18b174e8',
'ext': 'mp4',
- 'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1',
- }
+ 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)',
+ 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95',
+ 'duration': 98,
+ 'uploader': 'songjunjie',
+ 'timestamp': 1455279956,
+ 'upload_date': '20160212',
+ },
+ }, {
+ # var guid = "id"
+ 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml',
+ 'info_dict': {
+ 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae',
+ 'ext': 'mp4',
+ 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)',
+ 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。',
+ 'duration': 37,
+ 'uploader': 'shujun',
+ 'timestamp': 1454677291,
+ 'upload_date': '20160205',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # changePlayer('id')
+ 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml',
+ 'info_dict': {
+ 'id': '4bb9bb4db7a6471ba85fdeda5af0381e',
+ 'ext': 'mp4',
+ 'title': 'NHnews008 ANNUAL POLITICAL SEASON',
+ 'description': 'Four Comprehensives',
+ 'duration': 60,
+ 'uploader': 'zhangyunlei',
+ 'timestamp': 1425385521,
+ 'upload_date': '20150303',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # loadvideo('id')
+ 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml',
+ 'info_dict': {
+ 'id': 'b15f009ff45c43968b9af583fc2e04b2',
+ 'ext': 'mp4',
+ 'title': 'Путь,усыпанный космеями Серия 1',
+ 'description': 'Путь, усыпанный космеями',
+ 'duration': 2645,
+ 'uploader': 'renxue',
+ 'timestamp': 1477479241,
+ 'upload_date': '20161026',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # var initMyAray = 'id'
+ 'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml',
+ 'info_dict': {
+ 'id': 'a194cfa7f18c426b823d876668325946',
+ 'ext': 'mp4',
+ 'title': '小泽征尔音乐塾 音乐梦想无国界',
+ 'duration': 2173,
+ 'timestamp': 1369248264,
+ 'upload_date': '20130522',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # var ids = ["id"]
+ 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
+ 'info_dict': {
+ 'id': 'a8606119a4884588a79d81c02abecc16',
+ 'ext': 'mp3',
+ 'title': '来自维也纳的新年贺礼',
+ 'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7',
+ 'duration': 1578,
+ 'uploader': 'djy',
+ 'timestamp': 1482942419,
+ 'upload_date': '20161228',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
+ 'only_matching': True,
}, {
'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
'only_matching': True,
}, {
'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
- 'only_matching': True
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id, display_id = re.match(self._VALID_URL, url).groups()
- if not video_id:
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(
- r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})',
- webpage, 'video_id')
- api_data = self._download_json(
- 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id)
- m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url'])
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_id = self._search_regex(
+ [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
+ r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
+ r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
+ r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
+ r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
+ r'var\s+ids\s*=\s*\[["\']([\da-fA-F]+)'],
+ webpage, 'video id')
+
+ data = self._download_json(
+ 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id,
+ query={
+ 'pid': video_id,
+ 'url': url,
+ 'idl': 32,
+ 'idlr': 32,
+ 'modifyed': 'false',
+ })
+
+ title = data['title']
+
+ formats = []
+
+ video = data.get('video')
+ if isinstance(video, dict):
+ for quality, chapters_key in enumerate(('lowChapters', 'chapters')):
+ video_url = try_get(
+ video, lambda x: x[chapters_key][0]['url'], compat_str)
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ 'quality': quality,
+ 'preference': -1,
+ })
+
+ hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
+ if hls_url:
+ hls_url = re.sub(r'maxbr=\d+&?', '', hls_url)
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ self._sort_formats(formats)
+
+ uploader = data.get('editer_name')
+ description = self._html_search_meta(
+ 'description', webpage, default=None)
+ timestamp = unified_timestamp(data.get('f_pgmtime'))
+ duration = float_or_none(try_get(video, lambda x: x['totalLength']))
return {
'id': video_id,
- 'title': api_data['title'],
- 'formats': self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False),
- 'duration': float_or_none(api_data.get('video', {}).get('totalLength')),
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py
index e00bdaf..ae7af2f 100755
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dl/extractor/cda.py
@@ -24,7 +24,7 @@ class CDAIE(InfoExtractor):
'height': 720,
'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
'description': 'md5:269ccd135d550da90d1662651fcb9772',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'average_rating': float,
'duration': 39
}
@@ -36,7 +36,7 @@ class CDAIE(InfoExtractor):
'ext': 'mp4',
'title': 'Lądowanie na lotnisku na Maderze',
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'crash404',
'view_count': int,
'average_rating': float,
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 4ec79d1..4f88c31 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -25,7 +25,7 @@ class CeskaTelevizeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Hyde Park Civilizace',
'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 3350,
},
'params': {
@@ -39,7 +39,7 @@ class CeskaTelevizeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Hyde Park Civilizace: Bonus 01 - En',
'description': 'English Subtittles',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 81.3,
},
'params': {
@@ -52,7 +52,7 @@ class CeskaTelevizeIE(InfoExtractor):
'info_dict': {
'id': 402,
'ext': 'mp4',
- 'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
},
'params': {
@@ -80,7 +80,7 @@ class CeskaTelevizeIE(InfoExtractor):
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 34d4e61..865dbca 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -31,7 +31,7 @@ class Channel9IE(InfoExtractor):
'title': 'Developer Kick-Off Session: Stuff We Love',
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
'duration': 4576,
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'session_code': 'KOS002',
'session_day': 'Day 1',
'session_room': 'Arena 1A',
@@ -47,7 +47,7 @@ class Channel9IE(InfoExtractor):
'title': 'Self-service BI with Power BI - nuclear testing',
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
'duration': 1540,
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'authors': ['Mike Wilmot'],
},
}, {
@@ -59,7 +59,7 @@ class Channel9IE(InfoExtractor):
'title': 'Ranges for the Standard Library',
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
'duration': 5646,
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
'params': {
'skip_download': True,
diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py
index 4bf2cf7..2d517f2 100644
--- a/youtube_dl/extractor/charlierose.py
+++ b/youtube_dl/extractor/charlierose.py
@@ -13,7 +13,7 @@ class CharlieRoseIE(InfoExtractor):
'id': '27996',
'ext': 'mp4',
'title': 'Remembering Zaha Hadid',
- 'thumbnail': 're:^https?://.*\.jpg\?\d+',
+ 'thumbnail': r're:^https?://.*\.jpg\?\d+',
'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.',
'subtitles': {
'en': [{
diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py
index 29a8820..8fbc91c 100644
--- a/youtube_dl/extractor/chaturbate.py
+++ b/youtube_dl/extractor/chaturbate.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import ExtractorError
@@ -31,30 +33,35 @@ class ChaturbateIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- m3u8_url = self._search_regex(
- r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage,
- 'playlist', default=None, group='url')
+ m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
+ r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
- if not m3u8_url:
+ if not m3u8_formats:
error = self._search_regex(
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
webpage, 'error', group='error', default=None)
if not error:
- if any(p not in webpage for p in (
+ if any(p in webpage for p in (
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
error = self._ROOM_OFFLINE
if error:
raise ExtractorError(error, expected=True)
raise ExtractorError('Unable to find stream URL')
- formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+ formats = []
+ for m3u8_id, m3u8_url in m3u8_formats:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4',
+ # ffmpeg skips segments for fast m3u8
+ preference=-10 if m3u8_id == 'fast' else None,
+ m3u8_id=m3u8_id, fatal=False, live=True))
self._sort_formats(formats)
return {
'id': video_id,
'title': self._live_title(video_id),
- 'thumbnail': 'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg' % video_id,
+ 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
'age_limit': self._rta_search(webpage),
'is_live': True,
'formats': formats,
diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py
index f35df14..4815b34 100644
--- a/youtube_dl/extractor/chirbit.py
+++ b/youtube_dl/extractor/chirbit.py
@@ -19,6 +19,7 @@ class ChirbitIE(InfoExtractor):
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
'duration': 306,
+ 'uploader': 'Gerryaudio',
},
'params': {
'skip_download': True,
@@ -54,6 +55,9 @@ class ChirbitIE(InfoExtractor):
duration = parse_duration(self._search_regex(
r'class=["\']c-length["\'][^>]*>([^<]+)',
webpage, 'duration', fatal=False))
+ uploader = self._search_regex(
+ r'id=["\']chirbit-username["\'][^>]*>([^<]+)',
+ webpage, 'uploader', fatal=False)
return {
'id': audio_id,
@@ -61,6 +65,7 @@ class ChirbitIE(InfoExtractor):
'title': title,
'description': description,
'duration': duration,
+ 'uploader': uploader,
}
diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py
index 252c2e8..ab651d1 100644
--- a/youtube_dl/extractor/cliphunter.py
+++ b/youtube_dl/extractor/cliphunter.py
@@ -30,7 +30,7 @@ class CliphunterIE(InfoExtractor):
'id': '1012420',
'ext': 'flv',
'title': 'Fun Jynx Maze solo',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 18,
},
'skip': 'Video gone',
@@ -41,7 +41,7 @@ class CliphunterIE(InfoExtractor):
'id': '2019449',
'ext': 'mp4',
'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 18,
},
}]
diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py
index 0b6ad89..6cdb42f 100644
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@@ -18,7 +18,7 @@ class ClipsyndicateIE(InfoExtractor):
'ext': 'mp4',
'title': 'Brick Briscoe',
'duration': 612,
- 'thumbnail': 're:^https?://.+\.jpg',
+ 'thumbnail': r're:^https?://.+\.jpg',
},
}, {
'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py
index f7ee3a8..98f9cb5 100644
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dl/extractor/clubic.py
@@ -19,7 +19,7 @@ class ClubicIE(InfoExtractor):
'ext': 'mp4',
'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
- 'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
+ 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$',
}
}, {
'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html',
diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py
index 7d3e9b0..e701fbe 100644
--- a/youtube_dl/extractor/cmt.py
+++ b/youtube_dl/extractor/cmt.py
@@ -1,13 +1,11 @@
from __future__ import unicode_literals
from .mtv import MTVIE
-from ..utils import ExtractorError
class CMTIE(MTVIE):
IE_NAME = 'cmt.com'
- _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
- _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
+ _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
@@ -33,17 +31,24 @@ class CMTIE(MTVIE):
}, {
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
'only_matching': True,
+ }, {
+ 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes',
+ 'only_matching': True,
}]
- @classmethod
- def _transform_rtmp_url(cls, rtmp_video_url):
- if 'error_not_available.swf' in rtmp_video_url:
- raise ExtractorError(
- '%s said: video is not available' % cls.IE_NAME, expected=True)
-
- return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
-
def _extract_mgid(self, webpage):
- return self._search_regex(
+ mgid = self._search_regex(
r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
- webpage, 'mgid', group='mgid')
+ webpage, 'mgid', group='mgid', default=None)
+ if not mgid:
+ mgid = self._extract_triforce_mgid(webpage)
+ return mgid
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._extract_mgid(webpage)
+ return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py
index f9e8419..18c7347 100644
--- a/youtube_dl/extractor/collegerama.py
+++ b/youtube_dl/extractor/collegerama.py
@@ -21,7 +21,7 @@ class CollegeRamaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
'description': '',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 7713.088,
'timestamp': 1413309600,
'upload_date': '20141014',
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 0239dfd..4cac294 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -48,15 +48,7 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
-
- feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed')
- feed = self._parse_json(feed_json, playlist_id)
- zones = feed['manifest']['zones']
-
- video_zone = zones['t2_lc_promo1']
- feed = self._download_json(video_zone['feed'], playlist_id)
- mgid = feed['result']['data']['id']
-
+ mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
videos_info = self._get_videos_info(mgid)
return videos_info
@@ -79,7 +71,7 @@ class ToshIE(MTVServicesInfoExtractor):
'ext': 'mp4',
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
'description': 'Tosh asked fans to share their summer plans.',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
# It's really reported to be published on year 2077
'upload_date': '20770610',
'timestamp': 3390510600,
@@ -93,12 +85,6 @@ class ToshIE(MTVServicesInfoExtractor):
'only_matching': True,
}]
- @classmethod
- def _transform_rtmp_url(cls, rtmp_video_url):
- new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
- new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
- return new_urls
-
class ComedyCentralTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 05c51fa..0b4e2ac 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -59,6 +59,7 @@ from ..utils import (
parse_m3u8_attributes,
extract_attributes,
parse_codecs,
+ urljoin,
)
@@ -120,9 +121,19 @@ class InfoExtractor(object):
download, lower-case.
"http", "https", "rtsp", "rtmp", "rtmpe",
"m3u8", "m3u8_native" or "http_dash_segments".
- * fragments A list of fragments of the fragmented media,
- with the following entries:
- * "url" (mandatory) - fragment's URL
+ * fragment_base_url
+ Base URL for fragments. Each fragment's path
+ value (if present) will be relative to
+ this URL.
+ * fragments A list of fragments of a fragmented media.
+ Each fragment entry must contain either a URL
+ or a path. If a URL is present, it should be
+ used by the client. Otherwise both path and
+ fragment_base_url must be present. Here is
+ the list of all potential fields:
+ * "url" - fragment's URL
+ * "path" - fragment's path relative to
+ fragment_base_url
* "duration" (optional, int or float)
* "filesize" (optional, int)
* preference Order number of this format. If this field is
@@ -188,9 +199,10 @@ class InfoExtractor(object):
uploader_url: Full URL to a personal webpage of the video uploader.
location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format
- {language: subformats}. "subformats" is a list sorted from
- lower to higher preference, each element is a dictionary
- with the "ext" entry and one of:
+ {tag: subformats}. "tag" is usually a language code, and
+ "subformats" is a list sorted from lower to higher
+ preference, each element is a dictionary with the "ext"
+ entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
"ext" will be calculated from URL if missing
@@ -1013,13 +1025,13 @@ class InfoExtractor(object):
unique_formats.append(f)
formats[:] = unique_formats
- def _is_valid_url(self, url, video_id, item='video'):
+ def _is_valid_url(self, url, video_id, item='video', headers={}):
url = self._proto_relative_url(url, scheme='http:')
# For now assume non HTTP(S) URLs always valid
if not (url.startswith('http://') or url.startswith('https://')):
return True
try:
- self._request_webpage(url, video_id, 'Checking %s URL' % item)
+ self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
return True
except ExtractorError as e:
if isinstance(e.cause, compat_urllib_error.URLError):
@@ -1224,6 +1236,7 @@ class InfoExtractor(object):
'protocol': entry_protocol,
'preference': preference,
}]
+ audio_in_video_stream = {}
last_info = {}
last_media = {}
for line in m3u8_doc.splitlines():
@@ -1233,25 +1246,32 @@ class InfoExtractor(object):
media = parse_m3u8_attributes(line)
media_type = media.get('TYPE')
if media_type in ('VIDEO', 'AUDIO'):
+ group_id = media.get('GROUP-ID')
media_url = media.get('URI')
if media_url:
format_id = []
- for v in (media.get('GROUP-ID'), media.get('NAME')):
+ for v in (group_id, media.get('NAME')):
if v:
format_id.append(v)
- formats.append({
+ f = {
'format_id': '-'.join(format_id),
'url': format_url(media_url),
'language': media.get('LANGUAGE'),
- 'vcodec': 'none' if media_type == 'AUDIO' else None,
'ext': ext,
'protocol': entry_protocol,
'preference': preference,
- })
+ }
+ if media_type == 'AUDIO':
+ f['vcodec'] = 'none'
+ if group_id and not audio_in_video_stream.get(group_id):
+ audio_in_video_stream[group_id] = False
+ formats.append(f)
else:
# When there is no URI in EXT-X-MEDIA let this tag's
# data be used by regular URI lines below
last_media = media
+ if media_type == 'AUDIO' and group_id:
+ audio_in_video_stream[group_id] = True
elif line.startswith('#') or not line.strip():
continue
else:
@@ -1295,6 +1315,9 @@ class InfoExtractor(object):
'abr': abr,
})
f.update(parse_codecs(last_info.get('CODECS')))
+ if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
+ # TODO: update acodec for audio only formats with the same GROUP-ID
+ f['acodec'] = 'none'
formats.append(f)
last_info = {}
last_media = {}
@@ -1614,21 +1637,16 @@ class InfoExtractor(object):
segment_template = element.find(_add_ns('SegmentTemplate'))
if segment_template is not None:
extract_common(segment_template)
- media_template = segment_template.get('media')
- if media_template:
- ms_info['media_template'] = media_template
+ media = segment_template.get('media')
+ if media:
+ ms_info['media'] = media
initialization = segment_template.get('initialization')
if initialization:
- ms_info['initialization_url'] = initialization
+ ms_info['initialization'] = initialization
else:
extract_Initialization(segment_template)
return ms_info
- def combine_url(base_url, target_url):
- if re.match(r'^https?://', target_url):
- return target_url
- return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
-
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
for period in mpd_doc.findall(_add_ns('Period')):
@@ -1668,6 +1686,7 @@ class InfoExtractor(object):
lang = representation_attrib.get('lang')
url_el = representation.find(_add_ns('BaseURL'))
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+ bandwidth = int_or_none(representation_attrib.get('bandwidth'))
f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
'url': base_url,
@@ -1675,23 +1694,41 @@ class InfoExtractor(object):
'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')),
'height': int_or_none(representation_attrib.get('height')),
- 'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
+ 'tbr': int_or_none(bandwidth, 1000),
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
'fps': int_or_none(representation_attrib.get('frameRate')),
- 'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
- 'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
}
+ f.update(parse_codecs(representation_attrib.get('codecs')))
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
- if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
- media_template = representation_ms_info['media_template']
- media_template = media_template.replace('$RepresentationID$', representation_id)
- media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
- media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
- media_template.replace('$$', '$')
+ def prepare_template(template_name, identifiers):
+ t = representation_ms_info[template_name]
+ t = t.replace('$RepresentationID$', representation_id)
+ t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+ t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+ t.replace('$$', '$')
+ return t
+
+ # @initialization is a regular template, like the @media one,
+ # so it should be handled just the same way (see
+ # https://github.com/rg3/youtube-dl/issues/11605)
+ if 'initialization' in representation_ms_info:
+ initialization_template = prepare_template(
+ 'initialization',
+ # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+ # $Time$ shall not be included for @initialization thus
+ # only $Bandwidth$ remains
+ ('Bandwidth', ))
+ representation_ms_info['initialization_url'] = initialization_template % {
+ 'Bandwidth': bandwidth,
+ }
+
+ if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+ media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time
@@ -1703,7 +1740,7 @@ class InfoExtractor(object):
representation_ms_info['fragments'] = [{
'url': media_template % {
'Number': segment_number,
- 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
+ 'Bandwidth': bandwidth,
},
'duration': segment_duration,
} for segment_number in range(
@@ -1721,7 +1758,7 @@ class InfoExtractor(object):
def add_segment_url():
segment_url = media_template % {
'Time': segment_time,
- 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
+ 'Bandwidth': bandwidth,
'Number': segment_number,
}
representation_ms_info['fragments'].append({
@@ -1744,14 +1781,16 @@ class InfoExtractor(object):
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
# or any YouTube dashsegments video
fragments = []
- s_num = 0
- for segment_url in representation_ms_info['segment_urls']:
- s = representation_ms_info['s'][s_num]
+ segment_index = 0
+ timescale = representation_ms_info['timescale']
+ for s in representation_ms_info['s']:
+ duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1):
fragments.append({
- 'url': segment_url,
- 'duration': float_or_none(s['d'], representation_ms_info['timescale']),
+ 'url': representation_ms_info['segment_urls'][segment_index],
+ 'duration': duration,
})
+ segment_index += 1
representation_ms_info['fragments'] = fragments
# NB: MPD manifest may contain direct URLs to unfragmented media.
# No fragments key is present in this case.
@@ -1761,13 +1800,13 @@ class InfoExtractor(object):
'protocol': 'http_dash_segments',
})
if 'initialization_url' in representation_ms_info:
- initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
+ initialization_url = representation_ms_info['initialization_url']
if not f.get('url'):
f['url'] = initialization_url
f['fragments'].append({'url': initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
for fragment in f['fragments']:
- fragment['url'] = combine_url(base_url, fragment['url'])
+ fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats
@@ -1881,7 +1920,7 @@ class InfoExtractor(object):
})
return formats
- def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
@@ -1898,11 +1937,16 @@ class InfoExtractor(object):
def _media_formats(src, cur_media_type):
full_url = absolute_url(src)
- if determine_ext(full_url) == 'm3u8':
+ ext = determine_ext(full_url)
+ if ext == 'm3u8':
is_plain_url = False
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
+ elif ext == 'mpd':
+ is_plain_url = False
+ formats = self._extract_mpd_formats(
+ full_url, video_id, mpd_id=mpd_id)
else:
is_plain_url = True
formats = [{
@@ -1915,7 +1959,12 @@ class InfoExtractor(object):
media_tags = [(media_tag, media_type, '')
for media_tag, media_type
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
- media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+ media_tags.extend(re.findall(
+ # We only allow video|audio followed by whitespace or '>'.
+ # Allowing more characters may cause a significant slowdown (see
+ # https://github.com/rg3/youtube-dl/issues/11979, example URL:
+ # http://www.porntrex.com/maps/videositemap.xml).
+ r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],
@@ -1955,10 +2004,13 @@ class InfoExtractor(object):
entries.append(media_info)
return entries
- def _extract_akamai_formats(self, manifest_url, video_id):
+ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
formats = []
hdcore_sign = 'hdcore=3.7.0'
- f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ hds_host = hosts.get('hds')
+ if hds_host:
+ f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
if 'hdcore=' not in f4m_url:
f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
f4m_formats = self._extract_f4m_formats(
@@ -1966,7 +2018,10 @@ class InfoExtractor(object):
for entry in f4m_formats:
entry.update({'extra_param_to_segment_url': hdcore_sign})
formats.extend(f4m_formats)
- m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+ m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+ hls_host = hosts.get('hls')
+ if hls_host:
+ m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py
index a901b8d..5fa1f00 100644
--- a/youtube_dl/extractor/coub.py
+++ b/youtube_dl/extractor/coub.py
@@ -20,7 +20,7 @@ class CoubIE(InfoExtractor):
'id': '5u5n1',
'ext': 'mp4',
'title': 'The Matrix Moonwalk',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 4.6,
'timestamp': 1428527772,
'upload_date': '20150408',
diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py
index cc68f1c..377fb45 100644
--- a/youtube_dl/extractor/crackle.py
+++ b/youtube_dl/extractor/crackle.py
@@ -6,7 +6,7 @@ from ..utils import int_or_none
class CrackleIE(InfoExtractor):
- _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+ _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TEST = {
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
'info_dict': {
@@ -14,7 +14,7 @@ class CrackleIE(InfoExtractor):
'ext': 'mp4',
'title': 'Everybody Respects A Bloody Nose',
'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 906,
'series': 'Comedians In Cars Getting Coffee',
'season_number': 8,
@@ -31,8 +31,32 @@ class CrackleIE(InfoExtractor):
}
}
+ _THUMBNAIL_RES = [
+ (120, 90),
+ (208, 156),
+ (220, 124),
+ (220, 220),
+ (240, 180),
+ (250, 141),
+ (315, 236),
+ (320, 180),
+ (360, 203),
+ (400, 300),
+ (421, 316),
+ (460, 330),
+ (460, 460),
+ (462, 260),
+ (480, 270),
+ (587, 330),
+ (640, 480),
+ (700, 330),
+ (700, 394),
+ (854, 480),
+ (1024, 1024),
+ (1920, 1080),
+ ]
+
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
- _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
_MEDIA_FILE_SLOTS = {
'c544.flv': {
'width': 544,
@@ -61,17 +85,25 @@ class CrackleIE(InfoExtractor):
item = self._download_xml(
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
- video_id).find('i')
+ video_id, headers=self.geo_verification_headers()).find('i')
title = item.attrib['t']
subtitles = {}
formats = self._extract_m3u8_formats(
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
video_id, 'mp4', m3u8_id='hls', fatal=None)
- thumbnail = None
+ thumbnails = []
path = item.attrib.get('p')
if path:
- thumbnail = self._THUMBNAIL_TEMPLATE % path
+ for width, height in self._THUMBNAIL_RES:
+ res = '%dx%d' % (width, height)
+ thumbnails.append({
+ 'id': res,
+ 'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res),
+ 'width': width,
+ 'height': height,
+ 'resolution': res,
+ })
http_base_url = 'http://ahttp.crackle.com/' + path
for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
formats.append({
@@ -86,10 +118,11 @@ class CrackleIE(InfoExtractor):
if locale and v:
if locale not in subtitles:
subtitles[locale] = []
- subtitles[locale] = [{
- 'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v),
- 'ext': 'ttml',
- }]
+ for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')):
+ subtitles.setdefault(locale, []).append({
+ 'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext),
+ 'ext': ext,
+ })
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
return {
@@ -100,7 +133,7 @@ class CrackleIE(InfoExtractor):
'series': item.attrib.get('sn'),
'season_number': int_or_none(item.attrib.get('se')),
'episode_number': int_or_none(item.attrib.get('ep')),
- 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
'subtitles': subtitles,
'formats': formats,
}
diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py
index cf6a5d6..f7815b9 100644
--- a/youtube_dl/extractor/criterion.py
+++ b/youtube_dl/extractor/criterion.py
@@ -14,7 +14,7 @@ class CriterionIE(InfoExtractor):
'ext': 'mp4',
'title': 'Le Samouraï',
'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py
index 443eb76..7fb782d 100644
--- a/youtube_dl/extractor/crooksandliars.py
+++ b/youtube_dl/extractor/crooksandliars.py
@@ -16,7 +16,7 @@ class CrooksAndLiarsIE(InfoExtractor):
'ext': 'mp4',
'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1428207000,
'upload_date': '20150405',
'uploader': 'Heather',
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 8d5b69f..109d1c5 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -142,7 +142,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'ext': 'flv',
'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Danny Choo Network',
'upload_date': '20120213',
},
@@ -158,7 +158,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'ext': 'mp4',
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'TV TOKYO',
'upload_date': '20160508',
},
@@ -167,6 +167,25 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'skip_download': True,
},
}, {
+ 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
+ 'info_dict': {
+ 'id': '727589',
+ 'ext': 'mp4',
+ 'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!",
+ 'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Kadokawa Pictures Inc.',
+ 'upload_date': '20170118',
+ 'series': "KONOSUBA -God's blessing on this wonderful world!",
+ 'season_number': 2,
+ 'episode': 'Give Me Deliverance from this Judicial Injustice!',
+ 'episode_number': 1,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
'only_matching': True,
}, {
@@ -236,8 +255,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
- output += """ScaledBorderAndShadow: no
-
+ output += """
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
"""
@@ -439,6 +457,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
subtitles = self.extract_subtitles(video_id, webpage)
+ # The webpage provides more accurate data than series_title from the XML
+ series = self._html_search_regex(
+ r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
+ webpage, 'series', default=xpath_text(metadata, 'series_title'))
+
+ episode = xpath_text(metadata, 'episode_title')
+ episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+
+ season_number = int_or_none(self._search_regex(
+ r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
+ webpage, 'season number', default=None))
+
return {
'id': video_id,
'title': video_title,
@@ -446,9 +476,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'thumbnail': xpath_text(metadata, 'episode_image_url'),
'uploader': video_uploader,
'upload_date': video_upload_date,
- 'series': xpath_text(metadata, 'series_title'),
- 'episode': xpath_text(metadata, 'episode_title'),
- 'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
'subtitles': subtitles,
'formats': formats,
}
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index 7e5d4f2..d457616 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -12,6 +12,7 @@ from ..utils import (
ExtractorError,
)
from .senateisvp import SenateISVPIE
+from .ustream import UstreamIE
class CSpanIE(InfoExtractor):
@@ -22,14 +23,13 @@ class CSpanIE(InfoExtractor):
'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
'info_dict': {
'id': '315139',
- 'ext': 'mp4',
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
- 'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.',
},
+ 'playlist_mincount': 2,
'skip': 'Regularly fails on travis, for unknown reasons',
}, {
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
- 'md5': '8e5fbfabe6ad0f89f3012a7943c1287b',
+ # md5 is unstable
'info_dict': {
'id': 'c4486943',
'ext': 'mp4',
@@ -38,14 +38,11 @@ class CSpanIE(InfoExtractor):
}
}, {
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
- 'md5': '2ae5051559169baadba13fc35345ae74',
'info_dict': {
'id': '342759',
- 'ext': 'mp4',
'title': 'General Motors Ignition Switch Recall',
- 'duration': 14848,
- 'description': 'md5:118081aedd24bf1d3b68b3803344e7f3'
},
+ 'playlist_mincount': 6,
}, {
# Video from senate.gov
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
@@ -57,12 +54,30 @@ class CSpanIE(InfoExtractor):
'params': {
'skip_download': True, # m3u8 downloads
}
+ }, {
+ # Ustream embedded video
+ 'url': 'https://www.c-span.org/video/?114917-1/armed-services',
+ 'info_dict': {
+ 'id': '58428542',
+ 'ext': 'flv',
+ 'title': 'USHR07 Armed Services Committee',
+ 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee',
+ 'timestamp': 1423060374,
+ 'upload_date': '20150204',
+ 'uploader': 'HouseCommittee',
+ 'uploader_id': '12987475',
+ },
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_type = None
webpage = self._download_webpage(url, video_id)
+
+ ustream_url = UstreamIE._extract_url(webpage)
+ if ustream_url:
+ return self.url_result(ustream_url, UstreamIE.ie_key())
+
# We first look for clipid, because clipprog always appears before
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py
index 83ca90c..d565335 100644
--- a/youtube_dl/extractor/ctsnews.py
+++ b/youtube_dl/extractor/ctsnews.py
@@ -28,7 +28,7 @@ class CtsNewsIE(InfoExtractor):
'ext': 'mp4',
'title': '韓國31歲童顏男 貌如十多歲小孩',
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1378205880,
'upload_date': '20130903',
}
@@ -41,7 +41,7 @@ class CtsNewsIE(InfoExtractor):
'ext': 'mp4',
'title': 'iPhone6熱銷 蘋果財報亮眼',
'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20150128',
'uploader_id': 'TBSCTS',
'uploader': '中華電視公司',
diff --git a/youtube_dl/extractor/ctvnews.py b/youtube_dl/extractor/ctvnews.py
index 1023b61..55a127b 100644
--- a/youtube_dl/extractor/ctvnews.py
+++ b/youtube_dl/extractor/ctvnews.py
@@ -8,7 +8,7 @@ from ..utils import orderedSet
class CTVNewsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
+ _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
@@ -40,6 +40,9 @@ class CTVNewsIE(InfoExtractor):
}, {
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
'only_matching': True,
+ }, {
+ 'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dl/extractor/cultureunplugged.py
index 9f26fa5..bcdf273 100644
--- a/youtube_dl/extractor/cultureunplugged.py
+++ b/youtube_dl/extractor/cultureunplugged.py
@@ -21,7 +21,7 @@ class CultureUnpluggedIE(InfoExtractor):
'ext': 'mp4',
'title': 'The Next, Best West',
'description': 'md5:0423cd00833dea1519cf014e9d0903b1',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'creator': 'Coldstream Creative',
'duration': 2203,
'view_count': int,
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 4a3314e..31bf5fa 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -58,7 +58,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'ext': 'mp4',
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
'description': 'Several come bundled with the Steam Controller.',
- 'thumbnail': 're:^https?:.*\.(?:jpg|png)$',
+ 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
'duration': 74,
'timestamp': 1425657362,
'upload_date': '20150306',
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
index 732b436..76f0218 100644
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -32,7 +32,7 @@ class DaumIE(InfoExtractor):
'title': '마크 헌트 vs 안토니오 실바',
'description': 'Mark Hunt vs Antonio Silva',
'upload_date': '20131217',
- 'thumbnail': 're:^https?://.*\.(?:jpg|png)',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 2117,
'view_count': int,
'comment_count': int,
@@ -45,7 +45,7 @@ class DaumIE(InfoExtractor):
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
'description': 'md5:79794514261164ff27e36a21ad229fc5',
'upload_date': '20150604',
- 'thumbnail': 're:^https?://.*\.(?:jpg|png)',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 154,
'view_count': int,
'comment_count': int,
@@ -61,7 +61,7 @@ class DaumIE(InfoExtractor):
'title': '01-Korean War ( Trouble on the horizon )',
'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
'upload_date': '20080223',
- 'thumbnail': 're:^https?://.*\.(?:jpg|png)',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 249,
'view_count': int,
'comment_count': int,
@@ -139,7 +139,7 @@ class DaumClipIE(InfoExtractor):
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
'upload_date': '20130831',
- 'thumbnail': 're:^https?://.*\.(?:jpg|png)',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 3868,
'view_count': int,
},
diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py
index 6d880d4..f232f0d 100644
--- a/youtube_dl/extractor/dbtv.py
+++ b/youtube_dl/extractor/dbtv.py
@@ -17,7 +17,7 @@ class DBTVIE(InfoExtractor):
'ext': 'mp4',
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
'timestamp': 1404039863,
'upload_date': '20140629',
'duration': 69.544,
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
index 14ba887..00fbbff 100644
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@@ -17,7 +17,7 @@ class DctpTvIE(InfoExtractor):
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}
diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py
index 7a07f32..ec87b94 100644
--- a/youtube_dl/extractor/deezer.py
+++ b/youtube_dl/extractor/deezer.py
@@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor):
'id': '176747451',
'title': 'Best!',
'uploader': 'Anonymous',
- 'thumbnail': 're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
+ 'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
},
'playlist_count': 30,
'skip': 'Only available in .de',
diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py
index 44e0c5d..aee72a6 100644
--- a/youtube_dl/extractor/dhm.py
+++ b/youtube_dl/extractor/dhm.py
@@ -17,7 +17,7 @@ class DHMIE(InfoExtractor):
'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
'description': 'md5:1fabd480c153f97b07add61c44407c82',
'duration': 660,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
@@ -26,7 +26,7 @@ class DHMIE(InfoExtractor):
'id': 'rolle-1',
'ext': 'flv',
'title': 'ROLLE 1',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}]
diff --git a/youtube_dl/extractor/digiteka.py b/youtube_dl/extractor/digiteka.py
index 7bb79ff..3dfde0d 100644
--- a/youtube_dl/extractor/digiteka.py
+++ b/youtube_dl/extractor/digiteka.py
@@ -36,7 +36,7 @@ class DigitekaIE(InfoExtractor):
'id': 's8uk0r',
'ext': 'mp4',
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 74,
'upload_date': '20150317',
'timestamp': 1426604939,
@@ -50,7 +50,7 @@ class DigitekaIE(InfoExtractor):
'id': 'xvpfp8',
'ext': 'mp4',
'title': 'Two - C\'est La Vie (clip)',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 233,
'upload_date': '20150224',
'timestamp': 1424760500,
diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py
index c4e83b2..2042493 100644
--- a/youtube_dl/extractor/discoverygo.py
+++ b/youtube_dl/extractor/discoverygo.py
@@ -6,7 +6,6 @@ from ..utils import (
extract_attributes,
int_or_none,
parse_age_limit,
- unescapeHTML,
ExtractorError,
)
@@ -49,7 +48,7 @@ class DiscoveryGoIE(InfoExtractor):
webpage, 'video container'))
video = self._parse_json(
- unescapeHTML(container.get('data-video') or container.get('data-json')),
+ container.get('data-video') or container.get('data-json'),
display_id)
title = video['name']
diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py
new file mode 100644
index 0000000..396873c
--- /dev/null
+++ b/youtube_dl/extractor/disney.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+ compat_str,
+ determine_ext,
+)
+
+
+class DisneyIE(InfoExtractor):
+    # Accepted hosts (optionally behind any subdomain): disney.<tld>,
+    # disneyme.com, disneylatino.com, disneyturkiye.com.tr and starwars.com.
+    # Both /embed/<id> URLs and page URLs whose last path component ends in
+    # "-<id>" are matched; <id> is 24 characters from [a-z0-9].
+    _VALID_URL = r'''(?x)
+        https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})'''
+    _TESTS = [{
+        'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
+        'info_dict': {
+            'id': '545ed1857afee5a0ec239977',
+            'ext': 'mp4',
+            'title': 'Moana - Trailer',
+            'description': 'A fun adventure for the entire Family! Bring home Moana on Digital HD Feb 21 & Blu-ray March 7',
+            'upload_date': '20170112',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2',
+        'only_matching': True,
+    }, {
+        'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Extract one video from a Disney embed or watch page.
+
+        The embed page for the matched domain and video ID is downloaded and
+        the JSON object assigned to ``Disney.EmbedVideo`` is parsed; its
+        ``video`` member supplies all metadata used below.
+
+        Returns a URL result pointing at the Vevo extractor when the video
+        lists an external whose ``source`` is ``vevo``; otherwise an info
+        dict with id, title, description, thumbnail, duration, upload_date,
+        formats and subtitles.
+        """
+        domain, video_id = re.match(self._VALID_URL, url).groups()
+        # Always work from the embed page, whichever URL form was given
+        webpage = self._download_webpage(
+            'http://%s/embed/%s' % (domain, video_id), video_id)
+        video_data = self._parse_json(self._search_regex(
+            r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video']
+
+        # Externally hosted (Vevo) videos are delegated to the Vevo extractor
+        for external in video_data.get('externals', []):
+            if external.get('source') == 'vevo':
+                return self.url_result('vevo:' + external['data_id'], 'Vevo')
+
+        title = video_data['title']
+
+        formats = []
+        for flavor in video_data.get('flavors', []):
+            flavor_format = flavor.get('format')
+            flavor_url = flavor.get('url')
+            # Only absolute http(s) URLs are usable
+            if not flavor_url or not re.match(r'https?://', flavor_url):
+                continue
+            tbr = int_or_none(flavor.get('bitrate'))
+            # A bitrate of 99999 is treated as an HLS manifest to expand
+            # (presumably a sentinel for the adaptive stream - confirm)
+            if tbr == 99999:
+                formats.extend(self._extract_m3u8_formats(
+                    flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False))
+                continue
+            format_id = []
+            if flavor_format:
+                format_id.append(flavor_format)
+            if tbr:
+                format_id.append(compat_str(tbr))
+            ext = determine_ext(flavor_url)
+            if flavor_format == 'applehttp' or ext == 'm3u8':
+                ext = 'mp4'
+            width = int_or_none(flavor.get('width'))
+            height = int_or_none(flavor.get('height'))
+            formats.append({
+                'format_id': '-'.join(format_id),
+                'url': flavor_url,
+                'width': width,
+                'height': height,
+                'tbr': tbr,
+                'ext': ext,
+                # Zero-sized flavors are flagged as having no video codec
+                'vcodec': 'none' if (width == 0 and height == 0) else None,
+            })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for caption in video_data.get('captions', []):
+            caption_url = caption.get('url')
+            caption_format = caption.get('format')
+            # Skip captions without a URL or a usable format.  A missing
+            # format used to raise AttributeError on None.startswith().
+            if (not caption_url or not caption_format or
+                    caption_format.startswith('unknown')):
+                continue
+            subtitles.setdefault(caption.get('language', 'en'), []).append({
+                'url': caption_url,
+                # 'webvtt' is mapped to 'vtt'; other formats are used as-is
+                'ext': {
+                    'webvtt': 'vtt',
+                }.get(caption_format, caption_format),
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description') or video_data.get('short_desc'),
+            'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'),
+            'duration': int_or_none(video_data.get('duration_sec')),
+            'upload_date': unified_strdate(video_data.get('publish_date')),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py
index e366e17..9115944 100644
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dl/extractor/douyutv.py
@@ -18,7 +18,7 @@ from ..utils import (
class DouyuTVIE(InfoExtractor):
IE_DESC = '斗鱼'
- _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'http://www.douyutv.com/iseven',
'info_dict': {
@@ -26,8 +26,8 @@ class DouyuTVIE(InfoExtractor):
'display_id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'description': 're:.*m7show@163\.com.*',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': r're:.*m7show@163\.com.*',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '7师傅',
'is_live': True,
},
@@ -42,7 +42,7 @@ class DouyuTVIE(InfoExtractor):
'ext': 'flv',
'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'douyu小漠',
'is_live': True,
},
@@ -57,8 +57,8 @@ class DouyuTVIE(InfoExtractor):
'display_id': '17732',
'ext': 'flv',
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'description': 're:.*m7show@163\.com.*',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': r're:.*m7show@163\.com.*',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '7师傅',
'is_live': True,
},
@@ -68,6 +68,10 @@ class DouyuTVIE(InfoExtractor):
}, {
'url': 'http://www.douyu.com/xiaocang',
'only_matching': True,
+ }, {
+ # \"room_id\"
+ 'url': 'http://www.douyu.com/t/lpl',
+ 'only_matching': True,
}]
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
@@ -82,7 +86,7 @@ class DouyuTVIE(InfoExtractor):
else:
page = self._download_webpage(url, video_id)
room_id = self._html_search_regex(
- r'"room_id"\s*:\s*(\d+),', page, 'room id')
+ r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
room = self._download_json(
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py
index 5790553..32028bc 100644
--- a/youtube_dl/extractor/dplay.py
+++ b/youtube_dl/extractor/dplay.py
@@ -8,6 +8,7 @@ import time
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ USER_AGENTS,
int_or_none,
update_url_query,
)
@@ -102,10 +103,16 @@ class DPlayIE(InfoExtractor):
manifest_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
- # ourselves
+ # ourselves. Also fragments' URLs are only served signed for
+ # Safari user agent.
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
for m3u8_format in m3u8_formats:
- m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
+ m3u8_format.update({
+ 'url': update_url_query(m3u8_format['url'], query),
+ 'http_headers': {
+ 'User-Agent': USER_AGENTS['Safari'],
+ },
+ })
formats.extend(m3u8_formats)
elif protocol == 'hds':
formats.extend(self._extract_f4m_formats(
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index c115956..bcd9fe2 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -66,7 +66,7 @@ class DramaFeverBaseIE(AMPIE):
class DramaFeverIE(DramaFeverBaseIE):
IE_NAME = 'dramafever'
- _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
+ _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
_TESTS = [{
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
'info_dict': {
@@ -76,7 +76,7 @@ class DramaFeverIE(DramaFeverBaseIE):
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
'episode': 'Episode 1',
'episode_number': 1,
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 343,
@@ -94,7 +94,7 @@ class DramaFeverIE(DramaFeverBaseIE):
'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
'episode': 'Mnet Asian Music Awards 2015 - Part 3',
'episode_number': 4,
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1450213200,
'upload_date': '20151215',
'duration': 5602,
@@ -103,6 +103,9 @@ class DramaFeverIE(DramaFeverBaseIE):
# m3u8 download
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -148,7 +151,7 @@ class DramaFeverIE(DramaFeverBaseIE):
class DramaFeverSeriesIE(DramaFeverBaseIE):
IE_NAME = 'dramafever:series'
- _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
+ _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
_TESTS = [{
'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
'info_dict': {
diff --git a/youtube_dl/extractor/drbonanza.py b/youtube_dl/extractor/drbonanza.py
index 01271f8..79ec212 100644
--- a/youtube_dl/extractor/drbonanza.py
+++ b/youtube_dl/extractor/drbonanza.py
@@ -20,7 +20,7 @@ class DRBonanzaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Talkshowet - Leonard Cohen',
'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
- 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
'timestamp': 1295537932,
'upload_date': '20110120',
'duration': 3664,
@@ -36,7 +36,7 @@ class DRBonanzaIE(InfoExtractor):
'ext': 'mp3',
'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
'description': 'md5:501e5a195749480552e214fbbed16c4e',
- 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
'timestamp': 1223274900,
'upload_date': '20081006',
'duration': 7369,
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 908c9e5..f138025 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -2,10 +2,19 @@ from __future__ import unicode_literals
import re
-from .zdf import ZDFIE
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+ xpath_text,
+ determine_ext,
+ qualities,
+ float_or_none,
+ ExtractorError,
+)
-class DreiSatIE(ZDFIE):
+class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TESTS = [
@@ -31,6 +40,163 @@ class DreiSatIE(ZDFIE):
},
]
+    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+        """Build format dicts from the <video> entries of a SMIL document.
+
+        Every <paramGroup> under <head> is read into a name -> value mapping
+        keyed by its ``id`` attribute in the XML namespace.  Each <video>
+        element that has a ``src`` is combined with the param group it
+        references, and one format is emitted per entry of that group's
+        comma-separated ``protocols`` value.
+
+        smil_url, video_id, f4m_params and transform_rtmp_url are accepted
+        but not used by this implementation.
+
+        Returns the list of formats after passing it to _sort_formats.
+        Raises KeyError when a <video> references an unknown param group or
+        the group lacks ``protocols``, ``host`` or ``app``.
+        """
+        param_groups = {}
+        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
+            group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace'))
+            params = {}
+            for param in param_group:
+                params[param.get('name')] = param.get('value')
+            param_groups[group_id] = params
+
+        formats = []
+        for video in smil.findall(self._xpath_ns('.//video', namespace)):
+            src = video.get('src')
+            if not src:
+                continue
+            # Both attribute spellings are accepted; the value is divided
+            # by 1000 before use
+            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            group_id = video.get('paramGroup')
+            param_group = param_groups[group_id]
+            for proto in param_group['protocols'].split(','):
+                # Without a bitrate '%d' would raise TypeError, so fall back
+                # to the protocol alone as the format id
+                if bitrate is None:
+                    format_id = proto
+                else:
+                    format_id = '%s-%d' % (proto, bitrate)
+                formats.append({
+                    'url': '%s://%s' % (proto, param_group['host']),
+                    'app': param_group['app'],
+                    'play_path': src,
+                    'ext': 'flv',
+                    'format_id': format_id,
+                    'tbr': bitrate,
+                })
+        self._sort_formats(formats)
+        return formats
+
+    def extract_from_xml_url(self, video_id, xml_url):
+        """Download the video description XML and turn it into an info dict.
+
+        Raises ExtractorError (expected=True) when the document carries a
+        <status>/<statuscode> other than ``ok``.  Otherwise metadata is read
+        from the <information> and <details> nodes, thumbnails from
+        <teaserimage> nodes and formats from <formitaet> nodes.
+
+        Returns a dict with id, title, description, duration, thumbnails,
+        uploader, uploader_id, upload_date and formats.
+        """
+        doc = self._download_xml(
+            xml_url, video_id,
+            note='Downloading video info',
+            errnote='Failed to download video info')
+
+        status_code = doc.find('./status/statuscode')
+        if status_code is not None and status_code.text != 'ok':
+            code = status_code.text
+            if code == 'notVisibleAnymore':
+                message = 'Video %s is not available' % video_id
+            else:
+                message = '%s returned error: %s' % (self.IE_NAME, code)
+            raise ExtractorError(message, expected=True)
+
+        title = doc.find('.//information/title').text
+        description = xpath_text(doc, './/information/detail', 'description')
+        duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
+        uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
+        uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
+        upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
+
+        def xml_to_thumbnails(fnode):
+            # One thumbnail per node that has text; a "<width>x<height>"
+            # key attribute, when present, supplies the dimensions
+            thumbnails = []
+            for node in fnode:
+                thumbnail_url = node.text
+                if not thumbnail_url:
+                    continue
+                thumbnail = {
+                    'url': thumbnail_url,
+                }
+                if 'key' in node.attrib:
+                    m = re.match(r'^([0-9]+)x([0-9]+)$', node.attrib['key'])
+                    if m:
+                        thumbnail['width'] = int(m.group(1))
+                        thumbnail['height'] = int(m.group(2))
+                thumbnails.append(thumbnail)
+            return thumbnails
+
+        thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
+
+        format_nodes = doc.findall('.//formitaeten/formitaet')
+        # Kept under its own name so the per-format quality string below
+        # cannot rebind the ranking function
+        quality_rank = qualities(['veryhigh', 'high', 'med', 'low'])
+
+        def get_quality(elem):
+            return quality_rank(xpath_text(elem, 'quality'))
+        format_nodes.sort(key=get_quality)
+        format_ids = set()
+        formats = []
+        for fnode in format_nodes:
+            url_node = fnode.find('url')
+            video_url = url_node.text if url_node is not None else None
+            # A missing or empty <url> cannot be downloaded; skip it rather
+            # than fail on the membership test below
+            if not video_url:
+                continue
+            is_available = 'http://www.metafilegenerator' not in video_url
+            if not is_available:
+                continue
+            format_id = fnode.attrib['basetype']
+            quality_name = xpath_text(fnode, './quality', 'quality')
+            # basetype is expected to be six underscore-separated fields
+            format_m = re.match(r'''(?x)
+                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
+                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
+            ''', format_id)
+            # Empty when basetype has an unexpected shape, so the lookups
+            # below yield None instead of raising AttributeError
+            basetype_info = format_m.groupdict() if format_m else {}
+
+            ext = determine_ext(video_url, None) or basetype_info.get('container')
+            if ext not in ('smil', 'f4m', 'm3u8') and quality_name is not None:
+                format_id = format_id + '-' + quality_name
+            if format_id in format_ids:
+                continue
+
+            if ext == 'meta':
+                continue
+            elif ext == 'smil':
+                formats.extend(self._extract_smil_formats(
+                    video_url, video_id, fatal=False))
+            elif ext == 'm3u8':
+                # the certificates are misconfigured (see
+                # https://github.com/rg3/youtube-dl/issues/8665)
+                if video_url.startswith('https://'):
+                    continue
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    video_url, video_id, f4m_id=format_id, fatal=False))
+            else:
+                proto = basetype_info.get('proto')
+                if proto is not None:
+                    proto = proto.lower()
+
+                abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
+                vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
+
+                width = int_or_none(xpath_text(fnode, './width', 'width'))
+                height = int_or_none(xpath_text(fnode, './height', 'height'))
+
+                filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
+
+                formats.append({
+                    'format_id': format_id,
+                    'url': video_url,
+                    'ext': ext,
+                    'acodec': basetype_info.get('acodec'),
+                    'vcodec': basetype_info.get('vcodec'),
+                    'abr': abr,
+                    'vbr': vbr,
+                    'width': width,
+                    'height': height,
+                    'filesize': filesize,
+                    # No note is ever derived for these formats
+                    'format_note': None,
+                    'protocol': proto,
+                    '_available': is_available,
+                })
+            format_ids.add(format_id)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnails': thumbnails,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
index 22da8e4..1eca82b 100644
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@@ -22,7 +22,7 @@ class DrTuberIE(InfoExtractor):
'like_count': int,
'comment_count': int,
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
}
}, {
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index 88d096b..e966d74 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -9,12 +9,13 @@ from ..utils import (
mimetype2ext,
parse_iso8601,
remove_end,
+ update_url_query,
)
class DRTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
-
+ _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
+ IE_NAME = 'drtv'
_TESTS = [{
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
'md5': '25e659cccc9a2ed956110a299fdf5983',
@@ -79,9 +80,10 @@ class DRTVIE(InfoExtractor):
subtitles = {}
for asset in data['Assets']:
- if asset.get('Kind') == 'Image':
+ kind = asset.get('Kind')
+ if kind == 'Image':
thumbnail = asset.get('Uri')
- elif asset.get('Kind') == 'VideoResource':
+ elif kind in ('VideoResource', 'AudioResource'):
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
restricted_to_denmark = asset.get('RestrictedToDenmark')
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
@@ -96,9 +98,13 @@ class DRTVIE(InfoExtractor):
preference = -1
format_id += '-spoken-subtitles'
if target == 'HDS':
- formats.extend(self._extract_f4m_formats(
+ f4m_formats = self._extract_f4m_formats(
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
- video_id, preference, f4m_id=format_id))
+ video_id, preference, f4m_id=format_id)
+ if kind == 'AudioResource':
+ for f in f4m_formats:
+ f['vcodec'] = 'none'
+ formats.extend(f4m_formats)
elif target == 'HLS':
formats.extend(self._extract_m3u8_formats(
uri, video_id, 'mp4', entry_protocol='m3u8_native',
@@ -112,6 +118,7 @@ class DRTVIE(InfoExtractor):
'format_id': format_id,
'tbr': int_or_none(bitrate),
'ext': link.get('FileFormat'),
+ 'vcodec': 'none' if kind == 'AudioResource' else None,
})
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):
@@ -144,3 +151,58 @@ class DRTVIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
}
+
+
+class DRTVLiveIE(InfoExtractor):
+    # Live channels under dr.dk/tv/live/<channel> (or /TV/live/<channel>)
+    IE_NAME = 'drtv:live'
+    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
+    _TEST = {
+        'url': 'https://www.dr.dk/tv/live/dr1',
+        'info_dict': {
+            'id': 'dr1',
+            'ext': 'mp4',
+            'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for a live channel.
+
+        Channel metadata is fetched as JSON from the mu-online channel
+        endpoint.  Each stream listed under StreamingServers -> Qualities
+        -> Streams is expanded as HLS or HDS according to the server's
+        LinkType; other link types contribute no formats.
+        """
+        channel_id = self._match_id(url)
+        info = self._download_json(
+            'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
+            channel_id)
+        live_title = self._live_title(info['Title'])
+
+        formats = []
+        for server_entry in info.get('StreamingServers', []):
+            host = server_entry.get('Server')
+            if not host:
+                continue
+            protocol = server_entry.get('LinkType')
+            for quality_entry in server_entry.get('Qualities', []):
+                for stream_entry in quality_entry.get('Streams', []):
+                    path = stream_entry.get('Stream')
+                    if not path:
+                        continue
+                    base_url = '%s/%s' % (host, path)
+                    hls_url = update_url_query(base_url, {'b': ''})
+                    if protocol == 'HLS':
+                        hls_formats = self._extract_m3u8_formats(
+                            hls_url, channel_id, 'mp4',
+                            m3u8_id=protocol, fatal=False, live=True)
+                        formats.extend(hls_formats)
+                    elif protocol == 'HDS':
+                        hds_url = update_url_query(
+                            base_url, {'hdcore': '3.7.0'})
+                        hds_formats = self._extract_f4m_formats(
+                            hds_url, channel_id, f4m_id=protocol, fatal=False)
+                        formats.extend(hds_formats)
+        self._sort_formats(formats)
+
+        return {
+            'id': channel_id,
+            'title': live_title,
+            'thumbnail': info.get('PrimaryImageUri'),
+            'formats': formats,
+            'is_live': True,
+        }
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
index e5aadcd..c9fc9b5 100644
--- a/youtube_dl/extractor/dumpert.py
+++ b/youtube_dl/extractor/dumpert.py
@@ -21,7 +21,7 @@ class DumpertIE(InfoExtractor):
'ext': 'mp4',
'title': 'Ik heb nieuws voor je',
'description': 'Niet schrikken hoor',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
index c2f593e..76d39ad 100644
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -31,7 +31,7 @@ class EaglePlatformIE(InfoExtractor):
'ext': 'mp4',
'title': 'Навальный вышел на свободу',
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 87,
'view_count': int,
'age_limit': 0,
@@ -45,7 +45,7 @@ class EaglePlatformIE(InfoExtractor):
'id': '12820',
'ext': 'mp4',
'title': "'O Sole Mio",
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 216,
'view_count': int,
},
diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py
new file mode 100644
index 0000000..db92146
--- /dev/null
+++ b/youtube_dl/extractor/egghead.py
@@ -0,0 +1,39 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class EggheadCourseIE(InfoExtractor):
+    # A course page is exposed as a playlist of its lesson links
+    IE_DESC = 'egghead.io course'
+    IE_NAME = 'egghead:course'
+    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)'
+    _TEST = {
+        'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
+        'playlist_count': 29,
+        'info_dict': {
+            'id': 'professor-frisby-introduces-composable-functional-javascript',
+            'title': 'Professor Frisby Introduces Composable Functional JavaScript',
+            'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist whose entries are the course's lesson URLs.
+
+        The title comes from the page's <h1 class="title">, the lessons
+        from the links inside <ul class="series-lessons-list">, and the
+        description from the page's Open Graph metadata.
+        """
+        course_id = self._match_id(url)
+        page = self._download_webpage(url, course_id)
+
+        course_title = self._html_search_regex(
+            r'<h1 class="title">([^<]+)</h1>', page, 'title')
+        lessons_html = self._search_regex(
+            r'(?s)<ul class="series-lessons-list">(.*?)</ul>', page, 'session list')
+
+        # Every anchor that wraps a lesson <li> contributes one entry
+        entries = []
+        for lesson_url in re.findall(
+                r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', lessons_html):
+            entries.append(self.url_result(lesson_url))
+
+        return {
+            '_type': 'playlist',
+            'id': course_id,
+            'title': course_title,
+            'description': self._og_search_description(page),
+            'entries': entries,
+        }
diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py
index 443865a..6ca07a1 100644
--- a/youtube_dl/extractor/einthusan.py
+++ b/youtube_dl/extractor/einthusan.py
@@ -19,7 +19,7 @@ class EinthusanIE(InfoExtractor):
'id': '2447',
'ext': 'mp4',
'title': 'Ek Villain',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
}
},
@@ -30,7 +30,7 @@ class EinthusanIE(InfoExtractor):
'id': '1671',
'ext': 'mp4',
'title': 'Soodhu Kavvuum',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
}
},
diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py
index 8c725a4..99e00cf 100644
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dl/extractor/elpais.py
@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import strip_jsonp, unified_strdate
class ElPaisIE(InfoExtractor):
@@ -29,6 +29,16 @@ class ElPaisIE(InfoExtractor):
'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
'upload_date': '20160303',
}
+ }, {
+ 'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html',
+ 'md5': '9c79923a118a067e1a45789e1e0b0f9c',
+ 'info_dict': {
+ 'id': '1485456786_417876',
+ 'ext': 'mp4',
+ 'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años',
+ 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
+ 'upload_date': '20170127',
+ },
}]
def _real_extract(self, url):
@@ -37,8 +47,15 @@ class ElPaisIE(InfoExtractor):
prefix = self._html_search_regex(
r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
- video_suffix = self._search_regex(
- r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
+ id_multimedia = self._search_regex(
+ r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None)
+ if id_multimedia:
+ url_info = self._download_json(
+ 'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp)
+ video_suffix = url_info['mp4']
+ else:
+ video_suffix = self._search_regex(
+ r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
video_url = prefix + video_suffix
thumbnail_suffix = self._search_regex(
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
index 297f8a6..c08643a 100644
--- a/youtube_dl/extractor/eroprofile.py
+++ b/youtube_dl/extractor/eroprofile.py
@@ -22,7 +22,7 @@ class EroProfileIE(InfoExtractor):
'display_id': 'sexy-babe-softcore',
'ext': 'm4v',
'title': 'sexy babe softcore',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
}
}, {
@@ -32,7 +32,7 @@ class EroProfileIE(InfoExtractor):
'id': '1133519',
'ext': 'm4v',
'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
'skip': 'Requires login',
diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py
index a3d7bbb..4d8a3c1 100644
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@@ -45,7 +45,7 @@ class EscapistIE(InfoExtractor):
'ext': 'mp4',
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
'title': "Breaking Down Baldur's Gate",
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 264,
'uploader': 'The Escapist',
}
@@ -57,7 +57,7 @@ class EscapistIE(InfoExtractor):
'ext': 'mp4',
'description': 'This week, Zero Punctuation reviews Evolve.',
'title': 'Evolve - One vs Multiplayer',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 304,
'uploader': 'The Escapist',
}
diff --git a/youtube_dl/extractor/esri.py b/youtube_dl/extractor/esri.py
index d4205d7..e9dcaeb 100644
--- a/youtube_dl/extractor/esri.py
+++ b/youtube_dl/extractor/esri.py
@@ -22,7 +22,7 @@ class EsriVideoIE(InfoExtractor):
'ext': 'mp4',
'title': 'ArcGIS Online - Developing Applications',
'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 185,
'upload_date': '20120419',
}
diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py
index adc4391..1efc0b2 100644
--- a/youtube_dl/extractor/europa.py
+++ b/youtube_dl/extractor/europa.py
@@ -23,7 +23,7 @@ class EuropaIE(InfoExtractor):
'ext': 'mp4',
'title': 'TRADE - Wikileaks on TTIP',
'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20150811',
'duration': 34,
'view_count': int,
diff --git a/youtube_dl/extractor/expotv.py b/youtube_dl/extractor/expotv.py
index ef11962..95a8977 100644
--- a/youtube_dl/extractor/expotv.py
+++ b/youtube_dl/extractor/expotv.py
@@ -17,7 +17,7 @@ class ExpoTVIE(InfoExtractor):
'ext': 'mp4',
'title': 'NYX Butter Lipstick Little Susie',
'description': 'Goes on like butter, but looks better!',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Stephanie S.',
'upload_date': '20150520',
'view_count': int,
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 46d007b..12cda36 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -30,7 +30,10 @@ from .aenetworks import (
AENetworksIE,
HistoryTopicIE,
)
-from .afreecatv import AfreecaTVIE
+from .afreecatv import (
+ AfreecaTVIE,
+ AfreecaTVGlobalIE,
+)
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
@@ -38,10 +41,7 @@ from .amcnetworks import AMCNetworksIE
from .animeondemand import AnimeOnDemandIE
from .anitube import AnitubeIE
from .anysex import AnySexIE
-from .aol import (
- AolIE,
- AolFeaturesIE,
-)
+from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
@@ -80,6 +80,10 @@ from .awaan import (
AWAANLiveIE,
AWAANSeasonIE,
)
+from .azmedien import (
+ AZMedienIE,
+ AZMedienPlaylistIE,
+)
from .azubu import AzubuIE, AzubuLiveIE
from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE
@@ -91,6 +95,7 @@ from .bbc import (
BBCCoUkPlaylistIE,
BBCIE,
)
+from .beampro import BeamProLiveIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE
@@ -98,7 +103,10 @@ from .beatport import BeatportIE
from .bet import BetIE
from .bigflix import BigflixIE
from .bild import BildIE
-from .bilibili import BiliBiliIE
+from .bilibili import (
+ BiliBiliIE,
+ BiliBiliBangumiIE,
+)
from .biobiochiletv import BioBioChileTVIE
from .biqle import BIQLEIE
from .bleacherreport import (
@@ -150,6 +158,7 @@ from .cbsnews import (
)
from .cbssports import CBSSportsIE
from .ccc import CCCIE
+from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import CeskaTelevizeIE
@@ -239,12 +248,16 @@ from .dramafever import (
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
-from .drtv import DRTVIE
+from .drtv import (
+ DRTVIE,
+ DRTVLiveIE,
+)
from .dvtv import DVTVIE
from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .discoverygo import DiscoveryGoIE
+from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .dropbox import DropboxIE
from .dw import (
@@ -254,6 +267,7 @@ from .dw import (
from .eagleplatform import EaglePlatformIE
from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
+from .egghead import EggheadCourseIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
@@ -288,6 +302,10 @@ from .fc2 import (
FC2EmbedIE,
)
from .fczenit import FczenitIE
+from .filmon import (
+ FilmOnIE,
+ FilmOnChannelIE,
+)
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
@@ -319,7 +337,6 @@ from .francetv import (
)
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
-from .freevideo import FreeVideoIE
from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
@@ -332,6 +349,7 @@ from .gameone import (
from .gamersyde import GamersydeIE
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
+from .gaskrank import GaskrankIE
from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
@@ -369,6 +387,7 @@ from .hgtv import (
)
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
+from .hitrecord import HitRecordIE
from .hornbunny import HornBunnyIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import HotStarIE
@@ -396,6 +415,7 @@ from .imgur import (
ImgurAlbumIE,
)
from .ina import InaIE
+from .inc import IncIE
from .indavideo import (
IndavideoIE,
IndavideoEmbedIE,
@@ -406,6 +426,7 @@ from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
from .iqiyi import IqiyiIE
from .ir90tv import Ir90TvIE
+from .itv import ITVIE
from .ivi import (
IviIE,
IviCompilationIE
@@ -446,7 +467,10 @@ from .kuwo import (
KuwoMvIE,
)
from .la7 import LA7IE
-from .laola1tv import Laola1TvIE
+from .laola1tv import (
+ Laola1TvEmbedIE,
+ Laola1TvIE,
+)
from .lci import LCIIE
from .lcp import (
LcpPlayIE,
@@ -498,6 +522,8 @@ from .mangomolo import (
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
+from .meipai import MeipaiIE
+from .melonvod import MelonVODIE
from .meta import METAIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
@@ -539,6 +565,7 @@ from .mtv import (
MTVVideoIE,
MTVServicesEmbeddedIE,
MTVDEIE,
+ MTV81IE,
)
from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
@@ -588,6 +615,7 @@ from .nextmedia import (
NextMediaIE,
NextMediaActionNewsIE,
AppleDailyIE,
+ NextTVIE,
)
from .nfb import NFBIE
from .nfl import NFLIE
@@ -649,6 +677,9 @@ from .nrk import (
NRKPlaylistIE,
NRKSkoleIE,
NRKTVIE,
+ NRKTVDirekteIE,
+ NRKTVEpisodesIE,
+ NRKTVSeriesIE,
)
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
@@ -661,6 +692,7 @@ from .nzz import NZZIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
+from .ondemandkorea import OnDemandKoreaIE
from .onet import (
OnetIE,
OnetChannelIE,
@@ -691,6 +723,7 @@ from .periscope import (
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
+from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pladform import PladformIE
from .playfm import PlayFMIE
@@ -710,6 +743,7 @@ from .polskieradio import (
)
from .porn91 import Porn91IE
from .porncom import PornComIE
+from .pornflip import PornFlipIE
from .pornhd import PornHdIE
from .pornhub import (
PornHubIE,
@@ -804,7 +838,6 @@ from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
-from .screenjunkies import ScreenJunkiesIE
from .seeker import SeekerIE
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
@@ -815,7 +848,7 @@ from .shared import (
SharedIE,
VivoIE,
)
-from .sharesix import ShareSixIE
+from .showroomlive import ShowRoomLiveIE
from .sina import SinaIE
from .sixplay import SixPlayIE
from .skynewsarabia import (
@@ -859,10 +892,7 @@ from .spiegeltv import SpiegeltvIE
from .spike import SpikeIE
from .stitcher import StitcherIE
from .sport5 import Sport5IE
-from .sportbox import (
- SportBoxIE,
- SportBoxEmbedIE,
-)
+from .sportbox import SportBoxEmbedIE
from .sportdeutschland import SportDeutschlandIE
from .sportschau import SportschauIE
from .srgssr import (
@@ -966,6 +996,7 @@ from .tv2 import (
)
from .tv3 import TV3IE
from .tv4 import TV4IE
+from .tva import TVAIE
from .tvanouvelles import (
TVANouvellesIE,
TVANouvellesArticleIE,
@@ -998,7 +1029,10 @@ from .twitch import (
TwitchChapterIE,
TwitchVodIE,
TwitchProfileIE,
+ TwitchAllVideosIE,
+ TwitchUploadsIE,
TwitchPastBroadcastsIE,
+ TwitchHighlightsIE,
TwitchStreamIE,
TwitchClipsIE,
)
@@ -1012,6 +1046,7 @@ from .udemy import (
UdemyCourseIE
)
from .udn import UDNEmbedIE
+from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .uol import UOLIE
@@ -1051,6 +1086,7 @@ from .vice import (
from .viceland import VicelandIE
from .vidbit import VidbitIE
from .viddler import ViddlerIE
+from .videa import VideaIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
@@ -1060,7 +1096,7 @@ from .videomore import (
VideomoreSeasonIE,
)
from .videopremium import VideoPremiumIE
-from .videott import VideoTtIE
+from .videopress import VideoPressIE
from .vidio import VidioIE
from .vidme import (
VidmeIE,
@@ -1095,12 +1131,20 @@ from .viki import (
VikiIE,
VikiChannelIE,
)
+from .viu import (
+ ViuIE,
+ ViuPlaylistIE,
+ ViuOTTIE,
+)
from .vk import (
VKIE,
VKUserVideosIE,
VKWallPostIE,
)
-from .vlive import VLiveIE
+from .vlive import (
+ VLiveIE,
+ VLiveChannelIE
+)
from .vodlocker import VodlockerIE
from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE
@@ -1109,6 +1153,7 @@ from .vporn import VpornIE
from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
+from .vvvvid import VVVVIDIE
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
from .walla import WallaIE
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index b4d38e5..b325c82 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -12,14 +12,16 @@ from ..compat import (
compat_urllib_parse_unquote_plus,
)
from ..utils import (
+ clean_html,
error_to_compat_str,
ExtractorError,
+ get_element_by_id,
int_or_none,
+ js_to_json,
limit_length,
sanitized_Request,
+ try_get,
urlencode_postdata,
- get_element_by_id,
- clean_html,
)
@@ -27,7 +29,7 @@ class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
https?://
- (?:[\w-]+\.)?facebook\.com/
+ (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
(?:[^#]*?\#!/)?
(?:
(?:
@@ -71,7 +73,7 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '274175099429670',
'ext': 'mp4',
- 'title': 'Facebook video #274175099429670',
+ 'title': 'Asif Nawab Butt posted a video to his Timeline.',
'uploader': 'Asif Nawab Butt',
'upload_date': '20140506',
'timestamp': 1399398998,
@@ -150,6 +152,9 @@ class FacebookIE(InfoExtractor):
}, {
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
+ 'only_matching': True,
}]
@staticmethod
@@ -240,12 +245,30 @@ class FacebookIE(InfoExtractor):
video_data = None
+ def extract_video_data(instances):
+ for item in instances:
+ if item[1][0] == 'VideoConfig':
+ video_item = item[2][0]
+ if video_item.get('video_id') == video_id:
+ return video_item['videoData']
+
server_js_data = self._parse_json(self._search_regex(
- r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
- for item in server_js_data.get('instances', []):
- if item[1][0] == 'VideoConfig':
- video_data = item[2][0]['videoData']
- break
+ r'handleServerJS\(({.+})(?:\);|,")', webpage,
+ 'server js data', default='{}'), video_id, fatal=False)
+
+ if server_js_data:
+ video_data = extract_video_data(server_js_data.get('instances', []))
+
+ if not video_data:
+ server_js_data = self._parse_json(
+ self._search_regex(
+ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
+ webpage, 'js data', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if server_js_data:
+ video_data = extract_video_data(try_get(
+ server_js_data, lambda x: x['jsmods']['instances'],
+ list) or [])
if not video_data:
if not fatal_if_no_video:
@@ -255,6 +278,8 @@ class FacebookIE(InfoExtractor):
raise ExtractorError(
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
expected=True)
+ elif '>You must log in to continue' in webpage:
+ self.raise_login_required()
else:
raise ExtractorError('Cannot parse data')
@@ -293,10 +318,16 @@ class FacebookIE(InfoExtractor):
video_title = self._html_search_regex(
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
webpage, 'alternative title', default=None)
- video_title = limit_length(video_title, 80)
if not video_title:
+ video_title = self._html_search_meta(
+ 'description', webpage, 'title')
+ if video_title:
+ video_title = limit_length(video_title, 80)
+ else:
video_title = 'Facebook video #%s' % video_id
- uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
+ uploader = clean_html(get_element_by_id(
+ 'fbPhotoPageAuthorName', webpage)) or self._search_regex(
+ r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
index c032d4d..448647d 100644
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -133,7 +133,7 @@ class FC2EmbedIE(InfoExtractor):
'id': '201403223kCqB3Ez',
'ext': 'flv',
'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}
diff --git a/youtube_dl/extractor/filmon.py b/youtube_dl/extractor/filmon.py
new file mode 100644
index 0000000..f775fe0
--- /dev/null
+++ b/youtube_dl/extractor/filmon.py
@@ -0,0 +1,178 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
+from ..utils import (
+ qualities,
+ strip_or_none,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class FilmOnIE(InfoExtractor):
+ IE_NAME = 'filmon'
+ _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space',
+ 'info_dict': {
+ 'id': '24869',
+ 'ext': 'mp4',
+ 'title': 'Plan 9 From Outer Space',
+ 'description': 'Dead human, zombies and vampires',
+ },
+ }, {
+ 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1',
+ 'info_dict': {
+ 'id': '2825',
+ 'title': 'Popeye Series 1',
+ 'description': 'The original series of Popeye.',
+ },
+ 'playlist_mincount': 8,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ response = self._download_json(
+ 'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
+ video_id)['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+ raise
+
+ title = response['title']
+ description = strip_or_none(response.get('description'))
+
+ if response.get('type_id') == 1:
+ entries = [self.url_result('filmon:' + episode_id) for episode_id in response.get('episodes', [])]
+ return self.playlist_result(entries, video_id, title, description)
+
+ QUALITY = qualities(('low', 'high'))
+ formats = []
+ for format_id, stream in response.get('streams', {}).items():
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'quality': QUALITY(stream.get('quality')),
+ 'protocol': 'm3u8_native',
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ poster = response.get('poster', {})
+ thumbs = poster.get('thumbs', {})
+ thumbs['poster'] = poster
+ for thumb_id, thumb in thumbs.items():
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb_id,
+ 'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ }
+
+
+class FilmOnChannelIE(InfoExtractor):
+ IE_NAME = 'filmon:channel'
+ _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ # VOD
+ 'url': 'http://www.filmon.com/tv/sports-haters',
+ 'info_dict': {
+ 'id': '4190',
+ 'ext': 'mp4',
+ 'title': 'Sports Haters',
+ 'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d',
+ },
+ }, {
+ # LIVE
+ 'url': 'https://www.filmon.com/channel/filmon-sports',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.filmon.com/tv/2894',
+ 'only_matching': True,
+ }]
+
+ _THUMBNAIL_RES = [
+ ('logo', 56, 28),
+ ('big_logo', 106, 106),
+ ('extra_big_logo', 300, 300),
+ ]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ try:
+ channel_data = self._download_json(
+ 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+ raise
+
+ channel_id = compat_str(channel_data['id'])
+ is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox')
+ title = channel_data['title']
+
+ QUALITY = qualities(('low', 'high'))
+ formats = []
+ for stream in channel_data.get('streams', []):
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ if not is_live:
+ formats.extend(self._extract_wowza_formats(
+ stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp']))
+ continue
+ quality = stream.get('quality')
+ formats.append({
+ 'format_id': quality,
+ # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats
+ # because it doesn't have bitrate variants anyway
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'quality': QUALITY(quality),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for name, width, height in self._THUMBNAIL_RES:
+ thumbnails.append({
+ 'id': name,
+ 'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name),
+ 'width': width,
+ 'height': height,
+ })
+
+ return {
+ 'id': channel_id,
+ 'display_id': channel_data.get('alias'),
+ 'title': self._live_title(title) if is_live else title,
+ 'description': channel_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py
index 6b662cc..081c718 100644
--- a/youtube_dl/extractor/firsttv.py
+++ b/youtube_dl/extractor/firsttv.py
@@ -2,7 +2,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
from ..utils import (
int_or_none,
qualities,
@@ -22,9 +25,8 @@ class FirstTVIE(InfoExtractor):
'info_dict': {
'id': '40049',
'ext': 'mp4',
- 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
- 'description': 'md5:36a39c1d19618fec57d12efe212a8370',
- 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
+ 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
'upload_date': '20150212',
'duration': 2694,
},
@@ -34,9 +36,8 @@ class FirstTVIE(InfoExtractor):
'info_dict': {
'id': '364746',
'ext': 'mp4',
- 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
- 'description': 'md5:a242eea0031fd180a4497d52640a9572',
- 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
+ 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
'upload_date': '20160407',
'duration': 179,
'formats': 'mincount:3',
@@ -44,6 +45,17 @@ class FirstTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00',
+ 'info_dict': {
+ 'id': '14:00',
+ 'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал',
+ 'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd',
+ },
+ 'playlist_count': 13,
+ }, {
+ 'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -51,43 +63,91 @@ class FirstTVIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
playlist_url = compat_urlparse.urljoin(url, self._search_regex(
- r'data-playlist-url="([^"]+)', webpage, 'playlist url'))
+ r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'playlist url', group='url'))
+
+ parsed_url = compat_urlparse.urlparse(playlist_url)
+ qs = compat_urlparse.parse_qs(parsed_url.query)
+ item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]')
+
+ items = self._download_json(playlist_url, display_id)
+
+ if item_ids:
+ items = [
+ item for item in items
+ if item.get('uid') and compat_str(item['uid']) in item_ids]
+ else:
+ items = [items[0]]
+
+ entries = []
+ QUALITIES = ('ld', 'sd', 'hd', )
+
+ for item in items:
+ title = item['title']
+ quality = qualities(QUALITIES)
+ formats = []
+ path = None
+ for f in item.get('mbr', []):
+ src = f.get('src')
+ if not src or not isinstance(src, compat_str):
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'_(\d{3,})\.mp4', src, 'tbr', default=None))
+ if not path:
+ path = self._search_regex(
+ r'//[^/]+/(.+?)_\d+\.mp4', src,
+ 'm3u8 path', default=None)
+ formats.append({
+ 'url': src,
+ 'format_id': f.get('name'),
+ 'tbr': tbr,
+ 'source_preference': quality(f.get('name')),
+ })
+ # m3u8 URL format is reverse engineered from [1] (search for
+ # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
+ # is taken from [2].
+ # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
+ # 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
+ if not path and len(formats) == 1:
+ path = self._search_regex(
+ r'//[^/]+/(.+?$)', formats[0]['url'],
+ 'm3u8 path', default=None)
+ if path:
+ if len(formats) == 1:
+ m3u8_path = ','
+ else:
+ tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)]
+ m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4')
+ formats.extend(self._extract_m3u8_formats(
+ 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8'
+ % (path, m3u8_path),
+ display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
+ duration = int_or_none(item.get('duration') or self._html_search_meta(
+ 'video:duration', webpage, 'video duration', fatal=False))
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date', default=None))
- item = self._download_json(playlist_url, display_id)[0]
- video_id = item['id']
- quality = qualities(('ld', 'sd', 'hd', ))
- formats = []
- for f in item.get('mbr', []):
- src = f.get('src')
- if not src:
- continue
- fname = f.get('name')
- formats.append({
- 'url': src,
- 'format_id': fname,
- 'quality': quality(fname),
+ entries.append({
+ 'id': compat_str(item.get('id') or item['uid']),
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'duration': int_or_none(duration),
+ 'formats': formats
})
- self._sort_formats(formats)
title = self._html_search_regex(
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
r"'title'\s*:\s*'([^']+)'"),
- webpage, 'title', default=None) or item['title']
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None)
description = self._html_search_regex(
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
webpage, 'description', default=None) or self._html_search_meta(
- 'description', webpage, 'description')
- duration = int_or_none(self._html_search_meta(
- 'video:duration', webpage, 'video duration', fatal=False))
- upload_date = unified_strdate(self._html_search_meta(
- 'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
+ 'description', webpage, 'description', default=None)
- return {
- 'id': video_id,
- 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
- 'title': title,
- 'description': description,
- 'upload_date': upload_date,
- 'duration': int_or_none(duration),
- 'formats': formats
- }
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py
index 13fbc4d..15736c9 100644
--- a/youtube_dl/extractor/fivetv.py
+++ b/youtube_dl/extractor/fivetv.py
@@ -25,7 +25,7 @@ class FiveTVIE(InfoExtractor):
'ext': 'mp4',
'title': 'Россияне выбрали имя для общенациональной платежной системы',
'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 180,
},
}, {
@@ -35,7 +35,7 @@ class FiveTVIE(InfoExtractor):
'ext': 'mp4',
'title': '3D принтер',
'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 180,
},
}, {
@@ -44,7 +44,7 @@ class FiveTVIE(InfoExtractor):
'id': 'glavnoe',
'ext': 'mp4',
'title': 'Итоги недели с 8 по 14 июня 2015 года',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
index a3a2915..2958452 100644
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@@ -19,7 +19,7 @@ class FKTVIE(InfoExtractor):
'id': '1',
'ext': 'mp4',
'title': 'Folge 1 vom 10. April 2007',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}
diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py
index 1902a23..b7be40f 100644
--- a/youtube_dl/extractor/flipagram.py
+++ b/youtube_dl/extractor/flipagram.py
@@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor):
'filesize': int_or_none(cover.get('size')),
} for cover in flipagram.get('covers', []) if cover.get('url')]
- # Note that this only retrieves comments that are initally loaded.
+ # Note that this only retrieves comments that are initially loaded.
# For videos with large amounts of comments, most won't be retrieved.
comments = []
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py
index 39174fc..e887ae4 100644
--- a/youtube_dl/extractor/foxgay.py
+++ b/youtube_dl/extractor/foxgay.py
@@ -20,7 +20,7 @@ class FoxgayIE(InfoExtractor):
'title': 'Fuck Turkish-style',
'description': 'md5:6ae2d9486921891efe89231ace13ffdf',
'age_limit': 18,
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
},
}
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index 229bcb1..dc0662f 100644
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@@ -22,7 +22,7 @@ class FoxNewsIE(AMPIE):
'duration': 265,
'timestamp': 1304411491,
'upload_date': '20110503',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
@@ -36,7 +36,7 @@ class FoxNewsIE(AMPIE):
'duration': 292,
'timestamp': 1417662047,
'upload_date': '20141204',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
@@ -111,7 +111,7 @@ class FoxNewsInsiderIE(InfoExtractor):
'description': 'Is campus censorship getting out of control?',
'timestamp': 1472168725,
'upload_date': '20160825',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py
index 56048ff..b98da69 100644
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@@ -17,7 +17,7 @@ class FranceCultureIE(InfoExtractor):
'display_id': 'rendez-vous-au-pays-des-geeks',
'ext': 'mp3',
'title': 'Rendez-vous au pays des geeks',
- 'thumbnail': 're:^https?://.*\\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
'vcodec': 'none',
}
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index e7068d1..48d43ae 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -168,7 +168,7 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
'id': 'NI_173343',
'ext': 'mp4',
'title': 'Les entreprises familiales : le secret de la réussite',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
'timestamp': 1433273139,
'upload_date': '20150602',
},
@@ -184,7 +184,7 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
'ext': 'mp4',
'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
'timestamp': 1458300695,
'upload_date': '20160318',
},
diff --git a/youtube_dl/extractor/freesound.py b/youtube_dl/extractor/freesound.py
index 5ff62af..138b6bc 100644
--- a/youtube_dl/extractor/freesound.py
+++ b/youtube_dl/extractor/freesound.py
@@ -3,10 +3,16 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ unified_strdate,
+)
class FreesoundIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P<id>[^/]+)'
_TEST = {
'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
'md5': '12280ceb42c81f19a515c745eae07650',
@@ -14,26 +20,60 @@ class FreesoundIE(InfoExtractor):
'id': '194503',
'ext': 'mp3',
'title': 'gulls in the city.wav',
- 'uploader': 'miklovan',
'description': 'the sounds of seagulls in the city',
+ 'duration': 130.233,
+ 'uploader': 'miklovan',
+ 'upload_date': '20130715',
+ 'tags': list,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- music_id = mobj.group('id')
- webpage = self._download_webpage(url, music_id)
- title = self._html_search_regex(
- r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
- webpage, 'music title', flags=re.DOTALL)
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ audio_url = self._og_search_property('audio', webpage, 'song url')
+ title = self._og_search_property('audio:title', webpage, 'song title')
+
description = self._html_search_regex(
- r'<div id="sound_description">(.*?)</div>', webpage, 'description',
- fatal=False, flags=re.DOTALL)
+ r'(?s)id=["\']sound_description["\'][^>]*>(.+?)</div>',
+ webpage, 'description', fatal=False)
+
+ duration = float_or_none(
+ get_element_by_class('duration', webpage), scale=1000)
+
+ upload_date = unified_strdate(get_element_by_id('sound_date', webpage))
+ uploader = self._og_search_property(
+ 'audio:artist', webpage, 'uploader', fatal=False)
+
+ channels = self._html_search_regex(
+ r'Channels</dt><dd>(.+?)</dd>', webpage,
+ 'channels info', fatal=False)
+
+ tags_str = get_element_by_class('tags', webpage)
+ tags = re.findall(r'<a[^>]+>([^<]+)', tags_str) if tags_str else None
+
+ audio_urls = [audio_url]
+
+ LQ_FORMAT = '-lq.mp3'
+ if LQ_FORMAT in audio_url:
+ audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3'))
+
+ formats = [{
+ 'url': format_url,
+ 'format_note': channels,
+ 'quality': quality,
+ } for quality, format_url in enumerate(audio_urls)]
+ self._sort_formats(formats)
return {
- 'id': music_id,
+ 'id': audio_id,
'title': title,
- 'url': self._og_search_property('audio', webpage, 'music url'),
- 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'),
'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'tags': tags,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/freevideo.py b/youtube_dl/extractor/freevideo.py
deleted file mode 100644
index cd8423a..0000000
--- a/youtube_dl/extractor/freevideo.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class FreeVideoIE(InfoExtractor):
- _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
-
- _TEST = {
- 'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
- 'info_dict': {
- 'id': 'vysukany-zadecek-22033',
- 'ext': 'mp4',
- 'title': 'vysukany-zadecek-22033',
- 'age_limit': 18,
- },
- 'skip': 'Blocked outside .cz',
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage, handle = self._download_webpage_handle(url, video_id)
- if '//www.czechav.com/' in handle.geturl():
- raise ExtractorError(
- 'Access to freevideo is blocked from your location',
- expected=True)
-
- video_url = self._search_regex(
- r'\s+url: "(http://[a-z0-9-]+.cdn.freevideo.cz/stream/.*?/video.mp4)"',
- webpage, 'video URL')
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': video_id,
- 'age_limit': 18,
- }
diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py
index 0ad0d9b..eba00cd 100644
--- a/youtube_dl/extractor/funimation.py
+++ b/youtube_dl/extractor/funimation.py
@@ -29,7 +29,7 @@ class FunimationIE(InfoExtractor):
'ext': 'mp4',
'title': 'Air - 1 - Breeze',
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
},
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
}, {
@@ -40,7 +40,7 @@ class FunimationIE(InfoExtractor):
'ext': 'mp4',
'title': '.hack//SIGN - 1 - Role Play',
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
},
'skip': 'Access without user interaction is forbidden by CloudFlare',
}, {
@@ -51,7 +51,7 @@ class FunimationIE(InfoExtractor):
'ext': 'mp4',
'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
- 'thumbnail': 're:https?://.*\.(?:jpg|png)',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
},
'skip': 'Access without user interaction is forbidden by CloudFlare',
}]
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index f2928b5..81c0ce9 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -17,7 +17,7 @@ class FunnyOrDieIE(InfoExtractor):
'ext': 'mp4',
'title': 'Heart-Shaped Box: Literal Video Version',
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
- 'thumbnail': 're:^http:.*\.jpg$',
+ 'thumbnail': r're:^http:.*\.jpg$',
},
}, {
'url': 'http://www.funnyordie.com/embed/e402820827',
@@ -26,7 +26,7 @@ class FunnyOrDieIE(InfoExtractor):
'ext': 'mp4',
'title': 'Please Use This Song (Jon Lajoie)',
'description': 'Please use this to sell something. www.jonlajoie.com',
- 'thumbnail': 're:^http:.*\.jpg$',
+ 'thumbnail': r're:^http:.*\.jpg$',
},
'params': {
'skip_download': True,
diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py
index b4ab4cb..ede729b 100644
--- a/youtube_dl/extractor/fusion.py
+++ b/youtube_dl/extractor/fusion.py
@@ -29,7 +29,7 @@ class FusionIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
ooyala_code = self._search_regex(
- r'data-video-id=(["\'])(?P<code>.+?)\1',
+ r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
webpage, 'ooyala code', group='code')
return OoyalaIE._build_url_result(ooyala_code)
diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py
index d545e01..a218a69 100644
--- a/youtube_dl/extractor/gamersyde.py
+++ b/youtube_dl/extractor/gamersyde.py
@@ -20,7 +20,7 @@ class GamersydeIE(InfoExtractor):
'ext': 'mp4',
'duration': 372,
'title': 'Bloodborne - Birth of a hero',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 4e859e0..682c49e 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -63,7 +63,7 @@ class GameSpotIE(OnceIE):
streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
if progressive_url and manifest_url:
qualities_basename = self._search_regex(
- '/([^/]+)\.csmil/',
+ r'/([^/]+)\.csmil/',
manifest_url, 'qualities basename', default=None)
if qualities_basename:
QUALITIES_RE = r'((,\d+)+,?)'
diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py
index 55a3460..e607d6a 100644
--- a/youtube_dl/extractor/gamestar.py
+++ b/youtube_dl/extractor/gamestar.py
@@ -18,7 +18,7 @@ class GameStarIE(InfoExtractor):
'ext': 'mp4',
'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1406542020,
'upload_date': '20140728',
'duration': 17
diff --git a/youtube_dl/extractor/gaskrank.py b/youtube_dl/extractor/gaskrank.py
new file mode 100644
index 0000000..36ba7d8
--- /dev/null
+++ b/youtube_dl/extractor/gaskrank.py
@@ -0,0 +1,123 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ unified_strdate,
+)
+
+
+class GaskrankIE(InfoExtractor):
+    """Information extractor for video pages on gaskrank.tv."""
+    _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
+    _TESTS = [
+        {
+            'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
+            'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
+            'info_dict': {
+                'id': '201601/26955',
+                'ext': 'mp4',
+                'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'categories': ['motorrad-fun'],
+                'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
+                'uploader_id': 'Bikefun',
+                'upload_date': '20170110',
+                'uploader_url': None,
+            }
+        },
+        {
+            'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
+            'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
+            'info_dict': {
+                'id': '201106/15920',
+                'ext': 'mp4',
+                'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'categories': ['racing'],
+                'display_id': 'isle-of-man-tt-2011-michael-du-15920',
+                'uploader_id': 'IOM',
+                'upload_date': '20160506',
+                'uploader_url': 'www.iomtt.com',
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for a video page; absent optional fields are None."""
+        def fix_json(code):
+            """Strip a trailing comma before a closing brace: {{},} --> {{}}"""
+            return re.sub(r',\s*}', r'}', js_to_json(code))
+
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        categories = [re.match(self._VALID_URL, url).group('categories')]
+        title = self._search_regex(
+            r'movieName\s*:\s*\'([^\']*)\'', webpage, 'title')
+        thumbnail = self._search_regex(
+            r'poster\s*:\s*\'([^\']*)\'', webpage, 'thumbnail', default=None)
+
+        # Pre-set both fields: without a match they used to be unbound below.
+        uploader_id = upload_date = None
+        mobj = re.search(
+            r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
+            webpage)
+        if mobj is not None:
+            uploader_id = mobj.group('uploader_id')
+            upload_date = unified_strdate(mobj.group('upload_date'))
+
+        uploader_url = self._search_regex(
+            r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
+            webpage, 'uploader_url', default=None)
+        tags = re.findall(
+            r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<', webpage)
+
+        view_count = self._search_regex(
+            r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
+            webpage, 'view_count', default=None)
+        if view_count:
+            view_count = int_or_none(view_count.replace('.', ''))
+
+        # The rating is optional metadata, so a missing value must not abort.
+        average_rating = self._search_regex(
+            r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
+            webpage, 'average_rating', default=None)
+        if average_rating:
+            average_rating = float_or_none(average_rating.replace(',', '.'))
+
+        # Formats are mandatory: fail with an extraction error, not on None.
+        playlist = self._parse_json(
+            self._search_regex(
+                r'playlist\s*:\s*\[([^\]]*)\]', webpage, 'playlist'),
+            display_id, transform_source=fix_json)
+
+        formats = [{
+            'url': entry['src'],
+            'format_id': key,
+            'quality': entry.get('quality'),
+        } for key, entry in playlist.items() if entry.get('src')]
+        self._sort_formats(formats, field_preference=['format_id'])
+
+        # Entry '0' supplies the id as before; otherwise use any format URL.
+        video_id = self._search_regex(
+            r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
+            (playlist.get('0') or {}).get('src') or formats[0]['url'], 'video id')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'display_id': display_id,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'uploader_url': uploader_url,
+            'tags': tags,
+            'view_count': view_count,
+            'average_rating': average_rating,
+        }
diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py
index 18ef5c2..57c67a4 100644
--- a/youtube_dl/extractor/gazeta.py
+++ b/youtube_dl/extractor/gazeta.py
@@ -16,7 +16,7 @@ class GazetaIE(InfoExtractor):
'ext': 'mp4',
'title': '«70–80 процентов гражданских в Донецке на грани голода»',
'description': 'md5:38617526050bd17b234728e7f9620a71',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
'skip': 'video not found',
}, {
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 3949c8b..1c233f0 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -29,6 +29,7 @@ from ..utils import (
UnsupportedError,
xpath_text,
)
+from .commonprotocols import RtmpIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
@@ -73,8 +74,15 @@ from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
from .soundcloud import SoundcloudIE
+from .tunein import TuneInBaseIE
from .vbox7 import Vbox7IE
from .dbtv import DBTVIE
+from .piksel import PikselIE
+from .videa import VideaIE
+from .twentymin import TwentyMinutenIE
+from .ustream import UstreamIE
+from .openload import OpenloadIE
+from .videopress import VideoPressIE
class GenericIE(InfoExtractor):
@@ -236,7 +244,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'Tikibad ontruimd wegens brand',
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 33,
},
'params': {
@@ -297,7 +305,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20130224',
'uploader_id': 'TheVerge',
- 'description': 're:^Chris Ziegler takes a look at the\.*',
+ 'description': r're:^Chris Ziegler takes a look at the\.*',
'uploader': 'The Verge',
'title': 'First Firefox OS phones side-by-side',
},
@@ -343,10 +351,10 @@ class GenericIE(InfoExtractor):
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
},
- # embedded brightcove video
- # it also tests brightcove videos that need to set the 'Referer' in the
- # http requests
{
+ # embedded brightcove video
+ # it also tests brightcove videos that need to set the 'Referer'
+ # in the http requests
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': {
@@ -361,6 +369,24 @@ class GenericIE(InfoExtractor):
},
},
{
+ # embedded with itemprop embedURL and video id spelled as `idVideo`
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+ 'info_dict': {
+ 'id': '5255628253001',
+ 'ext': 'mp4',
+ 'title': 'md5:37c519b1128915607601e75a87995fc0',
+ 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+ 'uploader': 'BFM BUSINESS',
+ 'uploader_id': '876450612001',
+ 'timestamp': 1482255315,
+ 'upload_date': '20161220',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
# https://github.com/rg3/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',
'md5': '0ba9446db037002366bab3b3eb30c88c',
@@ -401,6 +427,26 @@ class GenericIE(InfoExtractor):
'skip_download': True, # m3u8 download
},
},
+ {
+ # Brightcove with alternative playerID key
+ 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
+ 'info_dict': {
+ 'id': 'nmeth.2062_SV1',
+ 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2228375078001',
+ 'ext': 'mp4',
+ 'title': 'nmeth.2062-sv1',
+ 'description': 'nmeth.2062-sv1',
+ 'timestamp': 1363357591,
+ 'upload_date': '20130315',
+ 'uploader': 'Nature Publishing Group',
+ 'uploader_id': '1964492299001',
+ },
+ }],
+ },
# ooyala video
{
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
@@ -518,7 +564,7 @@ class GenericIE(InfoExtractor):
'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
'ext': 'mp4',
'title': 'Ужастики, русский трейлер (2015)',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 153,
}
},
@@ -546,17 +592,6 @@ class GenericIE(InfoExtractor):
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
}
},
- # Embedded Ustream video
- {
- 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
- 'md5': '27b99cdb639c9b12a79bca876a073417',
- 'info_dict': {
- 'id': '45734260',
- 'ext': 'flv',
- 'uploader': 'AU SPA: The NSA and Privacy',
- 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
- }
- },
# nowvideo embed hidden behind percent encoding
{
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@@ -738,7 +773,7 @@ class GenericIE(InfoExtractor):
'duration': 48,
'timestamp': 1401537900,
'upload_date': '20140531',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
# Wistia embed
@@ -808,6 +843,21 @@ class GenericIE(InfoExtractor):
},
'playlist_mincount': 7,
},
+ # TuneIn station embed
+ {
+ 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
+ 'info_dict': {
+ 'id': '204146',
+ 'ext': 'mp3',
+ 'title': 'CNRV',
+ 'location': 'Paris, France',
+ 'is_live': True,
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
# Livestream embed
{
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
@@ -898,6 +948,19 @@ class GenericIE(InfoExtractor):
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
},
},
+ # jwplayer rtmp
+ {
+ 'url': 'http://www.suffolk.edu/sjc/',
+ 'info_dict': {
+ 'id': 'sjclive',
+ 'ext': 'flv',
+ 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
+ 'uploader': 'www.suffolk.edu',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -972,6 +1035,20 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
+ {
+ # Kaltura embedded, some fileExt broken (#11480)
+ 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
+ 'info_dict': {
+ 'id': '1_sgtvehim',
+ 'ext': 'mp4',
+ 'title': 'Our "Standard Models" of particle physics and cosmology',
+ 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
+ 'timestamp': 1321158993,
+ 'upload_date': '20111113',
+ 'uploader_id': 'kps1',
+ },
+ 'add_ie': ['Kaltura'],
+ },
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -981,7 +1058,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'Навальный вышел на свободу',
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 87,
'view_count': int,
'age_limit': 0,
@@ -995,7 +1072,7 @@ class GenericIE(InfoExtractor):
'id': '12820',
'ext': 'mp4',
'title': "'O Sole Mio",
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 216,
'view_count': int,
},
@@ -1008,7 +1085,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 694,
'age_limit': 0,
},
@@ -1020,7 +1097,7 @@ class GenericIE(InfoExtractor):
'id': '3519514',
'ext': 'mp4',
'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
- 'thumbnail': 're:^https?://.*\.png$',
+ 'thumbnail': r're:^https?://.*\.png$',
'duration': 45.115,
},
},
@@ -1103,7 +1180,7 @@ class GenericIE(InfoExtractor):
'id': '300346',
'ext': 'mp4',
'title': '中一中男師變性 全校師生力挺',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
@@ -1149,7 +1226,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'Sauvons les abeilles ! - Le débat',
'description': 'md5:d9082128b1c5277987825d684939ca26',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
'timestamp': 1434970506,
'upload_date': '20150622',
'uploader': 'Public Sénat',
@@ -1163,7 +1240,7 @@ class GenericIE(InfoExtractor):
'id': '2855',
'ext': 'mp4',
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
'uploader': 'ClickHole',
'uploader_id': 'clickhole',
}
@@ -1389,6 +1466,44 @@ class GenericIE(InfoExtractor):
},
'playlist_mincount': 3,
},
+ {
+ # Videa embeds
+ 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+ 'info_dict': {
+ 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+ 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+ },
+ 'playlist_mincount': 2,
+ },
+ {
+ # 20 minuten embed
+ 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+ 'info_dict': {
+ 'id': '523629',
+ 'ext': 'mp4',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [TwentyMinutenIE.ie_key()],
+ },
+ {
+ # VideoPress embed
+ 'url': 'https://en.support.wordpress.com/videopress/',
+ 'info_dict': {
+ 'id': 'OcobLTqC',
+ 'ext': 'm4v',
+ 'title': 'IMG_5786',
+ 'timestamp': 1435711927,
+ 'upload_date': '20150701',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [VideoPressIE.ie_key()],
+ }
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -1880,7 +1995,14 @@ class GenericIE(InfoExtractor):
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
- return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
+ embed_token = self._search_regex(
+ r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
+ webpage, 'ooyala embed token', default=None)
+ return OoyalaIE._build_url_result(smuggle_url(
+ mobj.group('ec'), {
+ 'domain': url,
+ 'embed_token': embed_token,
+ }))
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
@@ -2011,10 +2133,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'TED')
# Look for embedded Ustream videos
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Ustream')
+ ustream_url = UstreamIE._extract_url(webpage)
+ if ustream_url:
+ return self.url_result(ustream_url, UstreamIE.ie_key())
# Look for embedded arte.tv player
mobj = re.search(
@@ -2045,6 +2166,11 @@ class GenericIE(InfoExtractor):
if soundcloud_urls:
return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+ # Look for tunein player
+ tunein_urls = TuneInBaseIE._extract_urls(webpage)
+ if tunein_urls:
+ return _playlist_from_matches(tunein_urls)
+
# Look for embedded mtvservices player
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
if mtvservices_url:
@@ -2211,6 +2337,11 @@ class GenericIE(InfoExtractor):
if arkena_url:
return self.url_result(arkena_url, ArkenaIE.ie_key())
+ # Look for Piksel embeds
+ piksel_url = PikselIE._extract_url(webpage)
+ if piksel_url:
+ return self.url_result(piksel_url, PikselIE.ie_key())
+
# Look for Limelight embeds
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj:
@@ -2320,6 +2451,29 @@ class GenericIE(InfoExtractor):
if dbtv_urls:
return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
+ # Look for Videa embeds
+ videa_urls = VideaIE._extract_urls(webpage)
+ if videa_urls:
+ return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
+
+ # Look for 20 minuten embeds
+ twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
+ if twentymin_urls:
+ return _playlist_from_matches(
+ twentymin_urls, ie=TwentyMinutenIE.ie_key())
+
+ # Look for Openload embeds
+ openload_urls = OpenloadIE._extract_urls(webpage)
+ if openload_urls:
+ return _playlist_from_matches(
+ openload_urls, ie=OpenloadIE.ie_key())
+
+ # Look for VideoPress embeds
+ videopress_urls = VideoPressIE._extract_urls(webpage)
+ if videopress_urls:
+ return _playlist_from_matches(
+ videopress_urls, ie=VideoPressIE.ie_key())
+
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
@@ -2347,6 +2501,8 @@ class GenericIE(InfoExtractor):
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
+ if RtmpIE.suitable(vurl):
+ return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
@@ -2454,6 +2610,15 @@ class GenericIE(InfoExtractor):
'age_limit': age_limit,
}
+ if RtmpIE.suitable(video_url):
+ entry_info_dict.update({
+ '_type': 'url_transparent',
+ 'ie_key': RtmpIE.ie_key(),
+ 'url': video_url,
+ })
+ entries.append(entry_info_dict)
+ continue
+
ext = determine_ext(video_url)
if ext == 'smil':
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
diff --git a/youtube_dl/extractor/giantbomb.py b/youtube_dl/extractor/giantbomb.py
index 87cd191..29b684d 100644
--- a/youtube_dl/extractor/giantbomb.py
+++ b/youtube_dl/extractor/giantbomb.py
@@ -23,7 +23,7 @@ class GiantBombIE(InfoExtractor):
'title': 'Quick Look: Destiny: The Dark Below',
'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24',
'duration': 2399,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py
index 28eb733..5a9992a 100644
--- a/youtube_dl/extractor/giga.py
+++ b/youtube_dl/extractor/giga.py
@@ -24,7 +24,7 @@ class GigaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
'description': 'md5:afdf5862241aded4718a30dff6a57baf',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 578,
'timestamp': 1414749706,
'upload_date': '20141031',
diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py
index f0d9513..d94dfbf 100644
--- a/youtube_dl/extractor/glide.py
+++ b/youtube_dl/extractor/glide.py
@@ -14,7 +14,7 @@ class GlideIE(InfoExtractor):
'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
'ext': 'mp4',
'title': "Damon's Glide message",
- 'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
+ 'thumbnail': r're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index c7776b1..a34779b 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -43,7 +43,10 @@ class GoIE(InfoExtractor):
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
if not video_id:
webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id')
+ video_id = self._search_regex(
+ # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
+ # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
+ r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
brand = self._BRANDS[sub_domain]
video_data = self._download_json(
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py
index 363dc66..92efd16 100644
--- a/youtube_dl/extractor/godtube.py
+++ b/youtube_dl/extractor/godtube.py
@@ -23,7 +23,7 @@ class GodTubeIE(InfoExtractor):
'timestamp': 1205712000,
'uploader': 'beverlybmusic',
'upload_date': '20080317',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
]
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index 766fc26..fec36cb 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
+ lowercase_escape,
)
@@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
_TESTS = [{
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
- 'md5': '881f7700aec4f538571fa1e0eed4a7b6',
+ 'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
- 'duration': 46,
+ 'duration': 45,
}
}, {
# video id is longer than 28 characters
@@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
- 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
+ 'http://docs.google.com/file/d/%s' % video_id, video_id)
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
@@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor):
resolution = fmt.split('/')[1]
width, height = resolution.split('x')
formats.append({
- 'url': fmt_url,
+ 'url': lowercase_escape(fmt_url),
'format_id': fmt_id,
'resolution': resolution,
'width': int_or_none(width),
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py
index 74e1720..377981d 100644
--- a/youtube_dl/extractor/goshgay.py
+++ b/youtube_dl/extractor/goshgay.py
@@ -19,7 +19,7 @@ class GoshgayIE(InfoExtractor):
'id': '299069',
'ext': 'flv',
'title': 'DIESEL SFW XXX Video',
- 'thumbnail': 're:^http://.*\.jpg$',
+ 'thumbnail': r're:^http://.*\.jpg$',
'duration': 80,
'age_limit': 18,
}
diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py
index cbf7743..8116ad9 100644
--- a/youtube_dl/extractor/hbo.py
+++ b/youtube_dl/extractor/hbo.py
@@ -120,7 +120,7 @@ class HBOIE(HBOBaseIE):
'id': '1437839',
'ext': 'mp4',
'title': 'Ep. 64 Clip: Encryption',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'duration': 1072,
}
}
@@ -141,7 +141,7 @@ class HBOEpisodeIE(HBOBaseIE):
'display_id': 'ep-52-inside-the-episode',
'ext': 'mp4',
'title': 'Ep. 52: Inside the Episode',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'duration': 240,
},
}, {
diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py
index 2564538..18c2520 100644
--- a/youtube_dl/extractor/hearthisat.py
+++ b/youtube_dl/extractor/hearthisat.py
@@ -25,7 +25,7 @@ class HearThisAtIE(InfoExtractor):
'id': '150939',
'ext': 'wav',
'title': 'Moofi - Dr. Kreep',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421564134,
'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
'upload_date': '20150118',
@@ -46,7 +46,7 @@ class HearThisAtIE(InfoExtractor):
'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
'upload_date': '20160328',
'timestamp': 1459186146,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'comment_count': int,
'view_count': int,
'like_count': int,
diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py
index 278d9f5..1629cdb 100644
--- a/youtube_dl/extractor/heise.py
+++ b/youtube_dl/extractor/heise.py
@@ -29,7 +29,7 @@ class HeiseIE(InfoExtractor):
'timestamp': 1411812600,
'upload_date': '20140927',
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
}
}
diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py
index 10da140..0ee8ea7 100644
--- a/youtube_dl/extractor/hellporno.py
+++ b/youtube_dl/extractor/hellporno.py
@@ -20,7 +20,7 @@ class HellPornoIE(InfoExtractor):
'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
'ext': 'mp4',
'title': 'Dixie is posing with naked ass very erotic',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
}
}, {
diff --git a/youtube_dl/extractor/historicfilms.py b/youtube_dl/extractor/historicfilms.py
index 6a36933..56343e9 100644
--- a/youtube_dl/extractor/historicfilms.py
+++ b/youtube_dl/extractor/historicfilms.py
@@ -14,7 +14,7 @@ class HistoricFilmsIE(InfoExtractor):
'ext': 'mov',
'title': 'Historic Films: GP-7',
'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2096,
},
}
diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py
index ff79743..e21ebb8 100644
--- a/youtube_dl/extractor/hitbox.py
+++ b/youtube_dl/extractor/hitbox.py
@@ -25,7 +25,7 @@ class HitboxIE(InfoExtractor):
'alt_title': 'hitboxlive - Aug 9th #6',
'description': '',
'ext': 'mp4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 215.1666,
'resolution': 'HD 720p',
'uploader': 'hitboxlive',
@@ -163,7 +163,7 @@ class HitboxLiveIE(HitboxIE):
if cdn.get('rtmpSubscribe') is True:
continue
base_url = cdn.get('netConnectionUrl')
- host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
+ host = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
if base_url not in servers:
servers.append(base_url)
for stream in cdn.get('bitrates'):
diff --git a/youtube_dl/extractor/hitrecord.py b/youtube_dl/extractor/hitrecord.py
new file mode 100644
index 0000000..01a6946
--- /dev/null
+++ b/youtube_dl/extractor/hitrecord.py
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
+ try_get,
+)
+
+
+class HitRecordIE(InfoExtractor):
+    # Extractor for single record pages on hitrecord.org; the numeric record
+    # id captured from the URL doubles as the video id.
+    _VALID_URL = r'https?://(?:www\.)?hitrecord\.org/records/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://hitrecord.org/records/2954362',
+        'md5': 'fe1cdc2023bce0bbb95c39c57426aa71',
+        'info_dict': {
+            'id': '2954362',
+            'ext': 'mp4',
+            'title': 'A Very Different World (HITRECORD x ACLU)',
+            'description': 'md5:e62defaffab5075a5277736bead95a3d',
+            'duration': 139.327,
+            'timestamp': 1471557582,
+            'upload_date': '20160818',
+            'uploader': 'Zuzi.C12',
+            'uploader_id': '362811',
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+            'tags': list,
+        }
+    }
+
+    def _real_extract(self, url):
+        # Look the record up through the site's JSON API and translate the
+        # response into an info dict.
+        video_id = self._match_id(url)
+
+        api_url = 'https://hitrecord.org/api/web/records/%s' % video_id
+        record = self._download_json(api_url, video_id)
+
+        # Title and media URL are mandatory: a missing key raises here.
+        title = record['title']
+        video_url = record['source_url']['mp4_url']
+
+        # Keep only tag entries that are dicts carrying a non-empty text
+        # string; tags stays None when the record has no (or an empty) list.
+        tags = None
+        raw_tags = try_get(record, lambda x: x['tags'], list)
+        if raw_tags:
+            tags = []
+            for tag in raw_tags:
+                if not isinstance(tag, dict):
+                    continue
+                text = tag.get('text')
+                if text and isinstance(text, compat_str):
+                    tags.append(text)
+
+        # The uploader lives in a nested "user" object that may be absent.
+        uploader = try_get(
+            record, lambda x: x['user']['username'], compat_str)
+        uploader_id = try_get(
+            record, lambda x: compat_str(x['user']['id']))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': clean_html(record.get('body')),
+            # scaled by 1000 - presumably the API reports milliseconds
+            'duration': float_or_none(record.get('duration'), 1000),
+            'timestamp': int_or_none(record.get('created_at_i')),
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'view_count': int_or_none(record.get('total_views_count')),
+            'like_count': int_or_none(record.get('hearts_count')),
+            'comment_count': int_or_none(record.get('comments_count')),
+            'tags': tags,
+        }
diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dl/extractor/hornbunny.py
index 0615f06..c458a95 100644
--- a/youtube_dl/extractor/hornbunny.py
+++ b/youtube_dl/extractor/hornbunny.py
@@ -20,7 +20,7 @@ class HornBunnyIE(InfoExtractor):
'duration': 550,
'age_limit': 18,
'view_count': int,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
index 65ba2a4..2be68ab 100644
--- a/youtube_dl/extractor/howstuffworks.py
+++ b/youtube_dl/extractor/howstuffworks.py
@@ -21,7 +21,7 @@ class HowStuffWorksIE(InfoExtractor):
'title': 'Cool Jobs - Iditarod Musher',
'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
'display_id': 'cool-jobs-iditarod-musher',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 161,
},
'skip': 'Video broken',
@@ -34,7 +34,7 @@ class HowStuffWorksIE(InfoExtractor):
'title': 'Survival Zone: Food and Water In the Savanna',
'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
'display_id': 'survival-zone-food-and-water-in-the-savanna',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
@@ -45,7 +45,7 @@ class HowStuffWorksIE(InfoExtractor):
'title': 'Sword Swallowing #1 by Dan Meyer',
'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
'display_id': 'sword-swallowing-1-by-dan-meyer',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py
index cec0df0..4ca275d 100644
--- a/youtube_dl/extractor/huajiao.py
+++ b/youtube_dl/extractor/huajiao.py
@@ -20,7 +20,7 @@ class HuajiaoIE(InfoExtractor):
'title': '#新人求关注#',
'description': 're:.*',
'duration': 2424.0,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1475866459,
'upload_date': '20161007',
'uploader': 'Penny_余姿昀',
diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py
index 0590737..97e36f0 100644
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@@ -52,7 +52,7 @@ class HuffPostIE(InfoExtractor):
thumbnails = []
for url in filter(None, data['images'].values()):
- m = re.match('.*-([0-9]+x[0-9]+)\.', url)
+ m = re.match(r'.*-([0-9]+x[0-9]+)\.', url)
if not m:
continue
thumbnails.append({
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index f0fc8d4..f95c00c 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -13,7 +13,7 @@ from ..utils import (
class ImdbIE(InfoExtractor):
IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers'
- _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
@@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor):
}, {
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/videoplayer/vi1562949145',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/inc.py b/youtube_dl/extractor/inc.py
new file mode 100644
index 0000000..241ec83
--- /dev/null
+++ b/youtube_dl/extractor/inc.py
@@ -0,0 +1,41 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+
+
+class IncIE(InfoExtractor):
+    # Extractor for inc.com pages. The page itself only supplies a Kaltura
+    # partner id and entry id; the actual extraction is delegated to KalturaIE.
+    #
+    # The dot before "html" is escaped so that only genuine ".html" URLs
+    # match (an unescaped "." would accept any character in that position).
+    _VALID_URL = r'https?://(?:www\.)?inc\.com/(?:[^/]+/)+(?P<id>[^.]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.inc.com/tip-sheet/bill-gates-says-these-5-books-will-make-you-smarter.html',
+        'md5': '7416739c9c16438c09fa35619d6ba5cb',
+        'info_dict': {
+            'id': '1_wqig47aq',
+            'ext': 'mov',
+            'title': 'Bill Gates Says These 5 Books Will Make You Smarter',
+            'description': 'md5:bea7ff6cce100886fc1995acb743237e',
+            'timestamp': 1474414430,
+            'upload_date': '20160920',
+            'uploader_id': 'video@inc.com',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        # Returns a url_result pointing at "kaltura:<partner id>:<entry id>".
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # The partner id is assigned to a JS variable in the page source.
+        partner_id = self._search_regex(
+            r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id')
+
+        # pageInfo.videos is a JS array literal; its content is parsed as a
+        # JSON object and the Kaltura entry id is read from it.
+        kaltura_id = self._parse_json(self._search_regex(
+            r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
+            display_id)['vid_kaltura_id']
+
+        return self.url_result(
+            'kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py
index c6f0804..11cf3c6 100644
--- a/youtube_dl/extractor/indavideo.py
+++ b/youtube_dl/extractor/indavideo.py
@@ -19,7 +19,7 @@ class IndavideoEmbedIE(InfoExtractor):
'ext': 'mp4',
'title': 'Cicatánc',
'description': '',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'cukiajanlo',
'uploader_id': '83729',
'timestamp': 1439193826,
@@ -102,7 +102,7 @@ class IndavideoIE(InfoExtractor):
'ext': 'mp4',
'title': 'Vicces cica',
'description': 'Játszik a tablettel. :D',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Jet_Pack',
'uploader_id': '491217',
'timestamp': 1390821212,
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index cca0b8a..9fb71e8 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import base64
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+)
from ..utils import determine_ext
from .bokecc import BokeCCBaseIE
@@ -33,9 +36,21 @@ class InfoQIE(BokeCCBaseIE):
'ext': 'flv',
'description': 'md5:308d981fb28fa42f49f9568322c683ff',
},
+ }, {
+ 'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
+ 'md5': '0e34642d4d9ef44bf86f66f6399672db',
+ 'info_dict': {
+ 'id': 'Simple-Made-Easy',
+ 'title': 'Simple Made Easy',
+ 'ext': 'mp3',
+ 'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
+ },
+ 'params': {
+ 'format': 'bestaudio',
+ },
}]
- def _extract_rtmp_videos(self, webpage):
+ def _extract_rtmp_video(self, webpage):
# The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/'
@@ -47,28 +62,53 @@ class InfoQIE(BokeCCBaseIE):
playpath = 'mp4:' + real_id
return [{
- 'format_id': 'rtmp',
+ 'format_id': 'rtmp_video',
'url': video_url,
'ext': determine_ext(playpath),
'play_path': playpath,
}]
- def _extract_http_videos(self, webpage):
- http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
-
+ def _extract_cookies(self, webpage):
policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
+ return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
+ policy, signature, key_pair_id)
+ def _extract_http_video(self, webpage):
+ http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
return [{
- 'format_id': 'http',
+ 'format_id': 'http_video',
'url': http_video_url,
'http_headers': {
- 'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
- policy, signature, key_pair_id),
+ 'Cookie': self._extract_cookies(webpage)
},
}]
+    def _extract_http_audio(self, webpage, video_id):
+        # Build the optional audio-only format from the page's hidden form
+        # inputs. Returns a list holding one format dict, or an empty list
+        # when the page offers no audio file or the file is not reachable.
+        fields = self._hidden_inputs(webpage)
+        # Use .get() so that a page without a "filename" input yields no
+        # audio format instead of raising KeyError (assumes _hidden_inputs
+        # returns a plain dict).
+        http_audio_url = fields.get('filename')
+        if not http_audio_url:
+            return []
+
+        cookies_header = {'Cookie': self._extract_cookies(webpage)}
+
+        # base URL is found in the Location header in the response returned by
+        # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
+        http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
+
+        # the audio file is sometimes missing even though there is a download
+        # link, so probe the URL to make sure
+        if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
+            return []
+
+        return [{
+            'format_id': 'http_audio',
+            'url': http_audio_url,
+            # audio only
+            'vcodec': 'none',
+            'http_headers': cookies_header,
+        }]
+
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -80,7 +120,10 @@ class InfoQIE(BokeCCBaseIE):
# for China videos, HTTP video URL exists but always fails with 403
formats = self._extract_bokecc_formats(webpage, video_id)
else:
- formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
+ formats = (
+ self._extract_rtmp_video(webpage) +
+ self._extract_http_video(webpage) +
+ self._extract_http_audio(webpage, video_id))
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 196407b..98f408c 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -22,7 +22,7 @@ class InstagramIE(InfoExtractor):
'ext': 'mp4',
'title': 'Video by naomipq',
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1371748545,
'upload_date': '20130620',
'uploader_id': 'naomipq',
@@ -38,7 +38,7 @@ class InstagramIE(InfoExtractor):
'id': 'BA-pQFBG8HZ',
'ext': 'mp4',
'title': 'Video by britneyspears',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1453760977,
'upload_date': '20160125',
'uploader_id': 'britneyspears',
@@ -169,7 +169,7 @@ class InstagramUserIE(InfoExtractor):
'id': '614605558512799803_462752227',
'ext': 'mp4',
'title': '#Porsche Intelligent Performance.',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
'timestamp': 1387486713,
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
index da2cdc6..0fe5768 100644
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -65,7 +65,7 @@ class IPrimaIE(InfoExtractor):
options = self._parse_json(
self._search_regex(
- r'(?s)var\s+playerOptions\s*=\s*({.+?});',
+ r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
playerpage, 'player options', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if options:
diff --git a/youtube_dl/extractor/ir90tv.py b/youtube_dl/extractor/ir90tv.py
index 214bcd5..d5a3f6f 100644
--- a/youtube_dl/extractor/ir90tv.py
+++ b/youtube_dl/extractor/ir90tv.py
@@ -14,7 +14,7 @@ class Ir90TvIE(InfoExtractor):
'id': '95719',
'ext': 'mp4',
'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
new file mode 100644
index 0000000..b0d8604
--- /dev/null
+++ b/youtube_dl/extractor/itv.py
@@ -0,0 +1,196 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import uuid
+import xml.etree.ElementTree as etree
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_etree_register_namespace,
+)
+from ..utils import (
+ extract_attributes,
+ xpath_with_ns,
+ xpath_element,
+ xpath_text,
+ int_or_none,
+ parse_duration,
+ ExtractorError,
+ determine_ext,
+)
+
+
+class ITVIE(InfoExtractor):
+    # Extractor for ITV Hub programme pages. Formats come from two sources:
+    # a SOAP "GetPlaylist" request (RTMP streams) and, when the page carries
+    # the needed attributes, a JSON playlist request (HLS / direct URLs).
+    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+    _TEST = {
+        'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
+        'info_dict': {
+            'id': '2a2936a0053',
+            'ext': 'flv',
+            'title': 'Home Movie',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # All request parameters are data-* attributes of the element with
+        # id="video".
+        params = extract_attributes(self._search_regex(
+            r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
+
+        # Namespace prefixes used while building the SOAP request envelope.
+        ns_map = {
+            'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
+            'tem': 'http://tempuri.org/',
+            'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types',
+            'com': 'http://schemas.itv.com/2009/05/Common',
+        }
+        for ns, full_ns in ns_map.items():
+            compat_etree_register_namespace(ns, full_ns)
+
+        def _add_ns(name):
+            # Expand a "prefix:tag" name using ns_map.
+            return xpath_with_ns(name, ns_map)
+
+        def _add_sub_element(element, name):
+            # Append a namespaced child to element and return it.
+            return etree.SubElement(element, _add_ns(name))
+
+        # Build the GetPlaylist SOAP envelope. Elements created without a
+        # .text assignment are sent empty.
+        req_env = etree.Element(_add_ns('soapenv:Envelope'))
+        _add_sub_element(req_env, 'soapenv:Header')
+        body = _add_sub_element(req_env, 'soapenv:Body')
+        get_playlist = _add_sub_element(body, 'tem:GetPlaylist')
+        request = _add_sub_element(get_playlist, 'tem:request')
+        _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id']
+        _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
+        vodcrid = _add_sub_element(request, 'itv:Vodcrid')
+        _add_sub_element(vodcrid, 'com:Id')
+        _add_sub_element(request, 'itv:Partition')
+        user_info = _add_sub_element(get_playlist, 'tem:userInfo')
+        _add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv'
+        _add_sub_element(user_info, 'itv:DM')
+        _add_sub_element(user_info, 'itv:RevenueScienceValue')
+        _add_sub_element(user_info, 'itv:SessionId')
+        _add_sub_element(user_info, 'itv:SsoToken')
+        _add_sub_element(user_info, 'itv:UserToken')
+        site_info = _add_sub_element(get_playlist, 'tem:siteInfo')
+        _add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None'
+        _add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV'
+        _add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any'
+        _add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO'
+        _add_sub_element(site_info, 'itv:Category')
+        _add_sub_element(site_info, 'itv:Platform').text = 'DotCom'
+        _add_sub_element(site_info, 'itv:Site').text = 'ItvCom'
+        device_info = _add_sub_element(get_playlist, 'tem:deviceInfo')
+        _add_sub_element(device_info, 'itv:ScreenSize').text = 'Big'
+        player_info = _add_sub_element(get_playlist, 'tem:playerInfo')
+        _add_sub_element(player_info, 'itv:Version').text = '2'
+
+        headers = self.geo_verification_headers()
+        headers.update({
+            'Content-Type': 'text/xml; charset=utf-8',
+            'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
+        })
+        resp_env = self._download_xml(
+            params['data-playlist-url'], video_id,
+            headers=headers, data=etree.tostring(req_env))
+        playlist = xpath_element(resp_env, './/Playlist')
+        if playlist is None:
+            # No playlist in the response: report the SOAP fault text.
+            fault_string = xpath_text(resp_env, './/faultstring')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
+        title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
+        video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
+        media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
+        rtmp_url = media_files.attrib['base']
+
+        # One RTMP format per MediaFile that has a play path.
+        formats = []
+        for media_file in media_files.findall('MediaFile'):
+            play_path = xpath_text(media_file, 'URL')
+            if not play_path:
+                continue
+            tbr = int_or_none(media_file.get('bitrate'), 1000)
+            formats.append({
+                'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
+                'url': rtmp_url,
+                'play_path': play_path,
+                'tbr': tbr,
+                'ext': 'flv',
+            })
+
+        # Optional second source: a JSON playlist requested with an iPad-like
+        # device description. Failures here are non-fatal.
+        ios_playlist_url = params.get('data-video-playlist')
+        hmac = params.get('data-video-hmac')
+        if ios_playlist_url and hmac:
+            headers = self.geo_verification_headers()
+            headers.update({
+                'Accept': 'application/vnd.itv.vod.playlist.v2+json',
+                'Content-Type': 'application/json',
+                'hmac': hmac.upper(),
+            })
+            ios_playlist = self._download_json(
+                ios_playlist_url, video_id, data=json.dumps({
+                    'user': {
+                        'itvUserId': '',
+                        'entitlements': [],
+                        'token': ''
+                    },
+                    'device': {
+                        'manufacturer': 'Apple',
+                        'model': 'iPad',
+                        'os': {
+                            'name': 'iPhone OS',
+                            'version': '9.3',
+                            'type': 'ios'
+                        }
+                    },
+                    'client': {
+                        'version': '4.1',
+                        'id': 'browser'
+                    },
+                    'variantAvailability': {
+                        'featureset': {
+                            'min': ['hls', 'aes'],
+                            'max': ['hls', 'aes']
+                        },
+                        'platformTag': 'mobile'
+                    }
+                }).encode(), headers=headers, fatal=False)
+            if ios_playlist:
+                video_data = ios_playlist.get('Playlist', {}).get('Video', {})
+                ios_base_url = video_data.get('Base')
+                for media_file in video_data.get('MediaFiles', []):
+                    href = media_file.get('Href')
+                    if not href:
+                        continue
+                    if ios_base_url:
+                        href = ios_base_url + href
+                    ext = determine_ext(href)
+                    if ext == 'm3u8':
+                        formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                    else:
+                        formats.append({
+                            'url': href,
+                        })
+        self._sort_formats(formats)
+
+        # All caption URLs are filed under 'en'; an 'xml' extension is
+        # reported as 'ttml'.
+        subtitles = {}
+        for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
+            if not caption_url.text:
+                continue
+            ext = determine_ext(caption_url.text, 'ttml')
+            subtitles.setdefault('en', []).append({
+                'url': caption_url.text,
+                'ext': 'ttml' if ext == 'xml' else ext,
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'subtitles': subtitles,
+            'episode_title': title,
+            'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
+            'series': xpath_text(playlist, 'ProgrammeTitle'),
+            # key was misspelled 'duartion', so the duration never reached
+            # the standard 'duration' field
+            'duration': parse_duration(xpath_text(playlist, 'Duration')),
+        }
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index 7c8cb21..3d3c150 100644
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -28,7 +28,7 @@ class IviIE(InfoExtractor):
'title': 'Иван Васильевич меняет профессию',
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
'duration': 5498,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'Only works from Russia',
},
@@ -46,7 +46,7 @@ class IviIE(InfoExtractor):
'episode': 'Дело Гольдберга (1 часть)',
'episode_number': 1,
'duration': 2655,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'Only works from Russia',
},
@@ -60,7 +60,7 @@ class IviIE(InfoExtractor):
'title': 'Кукла',
'description': 'md5:ffca9372399976a2d260a407cc74cce6',
'duration': 5599,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'Only works from Russia',
}
diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py
index 8d7e7f4..a7514fc 100644
--- a/youtube_dl/extractor/iwara.py
+++ b/youtube_dl/extractor/iwara.py
@@ -3,14 +3,18 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
-from ..utils import remove_end
+from ..utils import (
+ int_or_none,
+ mimetype2ext,
+ remove_end,
+)
class IwaraIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
- 'md5': '1d53866b2c514b23ed69e4352fdc9839',
+ # md5 is unstable
'info_dict': {
'id': 'amVwUl1EHpAD9RD',
'ext': 'mp4',
@@ -23,17 +27,17 @@ class IwaraIE(InfoExtractor):
'info_dict': {
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
'ext': 'mp4',
- 'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
+ 'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
'age_limit': 18,
},
'add_ie': ['GoogleDrive'],
}, {
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
- 'md5': '1d85f1e5217d2791626cff5ec83bb189',
+ # md5 is unstable
'info_dict': {
'id': '6liAP9s2Ojc',
'ext': 'mp4',
- 'age_limit': 0,
+ 'age_limit': 18,
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
'upload_date': '20160910',
@@ -52,9 +56,9 @@ class IwaraIE(InfoExtractor):
# ecchi is 'sexy' in Japanese
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
- entries = self._parse_html5_media_entries(url, webpage, video_id)
+ video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
- if not entries:
+ if not video_data:
iframe_url = self._html_search_regex(
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
webpage, 'iframe URL', group='url')
@@ -67,11 +71,25 @@ class IwaraIE(InfoExtractor):
title = remove_end(self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
- info_dict = entries[0]
- info_dict.update({
+ formats = []
+ for a_format in video_data:
+ format_id = a_format.get('resolution')
+ height = int_or_none(self._search_regex(
+ r'(\d+)p', format_id, 'height', default=None))
+ formats.append({
+ 'url': a_format['uri'],
+ 'format_id': format_id,
+ 'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
+ 'height': height,
+ 'width': int_or_none(height / 9.0 * 16.0 if height else None),
+ 'quality': 1 if format_id == 'Source' else 0,
+ })
+
+ self._sort_formats(formats)
+
+ return {
'id': video_id,
'title': title,
'age_limit': age_limit,
- })
-
- return info_dict
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py
index aa0728a..b1d7217 100644
--- a/youtube_dl/extractor/izlesene.py
+++ b/youtube_dl/extractor/izlesene.py
@@ -29,7 +29,7 @@ class IzleseneIE(InfoExtractor):
'ext': 'mp4',
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
'description': 'md5:253753e2655dde93f59f74b572454f6d',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'uploader_id': 'pelikzzle',
'timestamp': int,
'upload_date': '20140702',
@@ -44,7 +44,7 @@ class IzleseneIE(InfoExtractor):
'id': '17997',
'ext': 'mp4',
'title': 'Tarkan Dortmund 2006 Konseri',
- 'thumbnail': 're:^https://.*\.jpg',
+ 'thumbnail': r're:^https://.*\.jpg',
'uploader_id': 'parlayankiz',
'timestamp': int,
'upload_date': '20061112',
diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py
index ee9acac..595d7a5 100644
--- a/youtube_dl/extractor/jamendo.py
+++ b/youtube_dl/extractor/jamendo.py
@@ -5,9 +5,27 @@ import re
from ..compat import compat_urlparse
from .common import InfoExtractor
-
-
-class JamendoIE(InfoExtractor):
+from ..utils import parse_duration
+
+
+class JamendoBaseIE(InfoExtractor):
+    # Base class holding the title parsing shared by the Jamendo extractors.
+
+    def _extract_meta(self, webpage, fatal=True):
+        # Returns a (title, artist, second) tuple. The title is taken from
+        # the og:title, else the <title> tag, with the "| Jamendo Music"
+        # suffix removed; if that yields nothing the "name" meta tag is used
+        # (fatal controls whether its absence is an error). artist and second
+        # are the two sides of an "<artist> - <name>" title, or None.
+        title = self._og_search_title(webpage, default=None)
+        if not title:
+            title = self._search_regex(
+                r'<title>([^<]+)', webpage, 'title', default=None)
+        if title:
+            title = self._search_regex(
+                r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
+        if not title:
+            title = self._html_search_meta(
+                'name', webpage, 'title', fatal=fatal)
+
+        artist = second = None
+        match = re.search(r'(.+) - (.+)', title or '')
+        if match:
+            artist, second = match.groups()
+        return title, artist, second
+
+
+class JamendoIE(JamendoBaseIE):
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
_TEST = {
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
@@ -16,8 +34,11 @@ class JamendoIE(InfoExtractor):
'id': '196219',
'display_id': 'stories-from-emona-i',
'ext': 'flac',
- 'title': 'Stories from Emona I',
- 'thumbnail': 're:^https?://.*\.jpg'
+ 'title': 'Maya Filipič - Stories from Emona I',
+ 'artist': 'Maya Filipič',
+ 'track': 'Stories from Emona I',
+ 'duration': 210,
+ 'thumbnail': r're:^https?://.*\.jpg'
}
}
@@ -28,7 +49,7 @@ class JamendoIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
- title = self._html_search_meta('name', webpage, 'title')
+ title, artist, track = self._extract_meta(webpage)
formats = [{
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
@@ -46,37 +67,47 @@ class JamendoIE(InfoExtractor):
thumbnail = self._html_search_meta(
'image', webpage, 'thumbnail', fatal=False)
+ duration = parse_duration(self._search_regex(
+ r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
+ webpage, 'duration', fatal=False))
return {
'id': track_id,
'display_id': display_id,
'thumbnail': thumbnail,
'title': title,
+ 'duration': duration,
+ 'artist': artist,
+ 'track': track,
'formats': formats
}
-class JamendoAlbumIE(InfoExtractor):
+class JamendoAlbumIE(JamendoBaseIE):
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
_TEST = {
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
'info_dict': {
'id': '121486',
- 'title': 'Duck On Cover'
+ 'title': 'Shearer - Duck On Cover'
},
'playlist': [{
'md5': 'e1a2fcb42bda30dfac990212924149a8',
'info_dict': {
'id': '1032333',
'ext': 'flac',
- 'title': 'Warmachine'
+ 'title': 'Shearer - Warmachine',
+ 'artist': 'Shearer',
+ 'track': 'Warmachine',
}
}, {
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
'info_dict': {
'id': '1032330',
'ext': 'flac',
- 'title': 'Without Your Ghost'
+ 'title': 'Shearer - Without Your Ghost',
+ 'artist': 'Shearer',
+ 'track': 'Without Your Ghost',
}
}],
'params': {
@@ -90,18 +121,18 @@ class JamendoAlbumIE(InfoExtractor):
webpage = self._download_webpage(url, mobj.group('display_id'))
- title = self._html_search_meta('name', webpage, 'title')
-
- entries = [
- self.url_result(
- compat_urlparse.urljoin(url, m.group('path')),
- ie=JamendoIE.ie_key(),
- video_id=self._search_regex(
- r'/track/(\d+)', m.group('path'),
- 'track id', default=None))
- for m in re.finditer(
- r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
- webpage)
- ]
+ title, artist, album = self._extract_meta(webpage, fatal=False)
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': compat_urlparse.urljoin(url, m.group('path')),
+ 'ie_key': JamendoIE.ie_key(),
+ 'id': self._search_regex(
+ r'/track/(\d+)', m.group('path'), 'track id', default=None),
+ 'artist': artist,
+ 'album': album,
+ } for m in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
+ webpage)]
return self.playlist_result(entries, album_id, title)
diff --git a/youtube_dl/extractor/jove.py b/youtube_dl/extractor/jove.py
index cf73cd7..f9a034b 100644
--- a/youtube_dl/extractor/jove.py
+++ b/youtube_dl/extractor/jove.py
@@ -21,7 +21,7 @@ class JoveIE(InfoExtractor):
'ext': 'mp4',
'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
'description': 'md5:015dd4509649c0908bc27f049e0262c6',
- 'thumbnail': 're:^https?://.*\.png$',
+ 'thumbnail': r're:^https?://.*\.png$',
'upload_date': '20110523',
}
},
@@ -33,7 +33,7 @@ class JoveIE(InfoExtractor):
'ext': 'mp4',
'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
'description': 'md5:35ff029261900583970c4023b70f1dc9',
- 'thumbnail': 're:^https?://.*\.png$',
+ 'thumbnail': r're:^https?://.*\.png$',
'upload_date': '20140802',
}
},
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index 5d56e0a..aff7ab4 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -11,6 +11,7 @@ from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
+ urljoin,
)
@@ -110,10 +111,14 @@ class JWPlatformBaseIE(InfoExtractor):
tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
- if track.get('file') and track.get('kind') == 'captions':
- subtitles.setdefault(track.get('label') or 'en', []).append({
- 'url': self._proto_relative_url(track['file'])
- })
+ if track.get('kind') != 'captions':
+ continue
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
entries.append({
'id': this_video_id,
@@ -121,7 +126,7 @@ class JWPlatformBaseIE(InfoExtractor):
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
- 'duration': float_or_none(jwplayer_data.get('duration')),
+ 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
'formats': formats,
})
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 91bc3a0..5ef382f 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -107,7 +107,7 @@ class KalturaIE(InfoExtractor):
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
- (?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4),
+ (?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) or
re.search(
r'''(?xs)
@@ -266,6 +266,12 @@ class KalturaIE(InfoExtractor):
# skip for now.
if f.get('fileExt') == 'chun':
continue
+ if not f.get('fileExt'):
+ # QT indicates QuickTime; some videos have broken fileExt
+ if f.get('containerFormat') == 'qt':
+ f['fileExt'] = 'mov'
+ else:
+ f['fileExt'] = 'mp4'
video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
@@ -316,6 +322,6 @@ class KalturaIE(InfoExtractor):
'thumbnail': info.get('thumbnailUrl'),
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
- 'uploader_id': info.get('userId'),
+ 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
'view_count': info.get('plays'),
}
diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py
index c05263e..4e9eb67 100644
--- a/youtube_dl/extractor/karrierevideos.py
+++ b/youtube_dl/extractor/karrierevideos.py
@@ -20,7 +20,7 @@ class KarriereVideosIE(InfoExtractor):
'ext': 'flv',
'title': 'AltenpflegerIn',
'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
- 'thumbnail': 're:^http://.*\.png',
+ 'thumbnail': r're:^http://.*\.png',
},
'params': {
# rtmp download
@@ -34,7 +34,7 @@ class KarriereVideosIE(InfoExtractor):
'ext': 'flv',
'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
- 'thumbnail': 're:^http://.*\.png',
+ 'thumbnail': r're:^http://.*\.png',
},
'params': {
# rtmp download
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py
index 588a4d0..e83115e 100644
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -27,7 +27,7 @@ class KeezMoviesIE(InfoExtractor):
'display_id': 'petite-asian-lady-mai-playing-in-bathtub',
'ext': 'mp4',
'title': 'Petite Asian Lady Mai Playing In Bathtub',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'view_count': int,
'age_limit': 18,
}
diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py
index eb0a160..fb9c2db 100644
--- a/youtube_dl/extractor/ketnet.py
+++ b/youtube_dl/extractor/ketnet.py
@@ -13,7 +13,7 @@ class KetnetIE(InfoExtractor):
'ext': 'mp4',
'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
'description': 'Gluur mee met Ghost Rockers op de filmset',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py
index 55291c6..c11cbcf 100644
--- a/youtube_dl/extractor/konserthusetplay.py
+++ b/youtube_dl/extractor/konserthusetplay.py
@@ -2,29 +2,31 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ determine_ext,
float_or_none,
int_or_none,
)
class KonserthusetPlayIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
+ _TESTS = [{
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
+ 'md5': 'e3fd47bf44e864bd23c08e487abe1967',
'info_dict': {
'id': 'CKDDnlCY-dhWAAqiMERd-A',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Orkesterns instrument: Valthornen',
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
'thumbnail': 're:^https?://.*$',
- 'duration': 398.8,
+ 'duration': 398.76,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- }
+ }, {
+ 'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -42,12 +44,18 @@ class KonserthusetPlayIE(InfoExtractor):
player_config = media['playerconfig']
playlist = player_config['playlist']
- source = next(f for f in playlist if f.get('bitrates'))
+ source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))
FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
formats = []
+ m3u8_url = source.get('url')
+ if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
fallback_url = source.get('fallbackUrl')
fallback_format_id = None
if fallback_url:
@@ -97,6 +105,13 @@ class KonserthusetPlayIE(InfoExtractor):
thumbnail = media.get('image')
duration = float_or_none(media.get('duration'), 1000)
+ subtitles = {}
+ captions = source.get('captionsAvailableLanguages')
+ if isinstance(captions, dict):
+ for lang, subtitle_url in captions.items():
+ if lang != 'none' and isinstance(subtitle_url, compat_str):
+ subtitles.setdefault(lang, []).append({'url': subtitle_url})
+
return {
'id': video_id,
'title': title,
@@ -104,4 +119,5 @@ class KonserthusetPlayIE(InfoExtractor):
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
index cf8876f..d27d052 100644
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dl/extractor/krasview.py
@@ -23,7 +23,7 @@ class KrasViewIE(InfoExtractor):
'title': 'Снег, лёд, заносы',
'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
'duration': 27,
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
'skip_download': 'Not accessible from Travis CI server',
diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py
index 2e66e8c..6a7e3ba 100644
--- a/youtube_dl/extractor/kusi.py
+++ b/youtube_dl/extractor/kusi.py
@@ -27,7 +27,7 @@ class KUSIIE(InfoExtractor):
'duration': 223.586,
'upload_date': '20160826',
'timestamp': 1472233118,
- 'thumbnail': 're:^https?://.*\.jpg$'
+ 'thumbnail': r're:^https?://.*\.jpg$'
},
}, {
'url': 'http://kusi.com/video?clipId=12203019',
diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py
index 2fab380..3190b18 100644
--- a/youtube_dl/extractor/laola1tv.py
+++ b/youtube_dl/extractor/laola1tv.py
@@ -1,25 +1,115 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_urlparse,
-)
from ..utils import (
ExtractorError,
- sanitized_Request,
unified_strdate,
urlencode_postdata,
xpath_element,
xpath_text,
+ urljoin,
+ update_url_query,
)
+class Laola1TvEmbedIE(InfoExtractor):
+ IE_NAME = 'laola1tv:embed'
+ _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
+ _TEST = {
+ # flashvars.premium = "false";
+ 'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
+ 'info_dict': {
+ 'id': '708065',
+ 'ext': 'mp4',
+ 'title': 'MA Long CHN - FAN Zhendong CHN',
+ 'uploader': 'ITTF - International Table Tennis Federation',
+ 'upload_date': '20161211',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ flash_vars = self._search_regex(
+ r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')
+
+ def get_flashvar(x, *args, **kwargs):
+ flash_var = self._search_regex(
+ r'%s\s*:\s*"([^"]+)"' % x,
+ flash_vars, x, default=None)
+ if not flash_var:
+ flash_var = self._search_regex([
+ r'flashvars\.%s\s*=\s*"([^"]+)"' % x,
+ r'%s\s*=\s*"([^"]+)"' % x],
+ webpage, x, *args, **kwargs)
+ return flash_var
+
+ hd_doc = self._download_xml(
+ 'http://www.laola1.tv/server/hd_video.php', video_id, query={
+ 'play': get_flashvar('streamid'),
+ 'partner': get_flashvar('partnerid'),
+ 'portal': get_flashvar('portalid'),
+ 'lang': get_flashvar('sprache'),
+ 'v5ident': '',
+ })
+
+ _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
+ title = _v('title', fatal=True)
+
+ token_url = None
+ premium = get_flashvar('premium', default=None)
+ if premium:
+ token_url = update_url_query(
+ _v('url', fatal=True), {
+ 'timestamp': get_flashvar('timestamp'),
+ 'auth': get_flashvar('auth'),
+ })
+ else:
+ data_abo = urlencode_postdata(
+ dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
+ token_url = self._download_json(
+ 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
+ video_id, query={
+ 'videoId': _v('id'),
+ 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
+ 'label': _v('label'),
+ 'area': _v('area'),
+ }, data=data_abo)['data']['stream-access'][0]
+
+ token_doc = self._download_xml(
+ token_url, video_id, 'Downloading token',
+ headers=self.geo_verification_headers())
+
+ token_attrib = xpath_element(token_doc, './/token').attrib
+
+ if token_attrib['status'] != '0':
+ raise ExtractorError(
+ 'Token error: %s' % token_attrib['comment'], expected=True)
+
+ formats = self._extract_akamai_formats(
+ '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
+ video_id)
+ self._sort_formats(formats)
+
+ categories_str = _v('meta_sports')
+ categories = categories_str.split(',') if categories_str else []
+ is_live = _v('islive') == 'true'
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'upload_date': unified_strdate(_v('time_date')),
+ 'uploader': _v('meta_organisation'),
+ 'categories': categories,
+ 'is_live': is_live,
+ 'formats': formats,
+ }
+
+
class Laola1TvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
+ IE_NAME = 'laola1tv'
+ _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': {
@@ -67,85 +157,20 @@ class Laola1TvIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('slug')
- kind = mobj.group('kind')
- lang = mobj.group('lang')
- portal = mobj.group('portal')
+ display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
if 'Dieser Livestream ist bereits beendet.' in webpage:
raise ExtractorError('This live stream has already finished.', expected=True)
- iframe_url = self._search_regex(
+ iframe_url = urljoin(url, self._search_regex(
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
- webpage, 'iframe url')
-
- video_id = self._search_regex(
- r'videoid=(\d+)', iframe_url, 'video id')
-
- iframe = self._download_webpage(compat_urlparse.urljoin(
- url, iframe_url), display_id, 'Downloading iframe')
-
- partner_id = self._search_regex(
- r'partnerid\s*:\s*(["\'])(?P<partner_id>.+?)\1',
- iframe, 'partner id', group='partner_id')
-
- hd_doc = self._download_xml(
- 'http://www.laola1.tv/server/hd_video.php?%s'
- % compat_urllib_parse_urlencode({
- 'play': video_id,
- 'partner': partner_id,
- 'portal': portal,
- 'lang': lang,
- 'v5ident': '',
- }), display_id)
-
- _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
- title = _v('title', fatal=True)
-
- VS_TARGETS = {
- 'video': '2',
- 'livestream': '17',
- }
-
- req = sanitized_Request(
- 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
- compat_urllib_parse_urlencode({
- 'videoId': video_id,
- 'target': VS_TARGETS.get(kind, '2'),
- 'label': _v('label'),
- 'area': _v('area'),
- }),
- urlencode_postdata(
- dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))))
-
- token_url = self._download_json(req, display_id)['data']['stream-access'][0]
- token_doc = self._download_xml(token_url, display_id, 'Downloading token')
-
- token_attrib = xpath_element(token_doc, './/token').attrib
- token_auth = token_attrib['auth']
-
- if token_auth in ('blocked', 'restricted', 'error'):
- raise ExtractorError(
- 'Token error: %s' % token_attrib['comment'], expected=True)
-
- formats = self._extract_f4m_formats(
- '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
- video_id, f4m_id='hds')
- self._sort_formats(formats)
-
- categories_str = _v('meta_sports')
- categories = categories_str.split(',') if categories_str else []
+ webpage, 'iframe url'))
return {
- 'id': video_id,
+ '_type': 'url',
'display_id': display_id,
- 'title': title,
- 'upload_date': unified_strdate(_v('time_date')),
- 'uploader': _v('meta_organisation'),
- 'categories': categories,
- 'is_live': _v('islive') == 'true',
- 'formats': formats,
+ 'url': iframe_url,
+ 'ie_key': 'Laola1TvEmbed',
}
diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py
index c48a5aa..4321f90 100644
--- a/youtube_dl/extractor/leeco.py
+++ b/youtube_dl/extractor/leeco.py
@@ -386,8 +386,8 @@ class LetvCloudIE(InfoExtractor):
return formats
def _real_extract(self, url):
- uu_mobj = re.search('uu=([\w]+)', url)
- vu_mobj = re.search('vu=([\w]+)', url)
+ uu_mobj = re.search(r'uu=([\w]+)', url)
+ vu_mobj = re.search(r'vu=([\w]+)', url)
if not uu_mobj or not vu_mobj:
raise ExtractorError('Invalid URL: %s' % url, expected=True)
diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dl/extractor/lemonde.py
index be66fff..42568f3 100644
--- a/youtube_dl/extractor/lemonde.py
+++ b/youtube_dl/extractor/lemonde.py
@@ -12,7 +12,7 @@ class LemondeIE(InfoExtractor):
'id': 'lqm3kl',
'ext': 'mp4',
'title': "Comprendre l'affaire Bygmalion en 5 minutes",
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 320,
'upload_date': '20160119',
'timestamp': 1453194778,
diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py
index 0a94366..40295a3 100644
--- a/youtube_dl/extractor/libraryofcongress.py
+++ b/youtube_dl/extractor/libraryofcongress.py
@@ -25,7 +25,7 @@ class LibraryOfCongressIE(InfoExtractor):
'id': '90716351',
'ext': 'mp4',
'title': "Pa's trip to Mars",
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 0,
'view_count': int,
},
diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py
index d375695..4750b03 100644
--- a/youtube_dl/extractor/libsyn.py
+++ b/youtube_dl/extractor/libsyn.py
@@ -41,7 +41,7 @@ class LibsynIE(InfoExtractor):
formats = [{
'url': media_url,
- } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
+ } for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
podcast_title = self._search_regex(
r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py
index afce201..42e263b 100644
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@@ -176,7 +176,7 @@ class LifeEmbedIE(InfoExtractor):
'id': 'e50c2dec2867350528e2574c899b8291',
'ext': 'mp4',
'title': 'e50c2dec2867350528e2574c899b8291',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
}
}, {
# with 1080p
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index b7bfa7a..e635f3c 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -59,14 +59,26 @@ class LimelightBaseIE(InfoExtractor):
format_id = 'rtmp'
if stream.get('videoBitRate'):
format_id += '-%d' % int_or_none(stream['videoBitRate'])
- http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
- urls.append(http_url)
- http_fmt = fmt.copy()
- http_fmt.update({
- 'url': http_url,
- 'format_id': format_id.replace('rtmp', 'http'),
- })
- formats.append(http_fmt)
+ http_format_id = format_id.replace('rtmp', 'http')
+
+ CDN_HOSTS = (
+ ('delvenetworks.com', 'cpl.delvenetworks.com'),
+ ('video.llnw.net', 's2.content.video.llnw.net'),
+ )
+ for cdn_host, http_host in CDN_HOSTS:
+ if cdn_host not in rtmp.group('host').lower():
+ continue
+ http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
+ urls.append(http_url)
+ if self._is_valid_url(http_url, video_id, http_format_id):
+ http_fmt = fmt.copy()
+ http_fmt.update({
+ 'url': http_url,
+ 'format_id': http_format_id,
+ })
+ formats.append(http_fmt)
+ break
+
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
@@ -164,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE):
'ext': 'mp4',
'title': 'HaP and the HB Prince Trailer',
'description': 'md5:8005b944181778e313d95c1237ddb640',
- 'thumbnail': 're:^https?://.*\.jpeg$',
+ 'thumbnail': r're:^https?://.*\.jpeg$',
'duration': 144.23,
'timestamp': 1244136834,
'upload_date': '20090604',
@@ -181,7 +193,7 @@ class LimelightMediaIE(LimelightBaseIE):
'id': 'a3e00274d4564ec4a9b29b9466432335',
'ext': 'mp4',
'title': '3Play Media Overview Video',
- 'thumbnail': 're:^https?://.*\.jpeg$',
+ 'thumbnail': r're:^https?://.*\.jpeg$',
'duration': 78.101,
'timestamp': 1338929955,
'upload_date': '20120605',
diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py
index ded717c..337b1b1 100644
--- a/youtube_dl/extractor/litv.py
+++ b/youtube_dl/extractor/litv.py
@@ -31,7 +31,7 @@ class LiTVIE(InfoExtractor):
'id': 'VOD00041610',
'ext': 'mp4',
'title': '花千骨第1集',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
'episode_number': 1,
},
@@ -80,7 +80,7 @@ class LiTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
program_info = self._parse_json(self._search_regex(
- 'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+ r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
video_id)
season_list = list(program_info.get('seasonList', {}).values())
diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py
index b84e4dd..c7de653 100644
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@@ -18,7 +18,7 @@ class LiveLeakIE(InfoExtractor):
'description': 'extremely bad day for this guy..!',
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident',
- 'thumbnail': 're:^https?://.*\.jpg$'
+ 'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
@@ -29,7 +29,7 @@ class LiveLeakIE(InfoExtractor):
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
- 'thumbnail': 're:^https?://.*\.jpg$'
+ 'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
@@ -52,7 +52,7 @@ class LiveLeakIE(InfoExtractor):
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
'uploader': 'bony333',
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
- 'thumbnail': 're:^https?://.*\.jpg$'
+ 'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
# Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index bc7894b..c863413 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -37,7 +37,7 @@ class LivestreamIE(InfoExtractor):
'duration': 5968.0,
'like_count': int,
'view_count': int,
- 'thumbnail': 're:^http://.*\.jpg$'
+ 'thumbnail': r're:^http://.*\.jpg$'
}
}, {
'url': 'http://new.livestream.com/tedx/cityenglish',
diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py
index fd23b0b..068378c 100644
--- a/youtube_dl/extractor/lnkgo.py
+++ b/youtube_dl/extractor/lnkgo.py
@@ -22,7 +22,7 @@ class LnkGoIE(InfoExtractor):
'description': 'md5:d82a5e36b775b7048617f263a0e3475e',
'age_limit': 7,
'duration': 3019,
- 'thumbnail': 're:^https?://.*\.jpg$'
+ 'thumbnail': r're:^https?://.*\.jpg$'
},
'params': {
'skip_download': True, # HLS download
@@ -37,7 +37,7 @@ class LnkGoIE(InfoExtractor):
'description': 'md5:7352d113a242a808676ff17e69db6a69',
'age_limit': 18,
'duration': 346,
- 'thumbnail': 're:^https?://.*\.jpg$'
+ 'thumbnail': r're:^https?://.*\.jpg$'
},
'params': {
'skip_download': True, # HLS download
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index f4dcfd9..da94eab 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -73,7 +73,7 @@ class LyndaBaseIE(InfoExtractor):
# Already logged in
if any(re.search(p, signin_page) for p in (
- 'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
+ r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
return
# Step 2: submit email
diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py
index 33b0b53..bc9933a 100644
--- a/youtube_dl/extractor/matchtv.py
+++ b/youtube_dl/extractor/matchtv.py
@@ -14,7 +14,7 @@ class MatchTVIE(InfoExtractor):
'info_dict': {
'id': 'matchtv-live',
'ext': 'flv',
- 'title': 're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
},
'params': {
diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py
index 2100583..6e4290a 100644
--- a/youtube_dl/extractor/mdr.py
+++ b/youtube_dl/extractor/mdr.py
@@ -72,7 +72,7 @@ class MDRIE(InfoExtractor):
data_url = self._search_regex(
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
- webpage, 'data url', group='url').replace('\/', '/')
+ webpage, 'data url', group='url').replace(r'\/', '/')
doc = self._download_xml(
compat_urlparse.urljoin(url, data_url), video_id)
diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py
new file mode 100644
index 0000000..c8eacb4
--- /dev/null
+++ b/youtube_dl/extractor/meipai.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ unified_timestamp,
+)
+
+
+class MeipaiIE(InfoExtractor):
+ IE_DESC = '美拍'
+ _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
+ _TESTS = [{
+ # regular uploaded video
+ 'url': 'http://www.meipai.com/media/531697625',
+ 'md5': 'e3e9600f9e55a302daecc90825854b4f',
+ 'info_dict': {
+ 'id': '531697625',
+ 'ext': 'mp4',
+ 'title': '#葉子##阿桑##余姿昀##超級女聲#',
+ 'description': '#葉子##阿桑##余姿昀##超級女聲#',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 152,
+ 'timestamp': 1465492420,
+ 'upload_date': '20160609',
+ 'view_count': 35511,
+ 'creator': '她她-TATA',
+ 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
+ }
+ }, {
+ # record of live streaming
+ 'url': 'http://www.meipai.com/media/585526361',
+ 'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
+ 'info_dict': {
+ 'id': '585526361',
+ 'ext': 'mp4',
+ 'title': '姿昀和善願 練歌練琴啦😁😁😁',
+ 'description': '姿昀和善願 練歌練琴啦😁😁😁',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 5975,
+ 'timestamp': 1474311799,
+ 'upload_date': '20160919',
+ 'view_count': 1215,
+ 'creator': '她她-TATA',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'<title[^>]*>([^<]+)</title>', webpage, 'title')
+
+ formats = []
+
+ # recorded playback of live streaming
+ m3u8_url = self._html_search_regex(
+ r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)',
+ webpage, 'm3u8 url', group='url', default=None)
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ if not formats:
+ # regular uploaded video
+ video_url = self._search_regex(
+ r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url',
+ group='url', default=None)
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ })
+
+ timestamp = unified_timestamp(self._og_search_property(
+ 'video:release_date', webpage, 'release date', fatal=False))
+
+ tags = self._og_search_property(
+ 'video:tag', webpage, 'tags', default='').split(',')
+
+ view_count = int_or_none(self._html_search_meta(
+ 'interactionCount', webpage, 'view count'))
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration'))
+ creator = self._og_search_property(
+ 'video:director', webpage, 'creator', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'creator': creator,
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/melonvod.py b/youtube_dl/extractor/melonvod.py
new file mode 100644
index 0000000..bd8cf13
--- /dev/null
+++ b/youtube_dl/extractor/melonvod.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ urljoin,
+)
+
+
+class MelonVODIE(InfoExtractor):
+ _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734',
+ 'info_dict': {
+ 'id': '50158734',
+ 'ext': 'mp4',
+ 'title': "Jessica 'Wonderland' MV Making Film",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'artist': 'Jessica (제시카)',
+ 'upload_date': '20161212',
+ 'duration': 203,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ play_info = self._download_json(
+ 'http://vod.melon.com/video/playerInfo.json', video_id,
+ note='Downloading player info JSON', query={'mvId': video_id})
+
+ title = play_info['mvInfo']['MVTITLE']
+
+ info = self._download_json(
+ 'http://vod.melon.com/delivery/streamingInfo.json', video_id,
+ note='Downloading streaming info JSON',
+ query={
+ 'contsId': video_id,
+ 'contsType': 'VIDEO',
+ })
+
+ stream_info = info['streamingInfo']
+
+ formats = self._extract_m3u8_formats(
+ stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ artist_list = play_info.get('artistList')
+ artist = None
+ if isinstance(artist_list, list):
+ artist = ', '.join(
+ [a['ARTISTNAMEWEBLIST']
+ for a in artist_list if a.get('ARTISTNAMEWEBLIST')])
+
+ thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath'))
+
+ duration = int_or_none(stream_info.get('playTime'))
+ upload_date = stream_info.get('mvSvcOpenDt', '')[:8] or None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'artist': artist,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'formats': formats
+ }
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index e6e7659..9880924 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -133,7 +133,7 @@ class MetacafeIE(InfoExtractor):
video_id, display_id = re.match(self._VALID_URL, url).groups()
# the video may come from an external site
- m_external = re.match('^(\w{2})-(.*)$', video_id)
+ m_external = re.match(r'^(\w{2})-(.*)$', video_id)
if m_external is not None:
prefix, ext_id = m_external.groups()
# Check if video comes from YouTube
diff --git a/youtube_dl/extractor/mgoon.py b/youtube_dl/extractor/mgoon.py
index 94bc87b..7bb4739 100644
--- a/youtube_dl/extractor/mgoon.py
+++ b/youtube_dl/extractor/mgoon.py
@@ -27,7 +27,7 @@ class MgoonIE(InfoExtractor):
'upload_date': '20131220',
'ext': 'mp4',
'title': 'md5:543aa4c27a4931d371c3f433e8cebebc',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
},
{
diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py
index e0bb5d2..659ede8 100644
--- a/youtube_dl/extractor/mgtv.py
+++ b/youtube_dl/extractor/mgtv.py
@@ -18,7 +18,7 @@ class MGTVIE(InfoExtractor):
'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
'description': '我是歌手第四季双年巅峰会',
'duration': 7461,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
# no tbr extracted from stream_url
diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py
index e6730b7..dccc542 100644
--- a/youtube_dl/extractor/minhateca.py
+++ b/youtube_dl/extractor/minhateca.py
@@ -19,7 +19,7 @@ class MinhatecaIE(InfoExtractor):
'id': '125848331',
'ext': 'mp4',
'title': 'youtube-dl test video',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'filesize_approx': 1530000,
'duration': 9,
'view_count': int,
diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py
index 10190d5..8ad9239 100644
--- a/youtube_dl/extractor/ministrygrid.py
+++ b/youtube_dl/extractor/ministrygrid.py
@@ -17,7 +17,7 @@ class MinistryGridIE(InfoExtractor):
'id': '3453494717001',
'ext': 'mp4',
'title': 'The Gospel by Numbers',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'upload_date': '20140410',
'description': 'Coming soon from T4G 2014!',
'uploader_id': '2034960640001',
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index f577836..79e0b8a 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -90,7 +90,7 @@ class MiTeleIE(InfoExtractor):
'season_id': 'diario_de_t14_11981',
'episode': 'Programa 144',
'episode_number': 3,
- 'thumbnail': 're:(?i)^https?://.*\.jpg$',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
'duration': 2913,
},
'add_ie': ['Ooyala'],
@@ -108,7 +108,7 @@ class MiTeleIE(InfoExtractor):
'season_id': 'cuarto_milenio_t06_12715',
'episode': 'Programa 226',
'episode_number': 24,
- 'thumbnail': 're:(?i)^https?://.*\.jpg$',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
'duration': 7313,
},
'params': {
@@ -190,7 +190,7 @@ class MiTeleIE(InfoExtractor):
return {
'_type': 'url_transparent',
# for some reason only HLS is supported
- 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
+ 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}),
'id': video_id,
'title': title,
'description': description,
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 560fe18..a24b316 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -16,13 +16,12 @@ from ..utils import (
clean_html,
ExtractorError,
OnDemandPagedList,
- parse_count,
str_to_int,
)
class MixcloudIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
+ _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
IE_NAME = 'mixcloud'
_TESTS = [{
@@ -34,9 +33,8 @@ class MixcloudIE(InfoExtractor):
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
'uploader': 'Daniel Holbach',
'uploader_id': 'dholbach',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
'view_count': int,
- 'like_count': int,
},
}, {
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
@@ -49,8 +47,10 @@ class MixcloudIE(InfoExtractor):
'uploader_id': 'gillespeterson',
'thumbnail': 're:https?://.*',
'view_count': int,
- 'like_count': int,
},
+ }, {
+ 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
+ 'only_matching': True,
}]
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
@@ -86,26 +86,18 @@ class MixcloudIE(InfoExtractor):
song_url = play_info['stream_url']
- PREFIX = (
- r'm-play-on-spacebar[^>]+'
- r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
- title = self._html_search_regex(
- PREFIX + r'm-title="([^"]+)"', webpage, 'title')
+ title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
thumbnail = self._proto_relative_url(self._html_search_regex(
- PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
- fatal=False))
+ r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
uploader = self._html_search_regex(
- PREFIX + r'm-owner-name="([^"]+)"',
- webpage, 'uploader', fatal=False)
+ r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
uploader_id = self._search_regex(
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
- like_count = parse_count(self._search_regex(
- r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
- webpage, 'like count', default=None))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
- r'/listeners/?">([0-9,.]+)</a>'],
+ r'/listeners/?">([0-9,.]+)</a>',
+ r'm-tooltip=["\']([\d,.]+) plays'],
webpage, 'play count', default=None))
return {
@@ -117,7 +109,6 @@ class MixcloudIE(InfoExtractor):
'uploader': uploader,
'uploader_id': uploader_id,
'view_count': view_count,
- 'like_count': like_count,
}
diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py
index e242b89..59cd4b8 100644
--- a/youtube_dl/extractor/mlb.py
+++ b/youtube_dl/extractor/mlb.py
@@ -37,7 +37,7 @@ class MLBIE(InfoExtractor):
'duration': 66,
'timestamp': 1405980600,
'upload_date': '20140721',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
@@ -51,7 +51,7 @@ class MLBIE(InfoExtractor):
'duration': 46,
'timestamp': 1405105800,
'upload_date': '20140711',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
@@ -65,7 +65,7 @@ class MLBIE(InfoExtractor):
'duration': 488,
'timestamp': 1405399936,
'upload_date': '20140715',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
@@ -79,7 +79,7 @@ class MLBIE(InfoExtractor):
'duration': 52,
'timestamp': 1405390722,
'upload_date': '20140715',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
diff --git a/youtube_dl/extractor/mnet.py b/youtube_dl/extractor/mnet.py
index e3f42e7..6a85dcb 100644
--- a/youtube_dl/extractor/mnet.py
+++ b/youtube_dl/extractor/mnet.py
@@ -22,7 +22,7 @@ class MnetIE(InfoExtractor):
'timestamp': 1451564040,
'age_limit': 0,
'thumbnails': 'mincount:5',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'ext': 'flv',
},
'params': {
diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py
index 91ee9c4..44bcc49 100644
--- a/youtube_dl/extractor/moevideo.py
+++ b/youtube_dl/extractor/moevideo.py
@@ -30,7 +30,7 @@ class MoeVideoIE(InfoExtractor):
'ext': 'flv',
'title': 'Sink cut out machine',
'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'width': 540,
'height': 360,
'duration': 179,
@@ -46,7 +46,7 @@ class MoeVideoIE(InfoExtractor):
'ext': 'flv',
'title': 'Operacion Condor.',
'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'width': 480,
'height': 296,
'duration': 6027,
diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py
index e3bbe5a..54716f5 100644
--- a/youtube_dl/extractor/mofosex.py
+++ b/youtube_dl/extractor/mofosex.py
@@ -18,7 +18,7 @@ class MofosexIE(KeezMoviesIE):
'display_id': 'amateur-teen-playing-and-masturbating-318131',
'ext': 'mp4',
'title': 'amateur teen playing and masturbating',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20121114',
'view_count': int,
'like_count': int,
diff --git a/youtube_dl/extractor/mojvideo.py b/youtube_dl/extractor/mojvideo.py
index 0ba435d..165e658 100644
--- a/youtube_dl/extractor/mojvideo.py
+++ b/youtube_dl/extractor/mojvideo.py
@@ -20,7 +20,7 @@ class MojvideoIE(InfoExtractor):
'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
'ext': 'mp4',
'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
- 'thumbnail': 're:^http://.*\.jpg$',
+ 'thumbnail': r're:^http://.*\.jpg$',
'duration': 242,
}
}
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 5e1a8a7..6fe3b60 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -23,7 +23,7 @@ class MotherlessIE(InfoExtractor):
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'age_limit': 18,
}
}, {
@@ -37,7 +37,7 @@ class MotherlessIE(InfoExtractor):
'game', 'hairy'],
'upload_date': '20140622',
'uploader_id': 'Sulivana7x',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'age_limit': 18,
},
'skip': '404',
@@ -51,7 +51,7 @@ class MotherlessIE(InfoExtractor):
'categories': ['superheroine heroine superher'],
'upload_date': '20140827',
'uploader_id': 'shade0230',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'age_limit': 18,
}
}, {
diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py
index 30c206f..5453da1 100644
--- a/youtube_dl/extractor/movieclips.py
+++ b/youtube_dl/extractor/movieclips.py
@@ -20,7 +20,7 @@ class MovieClipsIE(InfoExtractor):
'ext': 'mp4',
'title': 'Warcraft Trailer 1',
'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1446843055,
'upload_date': '20151106',
'uploader': 'Movieclips',
diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py
index 478e399..85cc6e2 100644
--- a/youtube_dl/extractor/moviezine.py
+++ b/youtube_dl/extractor/moviezine.py
@@ -16,7 +16,7 @@ class MoviezineIE(InfoExtractor):
'ext': 'mp4',
'title': 'Oculus - Trailer 1',
'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}
diff --git a/youtube_dl/extractor/movingimage.py b/youtube_dl/extractor/movingimage.py
index bb789c3..4f62d62 100644
--- a/youtube_dl/extractor/movingimage.py
+++ b/youtube_dl/extractor/movingimage.py
@@ -18,7 +18,7 @@ class MovingImageIE(InfoExtractor):
'title': 'SHETLAND WOOL',
'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
'duration': 900,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}
diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py
index d75ce8b..1473bcf 100644
--- a/youtube_dl/extractor/msn.py
+++ b/youtube_dl/extractor/msn.py
@@ -78,11 +78,6 @@ class MSNIE(InfoExtractor):
m3u8_formats = self._extract_m3u8_formats(
format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False)
- # Despite metadata in m3u8 all video+audio formats are
- # actually video-only (no audio)
- for f in m3u8_formats:
- if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
- f['acodec'] = 'none'
formats.extend(m3u8_formats)
else:
formats.append({
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 0335191..8acea14 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -13,11 +13,11 @@ from ..utils import (
fix_xml_ampersands,
float_or_none,
HEADRequest,
- NO_DEFAULT,
RegexNotFoundError,
sanitized_Request,
strip_or_none,
timeconvert,
+ try_get,
unescapeHTML,
update_url_query,
url_basename,
@@ -42,15 +42,6 @@ class MTVServicesInfoExtractor(InfoExtractor):
# Remove the templates, like &device={device}
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
- # This was originally implemented for ComedyCentral, but it also works here
- @classmethod
- def _transform_rtmp_url(cls, rtmp_video_url):
- m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
- if not m:
- return {'rtmp': rtmp_video_url}
- base = 'http://viacommtvstrmfs.fplive.net/'
- return {'http': base + m.group('finalid')}
-
def _get_feed_url(self, uri):
return self._FEED_URL
@@ -77,7 +68,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
return [{'url': url, 'ext': 'mp4'}]
- def _extract_video_formats(self, mdoc, mtvn_id):
+ def _extract_video_formats(self, mdoc, mtvn_id, video_id):
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
self.to_screen('The normal version is not available from your '
@@ -88,21 +79,33 @@ class MTVServicesInfoExtractor(InfoExtractor):
formats = []
for rendition in mdoc.findall('.//rendition'):
- try:
- _, _, ext = rendition.attrib['type'].partition('/')
- rtmp_video_url = rendition.find('./src').text
- if rtmp_video_url.endswith('siteunavail.png'):
- continue
- new_urls = self._transform_rtmp_url(rtmp_video_url)
- formats.extend([{
- 'ext': 'flv' if new_url.startswith('rtmp') else ext,
- 'url': new_url,
- 'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])),
- 'width': int(rendition.get('width')),
- 'height': int(rendition.get('height')),
- } for kind, new_url in new_urls.items()])
- except (KeyError, TypeError):
- raise ExtractorError('Invalid rendition field.')
+ if rendition.get('method') == 'hls':
+ hls_url = rendition.find('./src').text
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls'))
+ else:
+ # fms
+ try:
+ _, _, ext = rendition.attrib['type'].partition('/')
+ rtmp_video_url = rendition.find('./src').text
+ if 'error_not_available.swf' in rtmp_video_url:
+ raise ExtractorError(
+ '%s said: video is not available' % self.IE_NAME,
+ expected=True)
+ if rtmp_video_url.endswith('siteunavail.png'):
+ continue
+ formats.extend([{
+ 'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext,
+ 'url': rtmp_video_url,
+ 'format_id': '-'.join(filter(None, [
+ 'rtmp' if rtmp_video_url.startswith('rtmp') else None,
+ rendition.get('bitrate')])),
+ 'width': int(rendition.get('width')),
+ 'height': int(rendition.get('height')),
+ }])
+ except (KeyError, TypeError):
+ raise ExtractorError('Invalid rendition field.')
self._sort_formats(formats)
return formats
@@ -118,15 +121,17 @@ class MTVServicesInfoExtractor(InfoExtractor):
} for typographic in transcript.findall('./typographic')]
return subtitles
- def _get_video_info(self, itemdoc):
+ def _get_video_info(self, itemdoc, use_hls=True):
uri = itemdoc.find('guid').text
video_id = self._id_from_uri(uri)
self.report_extraction(video_id)
content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
+ mediagen_url = mediagen_url.replace('device={device}', '')
if 'acceptMethods' not in mediagen_url:
mediagen_url += '&' if '?' in mediagen_url else '?'
- mediagen_url += 'acceptMethods=fms'
+ mediagen_url += 'acceptMethods='
+ mediagen_url += 'hls' if use_hls else 'fms'
mediagen_doc = self._download_xml(mediagen_url, video_id,
'Downloading video urls')
@@ -167,9 +172,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
if mtvn_id_node is not None:
mtvn_id = mtvn_id_node.text
+ formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
+
return {
'title': title,
- 'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
+ 'formats': formats,
'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
@@ -184,13 +191,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
data['lang'] = self._LANG
return data
- def _get_videos_info(self, uri):
+ def _get_videos_info(self, uri, use_hls=True):
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
info_url = update_url_query(feed_url, self._get_feed_query(uri))
- return self._get_videos_info_from_url(info_url, video_id)
+ return self._get_videos_info_from_url(info_url, video_id, use_hls)
- def _get_videos_info_from_url(self, url, video_id):
+ def _get_videos_info_from_url(self, url, video_id, use_hls=True):
idoc = self._download_xml(
url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
@@ -199,10 +206,31 @@ class MTVServicesInfoExtractor(InfoExtractor):
description = xpath_text(idoc, './channel/description')
return self.playlist_result(
- [self._get_video_info(item) for item in idoc.findall('.//item')],
+ [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')],
playlist_title=title, playlist_description=description)
- def _extract_mgid(self, webpage, default=NO_DEFAULT):
+ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
+ triforce_feed = self._parse_json(self._search_regex(
+ r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
+ 'triforce feed', default='{}'), video_id, fatal=False)
+
+ data_zone = self._search_regex(
+ r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage,
+ 'data zone', default=data_zone, group='zone')
+
+ feed_url = try_get(
+ triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'],
+ compat_str)
+ if not feed_url:
+ return
+
+ feed = self._download_json(feed_url, video_id, fatal=False)
+ if not feed:
+ return
+
+ return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
+
+ def _extract_mgid(self, webpage):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
@@ -222,7 +250,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
sm4_embed = self._html_search_meta(
'sm4:video:embed', webpage, 'sm4 embed', default='')
mgid = self._search_regex(
- r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default)
+ r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
+
+ if not mgid:
+ mgid = self._extract_triforce_mgid(webpage)
+
return mgid
def _real_extract(self, url):
@@ -272,7 +304,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
class MTVIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv'
- _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://www.mtv.com/feeds/mrss/'
_TESTS = [{
@@ -289,9 +321,41 @@ class MTVIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
'only_matching': True,
+ }, {
+ 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713',
+ 'only_matching': True,
}]
+class MTV81IE(InfoExtractor):
+ IE_NAME = 'mtv81'
+ _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P<id>[^/?#.]+)'
+
+ _TEST = {
+ 'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/',
+ 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
+ 'info_dict': {
+ 'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
+ 'ext': 'mp4',
+ 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
+ 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
+ 'timestamp': 1468846800,
+ 'upload_date': '20160718',
+ },
+ }
+
+ def _extract_mgid(self, webpage):
+ return self._search_regex(
+ r'getTheVideo\((["\'])(?P<id>mgid:.+?)\1', webpage,
+ 'mgid', group='id')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._extract_mgid(webpage)
+ return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
+
+
class MTVVideoIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv:video'
_VALID_URL = r'''(?x)^https?://
diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py
index d9f1761..2cc2bf2 100644
--- a/youtube_dl/extractor/muenchentv.py
+++ b/youtube_dl/extractor/muenchentv.py
@@ -22,7 +22,7 @@ class MuenchenTVIE(InfoExtractor):
'ext': 'mp4',
'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True,
- 'thumbnail': 're:^https?://.*\.jpg$'
+ 'thumbnail': r're:^https?://.*\.jpg$'
},
'params': {
'skip_download': True,
diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py
index fea1caf..a672765 100644
--- a/youtube_dl/extractor/mwave.py
+++ b/youtube_dl/extractor/mwave.py
@@ -18,7 +18,7 @@ class MwaveIE(InfoExtractor):
'id': '168859',
'ext': 'flv',
'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'M COUNTDOWN',
'duration': 206,
'view_count': int,
@@ -70,7 +70,7 @@ class MwaveMeetGreetIE(InfoExtractor):
'id': '173294',
'ext': 'flv',
'title': '[MEET&GREET] Park BoRam',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Mwave',
'duration': 3634,
'view_count': int,
diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py
index ab32e63..f281238 100644
--- a/youtube_dl/extractor/myspace.py
+++ b/youtube_dl/extractor/myspace.py
@@ -17,9 +17,10 @@ class MySpaceIE(InfoExtractor):
_TESTS = [
{
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
+ 'md5': '9c1483c106f4a695c47d2911feed50a7',
'info_dict': {
'id': '109594919',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Little Big Town',
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
'uploader': 'Five Minutes to the Stage',
@@ -27,37 +28,30 @@ class MySpaceIE(InfoExtractor):
'timestamp': 1414108751,
'upload_date': '20141023',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
# songs
{
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
+ 'md5': '1d7ee4604a3da226dd69a123f748b262',
'info_dict': {
'id': '93388656',
- 'ext': 'flv',
+ 'ext': 'm4a',
'title': 'Of weakened soul...',
'uploader': 'Killsorrow',
'uploader_id': 'killsorrow',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
}, {
- 'add_ie': ['Vevo'],
+ 'add_ie': ['Youtube'],
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
'info_dict': {
- 'id': 'USZM20600099',
- 'ext': 'mp4',
- 'title': 'Animal I Have Become',
- 'uploader': 'Three Days Grace',
- 'timestamp': int,
- 'upload_date': '20060502',
+ 'id': 'xqds0B_meys',
+ 'ext': 'webm',
+ 'title': 'Three Days Grace - Animal I Have Become',
+ 'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
+ 'uploader': 'ThreeDaysGraceVEVO',
+ 'uploader_id': 'ThreeDaysGraceVEVO',
+ 'upload_date': '20091002',
},
- 'skip': 'VEVO is only available in some countries',
}, {
'add_ie': ['Youtube'],
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
@@ -76,24 +70,46 @@ class MySpaceIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ is_song = mobj.group('mediatype').startswith('music/song')
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
- r'playerSwf":"([^"?]*)', webpage, 'player URL')
+ r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False)
- def rtmp_format_from_stream_url(stream_url, width=None, height=None):
- rtmp_url, play_path = stream_url.split(';', 1)
- return {
- 'format_id': 'rtmp',
- 'url': rtmp_url,
- 'play_path': play_path,
- 'player_url': player_url,
- 'protocol': 'rtmp',
- 'ext': 'flv',
- 'width': width,
- 'height': height,
- }
+ def formats_from_stream_urls(stream_url, hls_stream_url, http_stream_url, width=None, height=None):
+ formats = []
+ vcodec = 'none' if is_song else None
+ if hls_stream_url:
+ formats.append({
+ 'format_id': 'hls',
+ 'url': hls_stream_url,
+ 'protocol': 'm3u8_native',
+ 'ext': 'm4a' if is_song else 'mp4',
+ 'vcodec': vcodec,
+ })
+ if stream_url and player_url:
+ rtmp_url, play_path = stream_url.split(';', 1)
+ formats.append({
+ 'format_id': 'rtmp',
+ 'url': rtmp_url,
+ 'play_path': play_path,
+ 'player_url': player_url,
+ 'protocol': 'rtmp',
+ 'ext': 'flv',
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ })
+ if http_stream_url:
+ formats.append({
+ 'format_id': 'http',
+ 'url': http_stream_url,
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ })
+ return formats
- if mobj.group('mediatype').startswith('music/song'):
+ if is_song:
# songs don't store any useful info in the 'context' variable
song_data = self._search_regex(
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
@@ -108,8 +124,10 @@ class MySpaceIE(InfoExtractor):
return self._search_regex(
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
song_data, name, default='', group='data')
- stream_url = search_data('stream-url')
- if not stream_url:
+ formats = formats_from_stream_urls(
+ search_data('stream-url'), search_data('hls-stream-url'),
+ search_data('http-stream-url'))
+ if not formats:
vevo_id = search_data('vevo-id')
youtube_id = search_data('youtube-id')
if vevo_id:
@@ -121,6 +139,7 @@ class MySpaceIE(InfoExtractor):
else:
raise ExtractorError(
'Found song but don\'t know how to download it')
+ self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage),
@@ -128,27 +147,16 @@ class MySpaceIE(InfoExtractor):
'uploader_id': search_data('artist-username'),
'thumbnail': self._og_search_thumbnail(webpage),
'duration': int_or_none(search_data('duration')),
- 'formats': [rtmp_format_from_stream_url(stream_url)]
+ 'formats': formats,
}
else:
video = self._parse_json(self._search_regex(
r'context = ({.*?});', webpage, 'context'),
video_id)['video']
- formats = []
- hls_stream_url = video.get('hlsStreamUrl')
- if hls_stream_url:
- formats.append({
- 'format_id': 'hls',
- 'url': hls_stream_url,
- 'protocol': 'm3u8_native',
- 'ext': 'mp4',
- })
- stream_url = video.get('streamUrl')
- if stream_url:
- formats.append(rtmp_format_from_stream_url(
- stream_url,
- int_or_none(video.get('width')),
- int_or_none(video.get('height'))))
+ formats = formats_from_stream_urls(
+ video.get('streamUrl'), video.get('hlsStreamUrl'),
+ video.get('mp4StreamUrl'), int_or_none(video.get('width')),
+ int_or_none(video.get('height')))
self._sort_formats(formats)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/myvi.py b/youtube_dl/extractor/myvi.py
index 4c65be1..621ae74 100644
--- a/youtube_dl/extractor/myvi.py
+++ b/youtube_dl/extractor/myvi.py
@@ -27,7 +27,7 @@ class MyviIE(SprutoBaseIE):
'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43',
'ext': 'mp4',
'title': 'хозяин жизни',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 25,
},
}, {
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
index 6d447a4..6bb64eb 100644
--- a/youtube_dl/extractor/myvideo.py
+++ b/youtube_dl/extractor/myvideo.py
@@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor):
else:
video_playpath = ''
- video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
+ video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index 055070f..e813133 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -12,10 +12,10 @@ from ..utils import (
class NaverIE(InfoExtractor):
- _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://tvcast.naver.com/v/81652',
+ 'url': 'http://tv.naver.com/v/81652',
'info_dict': {
'id': '81652',
'ext': 'mp4',
@@ -24,7 +24,7 @@ class NaverIE(InfoExtractor):
'upload_date': '20130903',
},
}, {
- 'url': 'http://tvcast.naver.com/v/395837',
+ 'url': 'http://tv.naver.com/v/395837',
'md5': '638ed4c12012c458fefcddfd01f173cd',
'info_dict': {
'id': '395837',
@@ -34,6 +34,9 @@ class NaverIE(InfoExtractor):
'upload_date': '20150519',
},
'skip': 'Georestricted',
+ }, {
+ 'url': 'http://tvcast.naver.com/v/81652',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 7f1bd92..434a94d 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -9,6 +9,7 @@ from ..utils import (
lowercase_escape,
smuggle_url,
unescapeHTML,
+ update_url_query,
)
@@ -208,7 +209,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
'md5': 'af1adfa51312291a017720403826bb64',
'info_dict': {
- 'id': '269389891880',
+ 'id': 'p_tweet_snow_140529',
'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
@@ -232,7 +233,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
'info_dict': {
- 'id': '394064451844',
+ 'id': 'nn_netcast_150204',
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
@@ -245,7 +246,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
'md5': 'a49e173825e5fcd15c13fc297fced39d',
'info_dict': {
- 'id': '529953347624',
+ 'id': 'x_lon_vwhorn_150922',
'ext': 'mp4',
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
@@ -258,7 +259,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
'md5': '118d7ca3f0bea6534f119c68ef539f71',
'info_dict': {
- 'id': '669831235788',
+ 'id': 'tdy_al_space_160420',
'ext': 'mp4',
'title': 'See the aurora borealis from space in stunning new NASA video',
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
@@ -271,15 +272,14 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
- 'id': '314487875924',
+ 'id': 'n_hayes_Aimm_140801_272214',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1406937606,
'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
- 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
},
{
@@ -311,28 +311,41 @@ class NBCNewsIE(ThePlatformIE):
else:
# "feature" and "nightly-news" pages use theplatform.com
video_id = mobj.group('mpx_id')
- if not video_id.isdigit():
- webpage = self._download_webpage(url, video_id)
- info = None
- bootstrap_json = self._search_regex(
- [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
- r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
- webpage, 'bootstrap json', default=None)
+ webpage = self._download_webpage(url, video_id)
+
+ filter_param = 'byId'
+ bootstrap_json = self._search_regex(
+ [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
+ r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
+ webpage, 'bootstrap json', default=None)
+ if bootstrap_json:
bootstrap = self._parse_json(
bootstrap_json, video_id, transform_source=unescapeHTML)
+
+ info = None
if 'results' in bootstrap:
info = bootstrap['results'][0]['video']
elif 'video' in bootstrap:
info = bootstrap['video']
+ elif 'msnbcVideoInfo' in bootstrap:
+ info = bootstrap['msnbcVideoInfo']['meta']
+ elif 'msnbcThePlatform' in bootstrap:
+ info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
else:
info = bootstrap
- video_id = info['mpxId']
+
+ if 'guid' in info:
+ video_id = info['guid']
+ filter_param = 'byGuid'
+ elif 'mpxId' in info:
+ video_id = info['mpxId']
return {
'_type': 'url_transparent',
'id': video_id,
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
- 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
+ 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
'ie_key': 'ThePlatformFeed',
}
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index e3b0da2..07528d1 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -302,7 +302,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'info_dict': {
'id': 'livestream217',
'ext': 'flv',
- 'title': 're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
'upload_date': '20150910',
},
@@ -367,7 +367,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'info_dict': {
'id': 'webradioweltweit100',
'ext': 'mp3',
- 'title': 're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
'uploader': 'njoy',
'upload_date': '20150810',
diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py
index 96528f6..255f608 100644
--- a/youtube_dl/extractor/ndtv.py
+++ b/youtube_dl/extractor/ndtv.py
@@ -21,7 +21,7 @@ class NDTVIE(InfoExtractor):
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
'upload_date': '20131208',
'duration': 1327,
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
},
}
diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py
index 0d165a8..aec3026 100644
--- a/youtube_dl/extractor/netzkino.py
+++ b/youtube_dl/extractor/netzkino.py
@@ -25,7 +25,7 @@ class NetzkinoIE(InfoExtractor):
'comments': 'mincount:3',
'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
'upload_date': '20120813',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'timestamp': 1344858571,
'age_limit': 12,
},
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py
index dee9056..680f03a 100644
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dl/extractor/nextmedia.py
@@ -2,7 +2,15 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..compat import compat_urlparse
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ int_or_none,
+ parse_iso8601,
+ remove_start,
+ unified_timestamp,
+)
class NextMediaIE(InfoExtractor):
@@ -15,7 +23,7 @@ class NextMediaIE(InfoExtractor):
'id': '53109199',
'ext': 'mp4',
'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:28222b9912b6665a21011b034c70fcc7',
'timestamp': 1415456273,
'upload_date': '20141108',
@@ -30,6 +38,12 @@ class NextMediaIE(InfoExtractor):
return self._extract_from_nextmedia_page(news_id, url, page)
def _extract_from_nextmedia_page(self, news_id, url, page):
+ redirection_url = self._search_regex(
+ r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
+ page, 'redirection URL', default=None, group='url')
+ if redirection_url:
+ return self.url_result(compat_urlparse.urljoin(url, redirection_url))
+
title = self._fetch_title(page)
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
@@ -76,7 +90,7 @@ class NextMediaActionNewsIE(NextMediaIE):
'id': '19009428',
'ext': 'mp4',
'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
'timestamp': 1421791200,
'upload_date': '20150120',
@@ -93,7 +107,7 @@ class NextMediaActionNewsIE(NextMediaIE):
class AppleDailyIE(NextMediaIE):
IE_DESC = '臺灣蘋果日報'
- _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+ _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
@@ -101,7 +115,7 @@ class AppleDailyIE(NextMediaIE):
'id': '36354694',
'ext': 'mp4',
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
'upload_date': '20150128',
}
@@ -112,7 +126,7 @@ class AppleDailyIE(NextMediaIE):
'id': '550549',
'ext': 'mp4',
'title': '不滿被踩腳 山東兩大媽一路打下車',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
'upload_date': '20150128',
}
@@ -123,7 +137,7 @@ class AppleDailyIE(NextMediaIE):
'id': '5003671',
'ext': 'mp4',
'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
'upload_date': '20150128',
},
@@ -150,13 +164,17 @@ class AppleDailyIE(NextMediaIE):
'id': '35770334',
'ext': 'mp4',
'title': '咖啡占卜測 XU裝熟指數',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
'upload_date': '20140417',
},
}, {
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
'only_matching': True,
+ }, {
+ # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
+ 'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
+ 'only_matching': True,
}]
_URL_PATTERN = r'\{url: \'(.+)\'\}'
@@ -173,3 +191,48 @@ class AppleDailyIE(NextMediaIE):
def _fetch_description(self, page):
return self._html_search_meta('description', page, 'news description')
+
+
+class NextTVIE(InfoExtractor):
+ IE_DESC = '壹電視'
+ _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
+ 'info_dict': {
+ 'id': '11779671',
+ 'ext': 'mp4',
+ 'title': '「超收稅」近4千億! 藍議員籲發消費券',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1484825400,
+ 'upload_date': '20170119',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
+
+ data = self._hidden_inputs(webpage)
+
+ video_url = data['ntt-vod-src-detailview']
+
+ date_str = get_element_by_class('date', webpage)
+ timestamp = unified_timestamp(date_str + '+0800') if date_str else None
+
+ view_count = int_or_none(remove_start(
+ clean_html(get_element_by_class('click', webpage)), '點閱:'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': data.get('ntt-vod-img-src'),
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ }
diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py
index 3930d16..460deb1 100644
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dl/extractor/nfl.py
@@ -72,7 +72,7 @@ class NFLIE(InfoExtractor):
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
'upload_date': '20140921',
'timestamp': 1411337580,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
@@ -84,7 +84,7 @@ class NFLIE(InfoExtractor):
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
'upload_date': '20131229',
'timestamp': 1388354455,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py
index 7672845..08a7592 100644
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@@ -10,7 +10,7 @@ from ..utils import update_url_query
class NickIE(MTVServicesInfoExtractor):
# None of videos on the website are still alive?
IE_NAME = 'nick.com'
- _VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
+ _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
_TESTS = [{
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
@@ -57,6 +57,9 @@ class NickIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
'only_matching': True,
+ }, {
+ 'url': 'http://beta.nick.com/nicky-ricky-dicky-and-dawn/videos/nicky-ricky-dicky-dawn-301-full-episode/',
+ 'only_matching': True,
}]
def _get_feed_query(self, uri):
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index a104e33..8baac23 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -7,7 +7,6 @@ import datetime
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
@@ -40,6 +39,7 @@ class NiconicoIE(InfoExtractor):
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
},
+ 'skip': 'Requires an account',
}, {
# File downloaded with and without credentials are different, so omit
# the md5 field
@@ -55,6 +55,7 @@ class NiconicoIE(InfoExtractor):
'timestamp': 1304065916,
'duration': 209,
},
+ 'skip': 'Requires an account',
}, {
# 'video exists but is marked as "deleted"
# md5 is unstable
@@ -65,9 +66,10 @@ class NiconicoIE(InfoExtractor):
'description': 'deleted',
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
'upload_date': '20071224',
- 'timestamp': 1198527840, # timestamp field has different value if logged in
+ 'timestamp': int, # timestamp field has different value if logged in
'duration': 304,
},
+ 'skip': 'Requires an account',
}, {
'url': 'http://www.nicovideo.jp/watch/so22543406',
'info_dict': {
@@ -79,13 +81,12 @@ class NiconicoIE(InfoExtractor):
'upload_date': '20140104',
'uploader': 'アニメロチャンネル',
'uploader_id': '312',
- }
+ },
+ 'skip': 'The viewing period of the video you were searching for has expired.',
}]
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
- # Determine whether the downloader used authentication to download video
- _AUTHENTICATED = False
def _real_initialize(self):
self._login()
@@ -109,8 +110,6 @@ class NiconicoIE(InfoExtractor):
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
self._downloader.report_warning('unable to log in: bad username or password')
return False
- # Successful login
- self._AUTHENTICATED = True
return True
def _real_extract(self, url):
@@ -128,35 +127,19 @@ class NiconicoIE(InfoExtractor):
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note='Downloading video info page')
- if self._AUTHENTICATED:
- # Get flv info
- flv_info_webpage = self._download_webpage(
- 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
- video_id, 'Downloading flv info')
- else:
- # Get external player info
- ext_player_info = self._download_webpage(
- 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
- thumb_play_key = self._search_regex(
- r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
-
- # Get flv info
- flv_info_data = compat_urllib_parse_urlencode({
- 'k': thumb_play_key,
- 'v': video_id
- })
- flv_info_request = sanitized_Request(
- 'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
- {'Content-Type': 'application/x-www-form-urlencoded'})
- flv_info_webpage = self._download_webpage(
- flv_info_request, video_id,
- note='Downloading flv info', errnote='Unable to download flv info')
+ # Get flv info
+ flv_info_webpage = self._download_webpage(
+ 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
+ video_id, 'Downloading flv info')
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
if 'url' not in flv_info:
if 'deleted' in flv_info:
raise ExtractorError('The video has been deleted.',
expected=True)
+ elif 'closed' in flv_info:
+ raise ExtractorError('Niconico videos now require logging in',
+ expected=True)
else:
raise ExtractorError('Unable to find video URL')
diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py
index eab816e..53c500c 100644
--- a/youtube_dl/extractor/nosvideo.py
+++ b/youtube_dl/extractor/nosvideo.py
@@ -17,7 +17,7 @@ _x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
class NosVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
- '(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
+ r'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
_PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
_FILE_DELETED_REGEX = r'<b>File Not Found</b>'
_TEST = {
@@ -27,7 +27,7 @@ class NosVideoIE(InfoExtractor):
'id': 'mu8fle7g7rpq',
'ext': 'mp4',
'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py
index 1039523..06cb8cb 100644
--- a/youtube_dl/extractor/nova.py
+++ b/youtube_dl/extractor/nova.py
@@ -21,7 +21,7 @@ class NovaIE(InfoExtractor):
'ext': 'flv',
'title': 'Duel: Michal Hrdlička a Petr Suchoň',
'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
- 'thumbnail': 're:^https?://.*\.(?:jpg)',
+ 'thumbnail': r're:^https?://.*\.(?:jpg)',
},
'params': {
# rtmp download
@@ -36,7 +36,7 @@ class NovaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Podzemní nemocnice v pražské Krči',
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
- 'thumbnail': 're:^https?://.*\.(?:jpg)',
+ 'thumbnail': r're:^https?://.*\.(?:jpg)',
}
}, {
'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
@@ -46,7 +46,7 @@ class NovaIE(InfoExtractor):
'ext': 'flv',
'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags
- 'thumbnail': 're:^https?://.*\.(?:jpg)',
+ 'thumbnail': r're:^https?://.*\.(?:jpg)',
},
'params': {
# rtmp download
@@ -58,7 +58,7 @@ class NovaIE(InfoExtractor):
'id': '1756858',
'ext': 'flv',
'title': 'Televizní noviny - 30. 5. 2015',
- 'thumbnail': 're:^https?://.*\.(?:jpg)',
+ 'thumbnail': r're:^https?://.*\.(?:jpg)',
'upload_date': '20150530',
},
'params': {
@@ -72,7 +72,7 @@ class NovaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Zaklínač 3: Divoký hon',
'description': 're:.*Pokud se stejně jako my nemůžete.*',
- 'thumbnail': 're:https?://.*\.jpg(\?.*)?',
+ 'thumbnail': r're:https?://.*\.jpg(\?.*)?',
'upload_date': '20150521',
},
'params': {
diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py
index 3bbd473..829c719 100644
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@@ -24,7 +24,7 @@ class NovaMovIE(InfoExtractor):
)
(?P<id>[a-z\d]{13})
'''
- _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
+ _VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'}
_HOST = 'www.novamov.com'
@@ -104,7 +104,7 @@ class WholeCloudIE(NovaMovIE):
IE_NAME = 'wholecloud'
IE_DESC = 'WholeCloud'
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': '(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
_HOST = 'www.wholecloud.net'
@@ -128,7 +128,7 @@ class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
_HOST = 'www.nowvideo.to'
@@ -152,7 +152,7 @@ class VideoWeedIE(NovaMovIE):
IE_NAME = 'videoweed'
IE_DESC = 'VideoWeed'
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}
_HOST = 'www.videoweed.es'
@@ -176,7 +176,7 @@ class CloudTimeIE(NovaMovIE):
IE_NAME = 'cloudtime'
IE_DESC = 'CloudTime'
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'cloudtime\.to'}
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'}
_HOST = 'www.cloudtime.to'
@@ -190,7 +190,7 @@ class AuroraVidIE(NovaMovIE):
IE_NAME = 'auroravid'
IE_DESC = 'AuroraVid'
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'auroravid\.to'}
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'}
_HOST = 'www.auroravid.to'
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py
index 7e53463..b6c5ee6 100644
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@@ -62,7 +62,7 @@ class NownessIE(NownessBaseIE):
'ext': 'mp4',
'title': 'Candor: The Art of Gesticulation',
'description': 'Candor: The Art of Gesticulation',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1446745676,
'upload_date': '20151105',
'uploader_id': '2385340575001',
@@ -76,7 +76,7 @@ class NownessIE(NownessBaseIE):
'ext': 'mp4',
'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1407315371,
'upload_date': '20140806',
'uploader_id': '2385340575001',
@@ -91,7 +91,7 @@ class NownessIE(NownessBaseIE):
'ext': 'mp4',
'title': 'Bleu, Blanc, Rouge - A Godard Supercut',
'description': 'md5:f0ea5f1857dffca02dbd37875d742cec',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'upload_date': '20150607',
'uploader': 'Cinema Sem Lei',
'uploader_id': 'cinemasemlei',
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
index 916a102..e43b371 100644
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -83,7 +83,7 @@ class NowTVIE(NowTVBaseIE):
'ext': 'flv',
'title': 'Inka Bause stellt die neuen Bauern vor',
'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1432580700,
'upload_date': '20150525',
'duration': 2786,
@@ -101,7 +101,7 @@ class NowTVIE(NowTVBaseIE):
'ext': 'flv',
'title': 'Berlin - Tag & Nacht (Folge 934)',
'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1432666800,
'upload_date': '20150526',
'duration': 2641,
@@ -119,7 +119,7 @@ class NowTVIE(NowTVBaseIE):
'ext': 'flv',
'title': 'Hals- und Beinbruch',
'description': 'md5:b50d248efffe244e6f56737f0911ca57',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1432415400,
'upload_date': '20150523',
'duration': 2742,
@@ -137,7 +137,7 @@ class NowTVIE(NowTVBaseIE):
'ext': 'flv',
'title': 'Angst!',
'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1222632900,
'upload_date': '20080928',
'duration': 3025,
@@ -155,7 +155,7 @@ class NowTVIE(NowTVBaseIE):
'ext': 'flv',
'title': 'Thema u.a.: Der erste Blick: Die Apple Watch',
'description': 'md5:4312b6c9d839ffe7d8caf03865a531af',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1432751700,
'upload_date': '20150527',
'duration': 1083,
@@ -173,7 +173,7 @@ class NowTVIE(NowTVBaseIE):
'ext': 'flv',
'title': "Büro-Fall / Chihuahua 'Joel'",
'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1432408200,
'upload_date': '20150523',
'duration': 3092,
diff --git a/youtube_dl/extractor/noz.py b/youtube_dl/extractor/noz.py
index c47a33d..ccafd77 100644
--- a/youtube_dl/extractor/noz.py
+++ b/youtube_dl/extractor/noz.py
@@ -24,7 +24,7 @@ class NozIE(InfoExtractor):
'duration': 215,
'title': '3:2 - Deutschland gewinnt Badminton-Länderspiel in Melle',
'description': 'Vor rund 370 Zuschauern gewinnt die deutsche Badminton-Nationalmannschaft am Donnerstag ein EM-Vorbereitungsspiel gegen Frankreich in Melle. Video Moritz Frankenberg.',
- 'thumbnail': 're:^http://.*\.jpg',
+ 'thumbnail': r're:^http://.*\.jpg',
},
}]
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index c91f584..9624371 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -241,7 +241,7 @@ class NPOIE(NPOBaseIE):
if metadata.get('tt888') == 'ja':
subtitles['nl'] = [{
'ext': 'vtt',
- 'url': 'http://e.omroep.nl/tt888/%s' % video_id,
+ 'url': 'http://tt888.omroep.nl/tt888/%s' % video_id,
}]
return {
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index c89aac6..fc3c0cd 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -48,6 +48,13 @@ class NRKBaseIE(InfoExtractor):
entries = []
+ conviva = data.get('convivaStatistics') or {}
+ live = (data.get('mediaElementType') == 'Live' or
+ data.get('isLive') is True or conviva.get('isLive'))
+
+ def make_title(t):
+ return self._live_title(t) if live else t
+
media_assets = data.get('mediaAssets')
if media_assets and isinstance(media_assets, list):
def video_id_and_title(idx):
@@ -61,6 +68,13 @@ class NRKBaseIE(InfoExtractor):
if not formats:
continue
self._sort_formats(formats)
+
+ # Some f4m streams may not work with hdcore in fragments' URLs
+ for f in formats:
+ extra_param = f.get('extra_param_to_segment_url')
+ if extra_param and 'hdcore' in extra_param:
+ del f['extra_param_to_segment_url']
+
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
@@ -72,7 +86,7 @@ class NRKBaseIE(InfoExtractor):
})
entries.append({
'id': asset.get('carrierId') or entry_id,
- 'title': entry_title,
+ 'title': make_title(entry_title),
'duration': duration,
'subtitles': subtitles,
'formats': formats,
@@ -87,7 +101,7 @@ class NRKBaseIE(InfoExtractor):
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
- 'title': title,
+ 'title': make_title(title),
'duration': duration,
'formats': formats,
}]
@@ -111,10 +125,25 @@ class NRKBaseIE(InfoExtractor):
message_type, message_type)),
expected=True)
- conviva = data.get('convivaStatistics') or {}
series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
+ season_number = None
+ episode_number = None
+ if data.get('mediaElementType') == 'Episode':
+ _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
+ data.get('relativeOriginUrl', '')
+ EPISODENUM_RE = [
+ r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
+ r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
+ ]
+ season_number = int_or_none(self._search_regex(
+ EPISODENUM_RE, _season_episode, 'season number',
+ default=None, group='season'))
+ episode_number = int_or_none(self._search_regex(
+ EPISODENUM_RE, _season_episode, 'episode number',
+ default=None, group='episode'))
+
thumbnails = None
images = data.get('images')
if images and isinstance(images, dict):
@@ -127,11 +156,15 @@ class NRKBaseIE(InfoExtractor):
} for image in web_images if image.get('imageUrl')]
description = data.get('description')
+ category = data.get('mediaAnalytics', {}).get('category')
common_info = {
'description': description,
'series': series,
'episode': episode,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'categories': [category] if category else None,
'age_limit': parse_age_limit(data.get('legalAge')),
'thumbnails': thumbnails,
}
@@ -194,7 +227,15 @@ class NRKIE(NRKBaseIE):
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
- _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+ _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:tv|radio)\.nrk(?:super)?\.no/
+ (?:serie/[^/]+|program)/
+ (?![Ee]pisodes)%s
+ (?:/\d{2}-\d{2}-\d{4})?
+ (?:\#del=(?P<part_id>\d+))?
+ ''' % _EPISODE_RE
_API_HOST = 'psapi-we.nrk.no'
_TESTS = [{
@@ -206,63 +247,145 @@ class NRKTVIE(NRKBaseIE):
'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741,
+ 'series': '20 spørsmål - TV',
+ 'episode': '23.05.2014',
},
}, {
'url': 'https://tv.nrk.no/program/mdfp15000514',
- 'md5': '43d0be26663d380603a9cf0c24366531',
'info_dict': {
'id': 'MDFP15000514CA',
'ext': 'mp4',
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
'duration': 4605,
+ 'series': 'Kunnskapskanalen',
+ 'episode': '24.05.2014',
+ },
+ 'params': {
+ 'skip_download': True,
},
}, {
# single playlist video
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
- 'md5': 'adbd1dbd813edaf532b0a253780719c2',
'info_dict': {
'id': 'MSPO40010515-part2',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
},
- 'skip': 'Only works from Norway',
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Video is geo restricted'],
+ 'skip': 'particular part is not supported currently',
}, {
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
'playlist': [{
- 'md5': '9480285eff92d64f06e02a5367970a7a',
'info_dict': {
- 'id': 'MSPO40010515-part1',
- 'ext': 'flv',
- 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
- 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+ 'id': 'MSPO40010515AH',
+ 'ext': 'mp4',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
+ 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'duration': 772,
+ 'series': 'Tour de Ski',
+ 'episode': '06.01.2015',
+ },
+ 'params': {
+ 'skip_download': True,
},
}, {
- 'md5': 'adbd1dbd813edaf532b0a253780719c2',
'info_dict': {
- 'id': 'MSPO40010515-part2',
- 'ext': 'flv',
- 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
- 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+ 'id': 'MSPO40010515BH',
+ 'ext': 'mp4',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
+ 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'duration': 6175,
+ 'series': 'Tour de Ski',
+ 'episode': '06.01.2015',
+ },
+ 'params': {
+ 'skip_download': True,
},
}],
'info_dict': {
'id': 'MSPO40010515',
- 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
- 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
- 'duration': 6947.52,
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
+ 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ },
+ 'expected_warnings': ['Video is geo restricted'],
+ }, {
+ 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
+ 'info_dict': {
+ 'id': 'KMTE50001317AA',
+ 'ext': 'mp4',
+ 'title': 'Anno 13:30',
+ 'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
+ 'duration': 2340,
+ 'series': 'Anno',
+ 'episode': '13:30',
+ 'season_number': 3,
+ 'episode_number': 13,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
+ 'info_dict': {
+ 'id': 'MUHH46000317AA',
+ 'ext': 'mp4',
+ 'title': 'Nytt på Nytt 27.01.2017',
+ 'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
+ 'duration': 1796,
+ 'series': 'Nytt på nytt',
+ 'episode': '27.01.2017',
+ },
+ 'params': {
+ 'skip_download': True,
},
- 'skip': 'Only works from Norway',
}, {
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
'only_matching': True,
}]
-class NRKPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+class NRKTVDirekteIE(NRKTVIE):
+ IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/direkte/nrk1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
+ 'only_matching': True,
+ }]
+
+
+class NRKPlaylistBaseIE(InfoExtractor):
+ def _extract_description(self, webpage):
+ pass
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
+ for video_id in re.findall(self._ITEM_RE, webpage)
+ ]
+
+ playlist_title = self. _extract_title(webpage)
+ playlist_description = self._extract_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+class NRKPlaylistIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+ _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
_TESTS = [{
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
'info_dict': {
@@ -281,23 +404,86 @@ class NRKPlaylistIE(InfoExtractor):
'playlist_count': 5,
}]
+ def _extract_title(self, webpage):
+ return self._og_search_title(webpage, fatal=False)
+
+ def _extract_description(self, webpage):
+ return self._og_search_description(webpage)
+
+
+class NRKTVEpisodesIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
+ _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
+ 'info_dict': {
+ 'id': '69031',
+ 'title': 'Nytt på nytt, sesong: 201210',
+ },
+ 'playlist_count': 4,
+ }]
+
+ def _extract_title(self, webpage):
+ return self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
+
+
+class NRKTVSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
+ _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/serie/groenn-glede',
+ 'info_dict': {
+ 'id': 'groenn-glede',
+ 'title': 'Grønn glede',
+ 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'http://tv.nrksuper.no/serie/labyrint',
+ 'info_dict': {
+ 'id': 'labyrint',
+ 'title': 'Labyrint',
+ 'description': 'md5:58afd450974c89e27d5a19212eee7115',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/saving-the-human-race',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/postmann-pat',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url)
+
def _real_extract(self, url):
- playlist_id = self._match_id(url)
+ series_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
+ webpage = self._download_webpage(url, series_id)
entries = [
- self.url_result('nrk:%s' % video_id, 'NRK')
- for video_id in re.findall(
- r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
- webpage)
+ self.url_result(
+ 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
+ series=series_id, season=season_id))
+ for season_id in re.findall(self._ITEM_RE, webpage)
]
- playlist_title = self._og_search_title(webpage)
- playlist_description = self._og_search_description(webpage)
+ title = self._html_search_meta(
+ 'seriestitle', webpage,
+ 'title', default=None) or self._og_search_title(
+ webpage, fatal=False)
- return self.playlist_result(
- entries, playlist_id, playlist_title, playlist_description)
+ description = self._html_search_meta(
+ 'series_description', webpage,
+ 'description', default=None) or self._og_search_description(webpage)
+
+ return self.playlist_result(entries, series_id, title, description)
class NRKSkoleIE(InfoExtractor):
diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py
index d28a815..101a537 100644
--- a/youtube_dl/extractor/ntvde.py
+++ b/youtube_dl/extractor/ntvde.py
@@ -22,7 +22,7 @@ class NTVDeIE(InfoExtractor):
'info_dict': {
'id': '14438086',
'ext': 'mp4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
'alt_title': 'Winterchaos auf deutschen Straßen',
'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py
index 7d7a785..4f9cedb 100644
--- a/youtube_dl/extractor/ntvru.py
+++ b/youtube_dl/extractor/ntvru.py
@@ -21,7 +21,7 @@ class NTVRuIE(InfoExtractor):
'ext': 'mp4',
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
- 'thumbnail': 're:^http://.*\.jpg',
+ 'thumbnail': r're:^http://.*\.jpg',
'duration': 136,
},
}, {
@@ -32,7 +32,7 @@ class NTVRuIE(InfoExtractor):
'ext': 'mp4',
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
- 'thumbnail': 're:^http://.*\.jpg',
+ 'thumbnail': r're:^http://.*\.jpg',
'duration': 172,
},
}, {
@@ -43,7 +43,7 @@ class NTVRuIE(InfoExtractor):
'ext': 'mp4',
'title': '«Сегодня». 21 марта 2014 года. 16:00',
'description': '«Сегодня». 21 марта 2014 года. 16:00',
- 'thumbnail': 're:^http://.*\.jpg',
+ 'thumbnail': r're:^http://.*\.jpg',
'duration': 1496,
},
}, {
@@ -54,7 +54,7 @@ class NTVRuIE(InfoExtractor):
'ext': 'mp4',
'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»',
- 'thumbnail': 're:^http://.*\.jpg',
+ 'thumbnail': r're:^http://.*\.jpg',
'duration': 5592,
},
}, {
@@ -65,7 +65,7 @@ class NTVRuIE(InfoExtractor):
'ext': 'mp4',
'title': '«Дело врачей»: «Деревце жизни»',
'description': '«Дело врачей»: «Деревце жизни»',
- 'thumbnail': 're:^http://.*\.jpg',
+ 'thumbnail': r're:^http://.*\.jpg',
'duration': 2590,
},
}]
diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py
index 50fbbc7..a914068 100644
--- a/youtube_dl/extractor/oktoberfesttv.py
+++ b/youtube_dl/extractor/oktoberfesttv.py
@@ -13,7 +13,7 @@ class OktoberfestTVIE(InfoExtractor):
'id': 'hb-zelt',
'ext': 'mp4',
'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'is_live': True,
},
'params': {
diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py
new file mode 100644
index 0000000..de1d6b0
--- /dev/null
+++ b/youtube_dl/extractor/ondemandkorea.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .jwplatform import JWPlatformBaseIE
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+)
+
+
+class OnDemandKoreaIE(JWPlatformBaseIE):
+    """Extractor for ondemandkorea.com pages that configure a JW Player."""
+
+    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+    _TEST = {
+        'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
+        'info_dict': {
+            'id': 'ask-us-anything-e43',
+            'ext': 'mp4',
+            'title': 'Ask Us Anything : E43',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            'skip_download': 'm3u8 download'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for one page from its jwplayer(...).setup() call.
+
+        Raises ExtractorError (expected) when the page cannot be fetched or
+        is restricted to ODK PLUS members, and reports a geo restriction when
+        the region-block image is present.
+        """
+        video_id = self._match_id(url)
+
+        # The download is non-fatal so that an empty result can be turned
+        # into a clearer, user-facing error below.
+        page = self._download_webpage(url, video_id, fatal=False)
+        if not page:
+            # Page sometimes returns captcha page with HTTP 403
+            raise ExtractorError(
+                'Unable to access page. You may have been blocked.',
+                expected=True)
+
+        # Presence of this image marks the region-block notice.
+        if 'msg_block_01.png' in page:
+            self.raise_geo_restricted(
+                'This content is not available in your region')
+
+        # The page's own notice doubles as the error message.
+        members_only = 'This video is only available to ODK PLUS members.'
+        if members_only in page:
+            raise ExtractorError(members_only, expected=True)
+
+        title = self._og_search_title(page)
+
+        # Grab the object literal passed to jwplayer('<id>').setup(...) and
+        # convert it from JavaScript to JSON before parsing.
+        setup_options = self._search_regex(
+            r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P<options>.+?)\);',
+            page, 'jw config', group='options')
+        jw_config = self._parse_json(
+            setup_options, video_id, transform_source=js_to_json)
+
+        info = self._parse_jwplayer_data(
+            jw_config, video_id, require_title=False, m3u8_id='hls',
+            base_url=url)
+
+        # Title and thumbnail come from the page's Open Graph metadata.
+        info['title'] = title
+        info['thumbnail'] = self._og_search_thumbnail(page)
+        return info
diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py
index 6fb1a3f..1d336cf 100644
--- a/youtube_dl/extractor/onionstudios.py
+++ b/youtube_dl/extractor/onionstudios.py
@@ -22,7 +22,7 @@ class OnionStudiosIE(InfoExtractor):
'id': '2937',
'ext': 'mp4',
'title': 'Hannibal charges forward, stops for a cocktail',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'The A.V. Club',
'uploader_id': 'the-av-club',
},
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index c2807d0..84be2b1 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
- def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
+ def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None):
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code']
@@ -29,7 +29,8 @@ class OoyalaBaseIE(InfoExtractor):
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
compat_urllib_parse_urlencode({
'domain': domain,
- 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
+ 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
+ 'embedToken': embed_token,
}), video_id)
cur_auth_data = auth_data['authorization_data'][embed_code]
@@ -52,6 +53,12 @@ class OoyalaBaseIE(InfoExtractor):
elif delivery_type == 'hds' or ext == 'f4m':
formats.extend(self._extract_f4m_formats(
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
+ elif delivery_type == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ s_url, embed_code, mpd_id='dash', fatal=False))
+ elif delivery_type == 'smooth':
+ self._extract_ism_formats(
+ s_url, embed_code, ism_id='mss', fatal=False)
elif ext == 'smil':
formats.extend(self._extract_smil_formats(
s_url, embed_code, fatal=False))
@@ -146,8 +153,9 @@ class OoyalaIE(OoyalaBaseIE):
embed_code = self._match_id(url)
domain = smuggled_data.get('domain')
supportedformats = smuggled_data.get('supportedformats')
+ embed_token = smuggled_data.get('embed_token')
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
- return self._extract(content_tree_url, embed_code, domain, supportedformats)
+ return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token)
class OoyalaExternalIE(OoyalaBaseIE):
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index 7f19b1b..32289d8 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -1,25 +1,18 @@
# coding: utf-8
-from __future__ import unicode_literals, division
+from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_chr,
- compat_ord,
-)
+from ..compat import compat_chr
from ..utils import (
determine_ext,
ExtractorError,
)
-from ..jsinterp import (
- JSInterpreter,
- _NAME_RE
-)
class OpenloadIE(InfoExtractor):
- _VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+ _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -28,7 +21,7 @@ class OpenloadIE(InfoExtractor):
'id': 'kUEfGclsU9o',
'ext': 'mp4',
'title': 'skyrim_no-audio_1080.mp4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'https://openload.co/embed/rjC09fkPLYs',
@@ -36,7 +29,7 @@ class OpenloadIE(InfoExtractor):
'id': 'rjC09fkPLYs',
'ext': 'mp4',
'title': 'movie.mp4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': {
'en': [{
'ext': 'vtt',
@@ -60,45 +53,16 @@ class OpenloadIE(InfoExtractor):
# for title and ext
'url': 'https://openload.co/embed/Sxz5sADo82g/',
'only_matching': True,
+ }, {
+ 'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
+ 'only_matching': True,
}]
- def openload_decode(self, txt):
- symbol_dict = {
- '(゚Д゚) [゚Θ゚]': '_',
- '(゚Д゚) [゚ω゚ノ]': 'a',
- '(゚Д゚) [゚Θ゚ノ]': 'b',
- '(゚Д゚) [\'c\']': 'c',
- '(゚Д゚) [゚ー゚ノ]': 'd',
- '(゚Д゚) [゚Д゚ノ]': 'e',
- '(゚Д゚) [1]': 'f',
- '(゚Д゚) [\'o\']': 'o',
- '(o゚ー゚o)': 'u',
- '(゚Д゚) [\'c\']': 'c',
- '((゚ー゚) + (o^_^o))': '7',
- '((o^_^o) +(o^_^o) +(c^_^o))': '6',
- '((゚ー゚) + (゚Θ゚))': '5',
- '(-~3)': '4',
- '(-~-~1)': '3',
- '(-~1)': '2',
- '(-~0)': '1',
- '((c^_^o)-(c^_^o))': '0',
- }
- delim = '(゚Д゚)[゚ε゚]+'
- end_token = '(゚Д゚)[゚o゚]'
- symbols = '|'.join(map(re.escape, symbol_dict.keys()))
- txt = re.sub('(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt)
- ret = ''
- for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt):
- for aachar in aacode.split(delim):
- if aachar.isdigit():
- ret += compat_chr(int(aachar, 8))
- else:
- m = re.match(r'^u([\da-f]{4})$', aachar)
- if m:
- ret += compat_chr(int(m.group(1), 16))
- else:
- self.report_warning("Cannot decode: %s" % aachar)
- return ret
+ @staticmethod
+ def _extract_urls(webpage):
+     """Return every Openload embed URL found in an <iframe> src of webpage."""
+     # Covers openload.co, openload.io and oload.tv; the scheme is optional.
+     embed_re = r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)'
+     return re.findall(embed_re, webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -107,36 +71,21 @@ class OpenloadIE(InfoExtractor):
if 'File not found' in webpage or 'deleted by the owner' in webpage:
raise ExtractorError('File not found', expected=True)
- # The following decryption algorithm is written by @yokrysty and
- # declared to be freely used in youtube-dl
- # See https://github.com/rg3/youtube-dl/issues/10408
- enc_data = self._html_search_regex(
- r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
- webpage, 'encrypted data')
+ ol_id = self._search_regex(
+ '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>',
+ webpage, 'openload ID')
- enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>',
- webpage, 'encrypted code')
+ first_three_chars = int(float(ol_id[0:][:3]))
+ fifth_char = int(float(ol_id[3:5]))
+ urlcode = ''
+ num = 5
- js_code = self.openload_decode(enc_code)
- jsi = JSInterpreter(js_code)
+ while num < len(ol_id):
+ urlcode += compat_chr(int(float(ol_id[num:][:3])) +
+ first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2])))
+ num += 5
- m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function')
- m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function')
-
- offset = jsi.call_function(m_offset_fun)
- diff = jsi.call_function(m_diff_fun)
-
- video_url_chars = []
-
- for idx, c in enumerate(enc_data):
- j = compat_ord(c)
- if j >= 33 and j <= 126:
- j = ((j + 14) % 94) + 33
- if idx == len(enc_data) - offset:
- j += diff
- video_url_chars += compat_chr(j)
-
- video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
+ video_url = 'https://openload.co/stream/' + urlcode
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@@ -152,8 +101,7 @@ class OpenloadIE(InfoExtractor):
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'url': video_url,
# Seems all videos have extensions in their titles
- 'ext': determine_ext(title),
+ 'ext': determine_ext(title, 'mp4'),
'subtitles': subtitles,
}
-
return info_dict
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index b4cce7e..1e2c54e 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -247,7 +247,7 @@ class ORFIPTVIE(InfoExtractor):
'title': 'Weitere Evakuierungen um Vulkan Calbuco',
'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
'duration': 68.197,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20150425',
},
}
diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py
index 2b07958..89c95ff 100644
--- a/youtube_dl/extractor/pandoratv.py
+++ b/youtube_dl/extractor/pandoratv.py
@@ -11,6 +11,7 @@ from ..utils import (
float_or_none,
parse_duration,
str_to_int,
+ urlencode_postdata,
)
@@ -25,7 +26,7 @@ class PandoraTVIE(InfoExtractor):
'ext': 'flv',
'title': '頭を撫でてくれる?',
'description': '頭を撫でてくれる?',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 39,
'upload_date': '20151218',
'uploader': 'カワイイ動物まとめ',
@@ -56,6 +57,22 @@ class PandoraTVIE(InfoExtractor):
r'^v(\d+)[Uu]rl$', format_id, 'height', default=None)
if not height:
continue
+
+ play_url = self._download_json(
+ 'http://m.pandora.tv/?c=api&m=play_url', video_id,
+ data=urlencode_postdata({
+ 'prgid': video_id,
+ 'runtime': info.get('runtime'),
+ 'vod_url': format_url,
+ }),
+ headers={
+ 'Origin': url,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ format_url = play_url.get('url')
+ if not format_url:
+ continue
+
formats.append({
'format_id': '%sp' % height,
'url': format_url,
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index b490ef7..6baed77 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -236,7 +236,7 @@ class PBSIE(InfoExtractor):
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
'duration': 6559,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
@@ -249,7 +249,7 @@ class PBSIE(InfoExtractor):
'description': 'md5:c741d14e979fc53228c575894094f157',
'title': 'NOVA - Killer Typhoon',
'duration': 3172,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140122',
'age_limit': 10,
},
@@ -270,7 +270,7 @@ class PBSIE(InfoExtractor):
'title': 'American Experience - Death and the Civil War, Chapter 1',
'description': 'md5:67fa89a9402e2ee7d08f53b920674c18',
'duration': 682,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -286,7 +286,7 @@ class PBSIE(InfoExtractor):
'title': 'FRONTLINE - United States of Secrets (Part One)',
'description': 'md5:55756bd5c551519cc4b7703e373e217e',
'duration': 6851,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
@@ -302,7 +302,7 @@ class PBSIE(InfoExtractor):
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
'description': 'md5:c0ff7475a4b70261c7e58f493c2792a5',
'duration': 1480,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
@@ -315,7 +315,7 @@ class PBSIE(InfoExtractor):
'title': 'FRONTLINE - The Atomic Artists',
'description': 'md5:f677e4520cfacb4a5ce1471e31b57800',
'duration': 723,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -330,7 +330,7 @@ class PBSIE(InfoExtractor):
'ext': 'mp4',
'title': 'FRONTLINE - Netanyahu at War',
'duration': 6852,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'formats': 'mincount:8',
},
},
@@ -350,6 +350,15 @@ class PBSIE(InfoExtractor):
410: 'This video has expired and is no longer available for online streaming.',
}
+ def _real_initialize(self):
+     """Best-effort: set the 'pbsol.station' cookie from the localization service.
+
+     Every step is optional; if the service is unreachable or its cookie
+     has no station entry, initialization completes without setting anything.
+     """
+     cookie = (self._download_json(
+         'http://localization.services.pbs.org/localize/auto/cookie/',
+         None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie')
+     if cookie:
+         # default=None makes a missing station non-fatal. Without it the
+         # lookup raises on no match, so the check below could never be
+         # false and a non-essential step could abort the extractor.
+         station = self._search_regex(
+             r'#?s=\["([^"]+)"', cookie, 'station', default=None)
+         if station:
+             self._set_cookie('.pbs.org', 'pbsol.station', station)
+
def _extract_webpage(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -476,7 +485,8 @@ class PBSIE(InfoExtractor):
redirect_info = self._download_json(
'%s?format=json' % redirect['url'], display_id,
- 'Downloading %s video url info' % (redirect_id or num))
+ 'Downloading %s video url info' % (redirect_id or num),
+ headers=self.geo_verification_headers())
if redirect_info['status'] == 'error':
raise ExtractorError(
@@ -558,7 +568,7 @@ class PBSIE(InfoExtractor):
# Try turning it to 'program - title' naming scheme if possible
alt_title = info.get('program', {}).get('title')
if alt_title:
- info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
+ info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + r'[\s\-:]+', '', info['title'])
description = info.get('description') or info.get(
'program', {}).get('description') or description
diff --git a/youtube_dl/extractor/people.py b/youtube_dl/extractor/people.py
index 9ecdbc1..6ca9571 100644
--- a/youtube_dl/extractor/people.py
+++ b/youtube_dl/extractor/people.py
@@ -14,7 +14,7 @@ class PeopleIE(InfoExtractor):
'ext': 'mp4',
'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”',
'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 246.318,
'timestamp': 1458720585,
'upload_date': '20160323',
diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py
index ac009f6..e435c28 100644
--- a/youtube_dl/extractor/phoenix.py
+++ b/youtube_dl/extractor/phoenix.py
@@ -1,9 +1,9 @@
from __future__ import unicode_literals
-from .zdf import ZDFIE
+from .dreisat import DreiSatIE
-class PhoenixIE(ZDFIE):
+class PhoenixIE(DreiSatIE):
IE_NAME = 'phoenix.de'
_VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
(?:
diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py
new file mode 100644
index 0000000..c0c276a
--- /dev/null
+++ b/youtube_dl/extractor/piksel.py
@@ -0,0 +1,123 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ dict_get,
+ int_or_none,
+ unescapeHTML,
+ parse_iso8601,
+)
+
+
+class PikselIE(InfoExtractor):
+    """Extractor for videos served by the player.piksel.com embed player."""
+
+    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
+    _TESTS = [
+        {
+            'url': 'http://player.piksel.com/v/nv60p12f',
+            'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
+            'info_dict': {
+                'id': 'nv60p12f',
+                'ext': 'mp4',
+                'title': 'فن الحياة - الحلقة 1',
+                'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
+                'timestamp': 1465231790,
+                'upload_date': '20160606',
+            }
+        },
+        {
+            # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
+            'url': 'https://player.piksel.com/v/v80kqp41',
+            'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
+            'info_dict': {
+                'id': 'v80kqp41',
+                'ext': 'mp4',
+                'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
+                'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
+                'timestamp': 1486171129,
+                'upload_date': '20170204',
+            }
+        }
+    ]
+
+    @staticmethod
+    def _extract_url(webpage):
+        """Return the first Piksel player iframe URL in webpage, or None."""
+        mobj = re.search(
+            r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        """Resolve the player page to an info dict via the ws_program JSON API.
+
+        Raises ExtractorError (expected) when the API response carries a
+        'failure' entry.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # The API token is embedded in the player page in one of two forms.
+        app_token = self._search_regex([
+            r'clientAPI\s*:\s*"([^"]+)"',
+            r'data-de-api-key\s*=\s*"([^"]+)"'
+        ], webpage, 'app token')
+        response = self._download_json(
+            'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
+            video_id, query={
+                'v': video_id
+            })['response']
+        failure = response.get('failure')
+        if failure:
+            raise ExtractorError(failure['reason'], expected=True)
+        video_data = response['WsProgramResponse']['program']['asset']
+        title = video_data['title']
+
+        formats = []
+
+        # Use the first HLS manifest key that is present.
+        m3u8_url = dict_get(video_data, [
+            'm3u8iPadURL',
+            'ipadM3u8Url',
+            'm3u8AndroidURL',
+            'm3u8iPhoneURL',
+            'iphoneM3u8Url'])
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
+
+        asset_type = dict_get(video_data, ['assetType', 'asset_type'])
+        for asset_file in video_data.get('assetFiles', []):
+            # TODO: extract rtmp formats
+            http_url = asset_file.get('http_url')
+            if not http_url:
+                continue
+            vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
+            abr = int_or_none(asset_file.get('audioBitrate'), 1024)
+            tbr = None
+            if asset_type == 'video':
+                # Either bitrate may be missing (None); adding None would
+                # raise TypeError, so report a total only when both are known.
+                if vbr is not None and abr is not None:
+                    tbr = vbr + abr
+            elif asset_type == 'audio':
+                tbr = abr
+
+            format_id = ['http']
+            if tbr:
+                format_id.append(compat_str(tbr))
+
+            formats.append({
+                'format_id': '-'.join(format_id),
+                'url': unescapeHTML(http_url),
+                'vbr': vbr,
+                'abr': abr,
+                'width': int_or_none(asset_file.get('videoWidth')),
+                'height': int_or_none(asset_file.get('videoHeight')),
+                'filesize': int_or_none(asset_file.get('filesize')),
+                'tbr': tbr,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description'),
+            'thumbnail': video_data.get('thumbnailUrl'),
+            'timestamp': parse_iso8601(video_data.get('dateadd')),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py
index a52210f..6a4580d 100644
--- a/youtube_dl/extractor/pinkbike.py
+++ b/youtube_dl/extractor/pinkbike.py
@@ -23,7 +23,7 @@ class PinkbikeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Brandon Semenuk - RAW 100',
'description': 'Official release: www.redbull.ca/rupertwalker',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 100,
'upload_date': '20150406',
'uploader': 'revelco',
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
index 77e1211..e38c761 100644
--- a/youtube_dl/extractor/pladform.py
+++ b/youtube_dl/extractor/pladform.py
@@ -34,7 +34,7 @@ class PladformIE(InfoExtractor):
'ext': 'mp4',
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 694,
'age_limit': 0,
},
diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py
index 1e8096a..391e1bd 100644
--- a/youtube_dl/extractor/playtvak.py
+++ b/youtube_dl/extractor/playtvak.py
@@ -25,7 +25,7 @@ class PlaytvakIE(InfoExtractor):
'ext': 'mp4',
'title': 'Vyžeňte vosy a sršně ze zahrady',
'description': 'md5:f93d398691044d303bc4a3de62f3e976',
- 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
'duration': 279,
'timestamp': 1438732860,
'upload_date': '20150805',
@@ -38,7 +38,7 @@ class PlaytvakIE(InfoExtractor):
'ext': 'flv',
'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
- 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
'is_live': True,
},
'params': {
@@ -52,7 +52,7 @@ class PlaytvakIE(InfoExtractor):
'ext': 'mp4',
'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se',
'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2',
- 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
'duration': 39,
'timestamp': 1438969140,
'upload_date': '20150807',
@@ -66,7 +66,7 @@ class PlaytvakIE(InfoExtractor):
'ext': 'mp4',
'title': 'Táhni! Demonstrace proti imigrantům budila emoce',
'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c',
- 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
'timestamp': 1439052180,
'upload_date': '20150808',
'is_live': False,
@@ -79,7 +79,7 @@ class PlaytvakIE(InfoExtractor):
'ext': 'mp4',
'title': 'Recesisté udělali z billboardu kolotoč',
'description': 'md5:7369926049588c3989a66c9c1a043c4c',
- 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
'timestamp': 1415725500,
'upload_date': '20141111',
'is_live': False,
diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py
index 79c2db0..4aef186 100644
--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dl/extractor/playvid.py
@@ -34,7 +34,7 @@ class PlayvidIE(InfoExtractor):
'ext': 'mp4',
'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
'age_limit': 18,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}]
diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py
index 0bc7431..4d96a10 100644
--- a/youtube_dl/extractor/playwire.py
+++ b/youtube_dl/extractor/playwire.py
@@ -18,7 +18,7 @@ class PlaywireIE(InfoExtractor):
'id': '3353705',
'ext': 'mp4',
'title': 'S04_RM_UCL_Rus',
- 'thumbnail': 're:^https?://.*\.png$',
+ 'thumbnail': r're:^https?://.*\.png$',
'duration': 145.94,
},
}, {
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
index 0ffd41e..5c798e8 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -157,13 +157,10 @@ class PluralsightIE(PluralsightBaseIE):
display_id = '%s-%s' % (name, clip_id)
- parsed_url = compat_urlparse.urlparse(url)
-
- payload_url = compat_urlparse.urlunparse(parsed_url._replace(
- netloc='app.pluralsight.com', path='player/api/v1/payload'))
-
course = self._download_json(
- payload_url, display_id, headers={'Referer': url})['payload']['course']
+ 'https://app.pluralsight.com/player/user/api/v1/player/payload',
+ display_id, data=urlencode_postdata({'courseId': course_name}),
+ headers={'Referer': url})
collection = course['modules']
diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py
index 5ff1737..2ac1fcb 100644
--- a/youtube_dl/extractor/polskieradio.py
+++ b/youtube_dl/extractor/polskieradio.py
@@ -36,7 +36,7 @@ class PolskieRadioIE(InfoExtractor):
'timestamp': 1456594200,
'upload_date': '20160227',
'duration': 2364,
- 'thumbnail': 're:^https?://static\.prsa\.pl/images/.*\.jpg$'
+ 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$'
},
}],
}, {
diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py
index d85e029..8218c7d 100644
--- a/youtube_dl/extractor/porncom.py
+++ b/youtube_dl/extractor/porncom.py
@@ -22,7 +22,7 @@ class PornComIE(InfoExtractor):
'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
'ext': 'mp4',
'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 551,
'view_count': int,
'age_limit': 18,
diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py
new file mode 100644
index 0000000..a4a5d39
--- /dev/null
+++ b/youtube_dl/extractor/pornflip.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+)
+from ..utils import (
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class PornFlipIE(InfoExtractor):
+    """Extractor for pornflip.com video and embed pages."""
+
+    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
+    _TESTS = [{
+        'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
+        'md5': '98c46639849145ae1fd77af532a9278c',
+        'info_dict': {
+            'id': 'wz7DfNhMmep',
+            'ext': 'mp4',
+            'title': '2 Amateurs swallow make his dream cumshots true',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 112,
+            'timestamp': 1481655502,
+            'upload_date': '20161213',
+            'uploader_id': '106786',
+            'uploader': 'figifoto',
+            'view_count': int,
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Extract formats and metadata from the page's <embed> flashvars."""
+        video_id = self._match_id(url)
+
+        # Always fetch the canonical /v/ page, even for /embed/ URLs.
+        webpage = self._download_webpage(
+            'https://www.pornflip.com/v/%s' % video_id, video_id)
+
+        # flashvars is a query string; parsing maps each key to a list.
+        raw_flashvars = self._search_regex(
+            r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
+            webpage, 'flashvars', group='flashvars')
+        flashvars = compat_parse_qs(raw_flashvars)
+
+        def flashvar(kind):
+            # Optional lookup: first value of video_vars[<kind>] via try_get.
+            return try_get(
+                flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
+
+        # The title is mandatory, hence the direct indexing.
+        title = flashvars['video_vars[title]'][0]
+
+        formats = []
+        for key, value in flashvars.items():
+            if not value or not isinstance(value, list):
+                continue
+            format_url = value[0]
+            if key == 'video_vars[hds_manifest]':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, video_id, mpd_id='dash', fatal=False))
+                continue
+            # Progressive URLs are keyed as video_vars[video_urls][<height>...
+            height = self._search_regex(
+                r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
+            if height:
+                formats.append({
+                    'url': format_url,
+                    'format_id': 'http-%s' % height,
+                    'height': int_or_none(height),
+                })
+        self._sort_formats(formats)
+
+        uploader = self._html_search_regex(
+            (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
+             r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
+            webpage, 'uploader', fatal=False, group='uploader')
+
+        timestamp = unified_timestamp(self._html_search_meta(
+            'uploadDate', webpage, 'timestamp'))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'thumbnail': flashvar('big_thumb'),
+            'duration': int_or_none(flashvar('duration')),
+            'timestamp': timestamp,
+            'uploader_id': flashvar('author_id'),
+            'uploader': uploader,
+            'view_count': int_or_none(flashvar('views')),
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py
index 8df12ee..842317e 100644
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -21,7 +21,7 @@ class PornHdIE(InfoExtractor):
'ext': 'mp4',
'title': 'Restroom selfie masturbation',
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
'age_limit': 18,
}
@@ -35,7 +35,7 @@ class PornHdIE(InfoExtractor):
'ext': 'mp4',
'title': 'Sierra loves doing laundry',
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
'age_limit': 18,
},
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 40dbe69..017f6c5 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -156,7 +156,12 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
- video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage)))
+ video_urls = []
+ for quote, video_url in re.findall(
+ r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage):
+ video_urls.append(compat_urllib_parse_unquote(re.sub(
+ r'{0}\s*\+\s*{0}'.format(quote), '', video_url)))
+
if webpage.find('"encrypted":true') != -1:
password = compat_urllib_parse_unquote_plus(
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
@@ -229,7 +234,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
- entries = self._extract_entries(webpage)
+ # Only process container div with main playlist content skipping
+ # drop-down menu that uses similar pattern for videos (see
+ # https://github.com/rg3/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
+ entries = self._extract_entries(container)
playlist = self._parse_json(
self._search_regex(
@@ -243,12 +255,12 @@ class PornHubPlaylistBaseIE(InfoExtractor):
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.pornhub.com/playlist/6201671',
+ 'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': {
- 'id': '6201671',
- 'title': 'P0p4',
+ 'id': '4667351',
+ 'title': 'Nataly Hot',
},
- 'playlist_mincount': 35,
+ 'playlist_mincount': 2,
}]
diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py
index 63816c3..1b5b9a3 100644
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -19,7 +19,7 @@ class PornotubeIE(InfoExtractor):
'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0',
'categories': ['Adult Humor', 'Blondes'],
'uploader': 'Alpha Blue Archives',
- 'thumbnail': 're:^https?://.*\\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1417582800,
'age_limit': 18,
}
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py
index 58f557e..b6b7106 100644
--- a/youtube_dl/extractor/pornovoisines.py
+++ b/youtube_dl/extractor/pornovoisines.py
@@ -23,7 +23,7 @@ class PornoVoisinesIE(InfoExtractor):
'ext': 'mp4',
'title': 'Recherche appartement',
'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140925',
'duration': 120,
'view_count': int,
diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py
index 3c9087f..1a0cce7 100644
--- a/youtube_dl/extractor/pornoxo.py
+++ b/youtube_dl/extractor/pornoxo.py
@@ -20,7 +20,7 @@ class PornoXOIE(JWPlatformBaseIE):
'display_id': 'striptease-from-sexy-secretary',
'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
'categories': list, # NSFW
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
}
}
diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py
index 2da93ed..b5c2792 100644
--- a/youtube_dl/extractor/presstv.py
+++ b/youtube_dl/extractor/presstv.py
@@ -19,7 +19,7 @@ class PressTVIE(InfoExtractor):
'ext': 'mp4',
'title': 'Organic mattresses used to clean waste water',
'upload_date': '20160409',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:20002e654bbafb6908395a5c0cfcd125'
}
}
diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py
index d40cca0..23ac93d 100644
--- a/youtube_dl/extractor/promptfile.py
+++ b/youtube_dl/extractor/promptfile.py
@@ -20,7 +20,7 @@ class PromptFileIE(InfoExtractor):
'id': '86D1CE8462-576CAAE416',
'ext': 'mp4',
'title': 'oceans.mp4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 7cc07a2..5091d84 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -85,6 +85,9 @@ class ProSiebenSat1BaseIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
source_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
+ elif mimetype == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id='dash', fatal=False))
else:
tbr = fix_bitrate(source['bitrate'])
if protocol in ('rtmp', 'rtmpe'):
@@ -144,16 +147,12 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
'info_dict': {
'id': '2104602',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231',
'duration': 5845.04,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
@@ -255,7 +254,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
'info_dict': {
'id': '2572814',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Andreas Kümmert: Rocket Man',
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
'upload_date': '20131017',
@@ -269,7 +268,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
'info_dict': {
'id': '2156342',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Kurztrips zum Valentinstag',
'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
'duration': 307.24,
@@ -286,12 +285,13 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'description': 'md5:63b8963e71f481782aeea877658dec84',
},
'playlist_count': 2,
+ 'skip': 'This video is unavailable',
},
{
'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
'info_dict': {
'id': '4187506',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Best of Circus HalliGalli',
'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
'upload_date': '20151229',
@@ -372,7 +372,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
info = self._extract_video_info(url, clip_id)
description = self._html_search_regex(
- self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
+ self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
+ if description is None:
+ description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
@@ -391,7 +393,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
playlist = self._parse_json(
self._search_regex(
- 'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
+ r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
webpage, 'playlist'),
playlist_id)
entries = []
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
index 37cb9e2..17c27da 100644
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -29,7 +29,7 @@ class QQMusicIE(InfoExtractor):
'release_date': '20141227',
'creator': '林俊杰',
'description': 'md5:d327722d0361576fde558f1ac68a7065',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'note': 'There is no mp3-320 version of this song.',
@@ -42,7 +42,7 @@ class QQMusicIE(InfoExtractor):
'release_date': '20050626',
'creator': '李季美',
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'note': 'lyrics not in .lrc format',
@@ -54,7 +54,7 @@ class QQMusicIE(InfoExtractor):
'release_date': '19970225',
'creator': 'Dark Funeral',
'description': 'md5:ed14d5bd7ecec19609108052c25b2c11',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py
index 069dbfa..ed38c77 100644
--- a/youtube_dl/extractor/r7.py
+++ b/youtube_dl/extractor/r7.py
@@ -23,7 +23,7 @@ class R7IE(InfoExtractor):
'ext': 'mp4',
'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
'description': 'md5:01812008664be76a6479aa58ec865b72',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 98,
'like_count': int,
'view_count': int,
diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py
index 0aa8d05..2c35f98 100644
--- a/youtube_dl/extractor/radiobremen.py
+++ b/youtube_dl/extractor/radiobremen.py
@@ -20,7 +20,7 @@ class RadioBremenIE(InfoExtractor):
'duration': 178,
'width': 512,
'title': 'Druck auf Patrick Öztürk',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.',
},
}
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py
index 321917a..3b40002 100644
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dl/extractor/radiocanada.py
@@ -54,9 +54,8 @@ class RadioCanadaIE(InfoExtractor):
raise ExtractorError('This video is DRM protected.', expected=True)
device_types = ['ipad']
- if app_code != 'toutv':
- device_types.append('flash')
if not smuggled_data:
+ device_types.append('flash')
device_types.append('android')
formats = []
@@ -103,7 +102,7 @@ class RadioCanadaIE(InfoExtractor):
continue
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
protocol = determine_protocol({'url': f_url})
- formats.append({
+ f = {
'format_id': '%s-%d' % (protocol, tbr),
'url': f_url,
'ext': 'flv' if protocol == 'rtmp' else ext,
@@ -111,7 +110,14 @@ class RadioCanadaIE(InfoExtractor):
'width': int_or_none(url_e.get('width')),
'height': int_or_none(url_e.get('height')),
'tbr': tbr,
- })
+ }
+ mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
+ if mobj:
+ f.update({
+ 'url': mobj.group('url') + mobj.group('auth'),
+ 'play_path': mobj.group('playpath'),
+ })
+ formats.append(f)
if protocol == 'rtsp':
base_url = self._search_regex(
r'rtsp://([^?]+)', f_url, 'base url', default=None)
diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py
index aa5f6f8..2c06c8b 100644
--- a/youtube_dl/extractor/radiode.py
+++ b/youtube_dl/extractor/radiode.py
@@ -13,7 +13,7 @@ class RadioDeIE(InfoExtractor):
'ext': 'mp3',
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:591c49c702db1a33751625ebfb67f273',
- 'thumbnail': 're:^https?://.*\.png',
+ 'thumbnail': r're:^https?://.*\.png',
'is_live': True,
},
'params': {
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py
index ec4fa6e..a53ad97 100644
--- a/youtube_dl/extractor/radiojavan.py
+++ b/youtube_dl/extractor/radiojavan.py
@@ -18,7 +18,7 @@ class RadioJavanIE(InfoExtractor):
'id': 'chaartaar-ashoobam',
'ext': 'mp4',
'title': 'Chaartaar - Ashoobam',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
'upload_date': '20150215',
'view_count': int,
'like_count': int,
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index dc640b1..41afbd9 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -120,7 +120,7 @@ class RaiTVIE(RaiBaseIE):
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
'upload_date': '20140407',
'duration': 6160,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
},
{
@@ -133,7 +133,7 @@ class RaiTVIE(RaiBaseIE):
'title': 'TG PRIMO TEMPO',
'upload_date': '20140612',
'duration': 1758,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'Geo-restricted to Italy',
},
@@ -169,7 +169,7 @@ class RaiTVIE(RaiBaseIE):
'description': 'md5:364b604f7db50594678f483353164fb8',
'upload_date': '20140923',
'duration': 386,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
},
]
diff --git a/youtube_dl/extractor/rbmaradio.py b/youtube_dl/extractor/rbmaradio.py
index 471928e..53b82fb 100644
--- a/youtube_dl/extractor/rbmaradio.py
+++ b/youtube_dl/extractor/rbmaradio.py
@@ -22,7 +22,7 @@ class RBMARadioIE(InfoExtractor):
'ext': 'mp3',
'title': 'Main Stage - Ford & Lopatin',
'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 2452,
'timestamp': 1307103164,
'upload_date': '20110603',
diff --git a/youtube_dl/extractor/reuters.py b/youtube_dl/extractor/reuters.py
index 961d504..9dc482d 100644
--- a/youtube_dl/extractor/reuters.py
+++ b/youtube_dl/extractor/reuters.py
@@ -32,7 +32,7 @@ class ReutersIE(InfoExtractor):
webpage, 'video data'))
def get_json_value(key, fatal=False):
- return self._search_regex('"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
+ return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
title = unescapeHTML(get_json_value('title', fatal=True))
mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py
index 4875009..4cb99c2 100644
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dl/extractor/reverbnation.py
@@ -18,7 +18,7 @@ class ReverbNationIE(InfoExtractor):
'title': 'MONA LISA',
'uploader': 'ALKILADOS',
'uploader_id': '216429',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
}]
diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py
index 962b524..69934ef 100644
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@@ -14,7 +14,7 @@ class Ro220IE(InfoExtractor):
'id': 'LYV6doKo7f',
'ext': 'mp4',
'title': 'Luati-le Banii sez 4 ep 1',
- 'description': 're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$',
+ 'description': r're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$',
}
}
diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py
index 48128e2..cd6904b 100644
--- a/youtube_dl/extractor/rockstargames.py
+++ b/youtube_dl/extractor/rockstargames.py
@@ -18,7 +18,7 @@ class RockstarGamesIE(InfoExtractor):
'ext': 'mp4',
'title': 'Further Adventures in Finance and Felony Trailer',
'description': 'md5:6d31f55f30cb101b5476c4a379e324a3',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1464876000,
'upload_date': '20160602',
}
diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py
index f5b2f56..46dfc78 100644
--- a/youtube_dl/extractor/roosterteeth.py
+++ b/youtube_dl/extractor/roosterteeth.py
@@ -26,7 +26,7 @@ class RoosterTeethIE(InfoExtractor):
'ext': 'mp4',
'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
- 'thumbnail': 're:^https?://.*\.png$',
+ 'thumbnail': r're:^https?://.*\.png$',
'series': 'Million Dollars, But...',
'episode': 'Million Dollars, But... The Game Announcement',
'comment_count': int,
diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py
index 1d404d2..14c8e82 100644
--- a/youtube_dl/extractor/rottentomatoes.py
+++ b/youtube_dl/extractor/rottentomatoes.py
@@ -14,7 +14,7 @@ class RottenTomatoesIE(InfoExtractor):
'ext': 'mp4',
'title': 'Toy Story 3',
'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}
diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py
index ebe563e..a6fac6c 100644
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dl/extractor/rte.py
@@ -4,118 +4,31 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
float_or_none,
parse_iso8601,
unescapeHTML,
+ ExtractorError,
)
-class RteIE(InfoExtractor):
- IE_NAME = 'rte'
- IE_DESC = 'Raidió Teilifís Éireann TV'
- _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
- 'info_dict': {
- 'id': '10478715',
- 'ext': 'flv',
- 'title': 'Watch iWitness online',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
- 'duration': 60.046,
- },
- 'params': {
- 'skip_download': 'f4m fails with --test atm'
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = self._og_search_title(webpage)
- description = self._html_search_meta('description', webpage, 'description')
- duration = float_or_none(self._html_search_meta(
- 'duration', webpage, 'duration', fatal=False), 1000)
-
- thumbnail = None
- thumbnail_meta = self._html_search_meta('thumbnail', webpage)
- if thumbnail_meta:
- thumbnail_id = self._search_regex(
- r'uri:irus:(.+)', thumbnail_meta,
- 'thumbnail id', fatal=False)
- if thumbnail_id:
- thumbnail = 'http://img.rasset.ie/%s.jpg' % thumbnail_id
-
- feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id
- json_string = self._download_json(feeds_url, video_id)
-
- # f4m_url = server + relative_url
- f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
- f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
- self._sort_formats(f4m_formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': f4m_formats,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- }
-
-
-class RteRadioIE(InfoExtractor):
- IE_NAME = 'rte:radio'
- IE_DESC = 'Raidió Teilifís Éireann radio'
- # Radioplayer URLs have two distinct specifier formats,
- # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
- # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
- # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
- # An <id> uniquely defines an individual recording, and is the only part we require.
- _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
-
- _TESTS = [{
- # Old-style player URL; HLS and RTMPE formats
- 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
- 'info_dict': {
- 'id': '10507902',
- 'ext': 'mp4',
- 'title': 'Gloria',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
- 'timestamp': 1451203200,
- 'upload_date': '20151227',
- 'duration': 7230.0,
- },
- 'params': {
- 'skip_download': 'f4m fails with --test atm'
- }
- }, {
- # New-style player URL; RTMPE formats only
- 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
- 'info_dict': {
- 'id': '3250678',
- 'ext': 'flv',
- 'title': 'The Lyric Concert with Paul Herriott',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'description': '',
- 'timestamp': 1333742400,
- 'upload_date': '20120406',
- 'duration': 7199.016,
- },
- 'params': {
- 'skip_download': 'f4m fails with --test atm'
- }
- }]
-
+class RteBaseIE(InfoExtractor):
def _real_extract(self, url):
item_id = self._match_id(url)
- json_string = self._download_json(
- 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
- item_id)
+ try:
+ json_string = self._download_json(
+ 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
+ item_id)
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+ error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
+ if error_info:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error_info['message']),
+ expected=True)
+ raise
# NB the string values in the JSON are stored using XML escaping(!)
show = json_string['shows'][0]
@@ -163,3 +76,67 @@ class RteRadioIE(InfoExtractor):
'duration': duration,
'formats': formats,
}
+
+
+class RteIE(RteBaseIE):
+ IE_NAME = 'rte'
+ IE_DESC = 'Raidió Teilifís Éireann TV'
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
+ 'md5': '4a76eb3396d98f697e6e8110563d2604',
+ 'info_dict': {
+ 'id': '10478715',
+ 'ext': 'mp4',
+ 'title': 'iWitness',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'The spirit of Ireland, one voice and one minute at a time.',
+ 'duration': 60.046,
+ 'upload_date': '20151012',
+ 'timestamp': 1444694160,
+ },
+ }
+
+
+class RteRadioIE(RteBaseIE):
+ IE_NAME = 'rte:radio'
+ IE_DESC = 'Raidió Teilifís Éireann radio'
+ # Radioplayer URLs have two distinct specifier formats,
+ # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
+ # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
+ # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
+ # An <id> uniquely defines an individual recording, and is the only part we require.
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ # Old-style player URL; HLS and RTMPE formats
+ 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
+ 'md5': 'c79ccb2c195998440065456b69760411',
+ 'info_dict': {
+ 'id': '10507902',
+ 'ext': 'mp4',
+ 'title': 'Gloria',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
+ 'timestamp': 1451203200,
+ 'upload_date': '20151227',
+ 'duration': 7230.0,
+ },
+ }, {
+ # New-style player URL; RTMPE formats only
+ 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
+ 'info_dict': {
+ 'id': '3250678',
+ 'ext': 'flv',
+ 'title': 'The Lyric Concert with Paul Herriott',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': '',
+ 'timestamp': 1333742400,
+ 'upload_date': '20120406',
+ 'duration': 7199.016,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }]
diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py
index cb4ee88..721ee73 100644
--- a/youtube_dl/extractor/rtl2.py
+++ b/youtube_dl/extractor/rtl2.py
@@ -2,7 +2,9 @@
from __future__ import unicode_literals
import re
+
from .common import InfoExtractor
+from ..utils import int_or_none
class RTL2IE(InfoExtractor):
@@ -13,7 +15,7 @@ class RTL2IE(InfoExtractor):
'id': 'folge-203-0',
'ext': 'f4v',
'title': 'GRIP sucht den Sommerkönig',
- 'description': 'Matthias, Det und Helge treten gegeneinander an.'
+ 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f'
},
'params': {
# rtmp download
@@ -25,7 +27,7 @@ class RTL2IE(InfoExtractor):
'id': '21040-anna-erwischt-alex',
'ext': 'mp4',
'title': 'Anna erwischt Alex!',
- 'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
+ 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
},
'params': {
# rtmp download
@@ -52,34 +54,47 @@ class RTL2IE(InfoExtractor):
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
vivi_id = self._html_search_regex(
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
- info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
- info = self._download_json(info_url, video_id)
+ info = self._download_json(
+ 'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php',
+ video_id, query={
+ 'vico_id': vico_id,
+ 'vivi_id': vivi_id,
+ })
video_info = info['video']
title = video_info['titel']
- description = video_info.get('beschreibung')
- thumbnail = video_info.get('image')
- download_url = video_info['streamurl']
- download_url = download_url.replace('\\', '')
- stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL')
- rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
+ formats = []
+
+ rtmp_url = video_info.get('streamurl')
+ if rtmp_url:
+ rtmp_url = rtmp_url.replace('\\', '')
+ stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL')
+ rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
+
+ formats.append({
+ 'format_id': 'rtmp',
+ 'url': rtmp_url,
+ 'play_path': stream_url,
+ 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
+ 'page_url': url,
+ 'flash_version': 'LNX 11,2,202,429',
+ 'rtmp_conn': rtmp_conn,
+ 'no_resume': True,
+ 'preference': 1,
+ })
+
+ m3u8_url = video_info.get('streamurl_hls')
+ if m3u8_url:
+ formats.extend(self._extract_akamai_formats(m3u8_url, video_id))
- formats = [{
- 'url': download_url,
- 'play_path': stream_url,
- 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
- 'page_url': url,
- 'flash_version': 'LNX 11,2,202,429',
- 'rtmp_conn': rtmp_conn,
- 'no_resume': True,
- }]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
- 'thumbnail': thumbnail,
- 'description': description,
+ 'thumbnail': video_info.get('image'),
+ 'description': video_info.get('beschreibung'),
+ 'duration': int_or_none(video_info.get('duration')),
'formats': formats,
}
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index f0250af..54076de 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -40,7 +40,7 @@ class RtlNlIE(InfoExtractor):
'ext': 'mp4',
'timestamp': 1424039400,
'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
- 'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+ 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
'upload_date': '20150215',
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
}
@@ -52,7 +52,7 @@ class RtlNlIE(InfoExtractor):
'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
'ext': 'mp4',
'title': 'RTL Nieuws - Meer beelden van overval juwelier',
- 'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
+ 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
'timestamp': 1437233400,
'upload_date': '20150718',
'duration': 30.474,
diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py
index 82b323c..533ee27 100644
--- a/youtube_dl/extractor/rtp.py
+++ b/youtube_dl/extractor/rtp.py
@@ -16,7 +16,7 @@ class RTPIE(InfoExtractor):
'ext': 'mp3',
'title': 'Paixões Cruzadas',
'description': 'As paixões musicais de António Cartaxo e António Macedo',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
# rtmp download
diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py
index 3cc3284..48f17b8 100644
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@@ -4,27 +4,24 @@ from __future__ import unicode_literals
import re
from .srgssr import SRGSSRIE
-from ..compat import (
- compat_str,
- compat_urllib_parse_urlparse,
-)
+from ..compat import compat_str
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
unescapeHTML,
- xpath_text,
+ determine_ext,
)
class RTSIE(SRGSSRIE):
IE_DESC = 'RTS.ch'
- _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
+ _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
_TESTS = [
{
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
- 'md5': 'f254c4b26fb1d3c183793d52bc40d3e7',
+ 'md5': 'ff7f8450a90cf58dacb64e29707b4a8e',
'info_dict': {
'id': '3449373',
'display_id': 'les-enfants-terribles',
@@ -35,38 +32,20 @@ class RTSIE(SRGSSRIE):
'uploader': 'Divers',
'upload_date': '19680921',
'timestamp': -40280400,
- 'thumbnail': 're:^https?://.*\.image',
+ 'thumbnail': r're:^https?://.*\.image',
'view_count': int,
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
},
{
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
- 'md5': 'f1077ac5af686c76528dc8d7c5df29ba',
'info_dict': {
- 'id': '5742494',
- 'display_id': '5742494',
- 'ext': 'mp4',
- 'duration': 3720,
- 'title': 'Les yeux dans les cieux - Mon homard au Canada',
- 'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
- 'uploader': 'Passe-moi les jumelles',
- 'upload_date': '20140404',
- 'timestamp': 1396635300,
- 'thumbnail': 're:^https?://.*\.image',
- 'view_count': int,
+ 'id': '5624065',
+ 'title': 'Passe-moi les jumelles',
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
+ 'playlist_mincount': 4,
},
{
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
- 'md5': 'b4326fecd3eb64a458ba73c73e91299d',
'info_dict': {
'id': '5745975',
'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
@@ -77,14 +56,18 @@ class RTSIE(SRGSSRIE):
'uploader': 'Hockey',
'upload_date': '20140403',
'timestamp': 1396556882,
- 'thumbnail': 're:^https?://.*\.image',
+ 'thumbnail': r're:^https?://.*\.image',
'view_count': int,
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
'skip': 'Blocked outside Switzerland',
},
{
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
- 'md5': '9f713382f15322181bb366cc8c3a4ff0',
+ 'md5': '1bae984fe7b1f78e94abc74e802ed99f',
'info_dict': {
'id': '5745356',
'display_id': 'londres-cachee-par-un-epais-smog',
@@ -92,16 +75,12 @@ class RTSIE(SRGSSRIE):
'duration': 33,
'title': 'Londres cachée par un épais smog',
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
- 'uploader': 'Le Journal en continu',
+ 'uploader': 'L\'actu en vidéo',
'upload_date': '20140403',
'timestamp': 1396537322,
- 'thumbnail': 're:^https?://.*\.image',
+ 'thumbnail': r're:^https?://.*\.image',
'view_count': int,
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
},
{
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
@@ -125,6 +104,10 @@ class RTSIE(SRGSSRIE):
'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
},
'playlist_mincount': 5,
+ },
+ {
+ 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html',
+ 'only_matching': True,
}
]
@@ -142,19 +125,32 @@ class RTSIE(SRGSSRIE):
# media_id extracted out of URL is not always a real id
if 'video' not in all_info and 'audio' not in all_info:
- page = self._download_webpage(url, display_id)
+ entries = []
- # article with videos on rhs
- videos = re.findall(
- r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
- page)
- if not videos:
+ for item in all_info.get('items', []):
+ item_url = item.get('url')
+ if not item_url:
+ continue
+ entries.append(self.url_result(item_url, 'RTS'))
+
+ if not entries:
+ page, urlh = self._download_webpage_handle(url, display_id)
+ if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id:
+ return self.url_result(urlh.geturl(), 'RTS')
+
+ # article with videos on rhs
videos = re.findall(
- r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
+ r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
page)
- if videos:
- entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
- return self.playlist_result(entries, media_id, self._og_search_title(page))
+ if not videos:
+ videos = re.findall(
+ r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
+ page)
+ if videos:
+ entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
+
+ if entries:
+ return self.playlist_result(entries, media_id, all_info.get('title'))
internal_id = self._html_search_regex(
r'<(?:video|audio) data-id="([0-9]+)"', page,
@@ -168,36 +164,29 @@ class RTSIE(SRGSSRIE):
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
- upload_timestamp = parse_iso8601(info.get('broadcast_date'))
- duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
- if isinstance(duration, compat_str):
- duration = parse_duration(duration)
- view_count = info.get('plays')
- thumbnail = unescapeHTML(info.get('preview_image_url'))
+ title = info['title']
def extract_bitrate(url):
return int_or_none(self._search_regex(
r'-([0-9]+)k\.', url, 'bitrate', default=None))
formats = []
- for format_id, format_url in info['streams'].items():
- if format_id == 'hds_sd' and 'hds' in info['streams']:
+ streams = info.get('streams', {})
+ for format_id, format_url in streams.items():
+ if format_id == 'hds_sd' and 'hds' in streams:
continue
- if format_id == 'hls_sd' and 'hls' in info['streams']:
+ if format_id == 'hls_sd' and 'hls' in streams:
continue
- if format_url.endswith('.f4m'):
- token = self._download_xml(
- 'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path,
- media_id, 'Downloading %s token' % format_id)
- auth_params = xpath_text(token, './/authparams', 'auth params')
- if not auth_params:
- continue
- formats.extend(self._extract_f4m_formats(
- '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params),
- media_id, f4m_id=format_id, fatal=False))
- elif format_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(
- format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
+ ext = determine_ext(format_url)
+ if ext in ('m3u8', 'f4m'):
+ format_url = self._get_tokenized_src(format_url, media_id, format_id)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0',
+ media_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
else:
formats.append({
'format_id': format_id,
@@ -205,25 +194,37 @@ class RTSIE(SRGSSRIE):
'tbr': extract_bitrate(format_url),
})
- if 'media' in info:
- formats.extend([{
- 'format_id': '%s-%sk' % (media['ext'], media['rate']),
- 'url': 'http://download-video.rts.ch/%s' % media['url'],
- 'tbr': media['rate'] or extract_bitrate(media['url']),
- } for media in info['media'] if media.get('rate')])
+ for media in info.get('media', []):
+ media_url = media.get('url')
+ if not media_url or re.match(r'https?://', media_url):
+ continue
+ rate = media.get('rate')
+ ext = media.get('ext') or determine_ext(media_url, 'mp4')
+ format_id = ext
+ if rate:
+ format_id += '-%dk' % rate
+ formats.append({
+ 'format_id': format_id,
+ 'url': 'http://download-video.rts.ch/' + media_url,
+ 'tbr': rate or extract_bitrate(media_url),
+ })
self._check_formats(formats, media_id)
self._sort_formats(formats)
+ duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
+ if isinstance(duration, compat_str):
+ duration = parse_duration(duration)
+
return {
'id': media_id,
'display_id': display_id,
'formats': formats,
- 'title': info['title'],
+ 'title': title,
'description': info.get('intro'),
'duration': duration,
- 'view_count': view_count,
+ 'view_count': int_or_none(info.get('plays')),
'uploader': info.get('programName'),
- 'timestamp': upload_timestamp,
- 'thumbnail': thumbnail,
+ 'timestamp': parse_iso8601(info.get('broadcast_date')),
+ 'thumbnail': unescapeHTML(info.get('preview_image_url')),
}
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index 6a43b03..746677a 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -209,7 +209,10 @@ class RTVELiveIE(InfoExtractor):
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
vidplayer_id = self._search_regex(
- r'playerId=player([0-9]+)', webpage, 'internal video ID')
+ (r'playerId=player([0-9]+)',
+ r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
+ r'data-id=["\'](\d+)'),
+ webpage, 'internal video ID')
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
png = self._download_webpage(png_url, video_id, 'Downloading url information')
m3u8_url = _decrypt_url(png)
diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py
index f6454c6..6a00f70 100644
--- a/youtube_dl/extractor/rtvnh.py
+++ b/youtube_dl/extractor/rtvnh.py
@@ -14,7 +14,7 @@ class RTVNHIE(InfoExtractor):
'id': '131946',
'ext': 'mp4',
'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
- 'thumbnail': 're:^https?:.*\.jpg$'
+ 'thumbnail': r're:^https?:.*\.jpg$'
}
}
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py
index 9a330c1..3bfe934 100644
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@@ -28,7 +28,7 @@ class RudoIE(JWPlatformBaseIE):
@classmethod
def _extract_url(self, webpage):
mobj = re.search(
- '<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
+ r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
webpage)
if mobj:
return mobj.group('url')
diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py
index ce631b4..2b830cf 100644
--- a/youtube_dl/extractor/ruhd.py
+++ b/youtube_dl/extractor/ruhd.py
@@ -14,7 +14,7 @@ class RUHDIE(InfoExtractor):
'ext': 'divx',
'title': 'КОТ бааааам',
'description': 'классный кот)',
- 'thumbnail': 're:^http://.*\.jpg$',
+ 'thumbnail': r're:^http://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py
index 6db3e3e..20d0175 100644
--- a/youtube_dl/extractor/ruutu.py
+++ b/youtube_dl/extractor/ruutu.py
@@ -23,7 +23,7 @@ class RuutuIE(InfoExtractor):
'ext': 'mp4',
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 114,
'age_limit': 0,
},
@@ -36,7 +36,7 @@ class RuutuIE(InfoExtractor):
'ext': 'mp4',
'title': 'Superpesis: katso koko kausi Ruudussa',
'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 40,
'age_limit': 0,
},
@@ -49,7 +49,7 @@ class RuutuIE(InfoExtractor):
'ext': 'mp4',
'title': 'Osa 1: Mikael Jungner',
'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 0,
},
},
@@ -81,6 +81,9 @@ class RuutuIE(InfoExtractor):
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id='hds', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
else:
proto = compat_urllib_parse_urlparse(video_url).scheme
if not child.tag.startswith('HTTP') and proto != 'rtmp':
diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dl/extractor/savefrom.py
index 5b7367b..30f9cf8 100644
--- a/youtube_dl/extractor/savefrom.py
+++ b/youtube_dl/extractor/savefrom.py
@@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor):
'upload_date': '20120816',
'uploader': 'Howcast',
'uploader_id': 'Howcast',
- 'description': 're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
+ 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
},
'params': {
'skip_download': True
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py
index 43131fb..845712a 100644
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -22,7 +22,7 @@ class SBSIE(InfoExtractor):
'ext': 'mp4',
'title': 'Dingo Conservation (The Feed)',
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'duration': 308,
'timestamp': 1408613220,
'upload_date': '20140821',
diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py
index ed9de96..62a6a83 100644
--- a/youtube_dl/extractor/screencast.py
+++ b/youtube_dl/extractor/screencast.py
@@ -21,7 +21,7 @@ class ScreencastIE(InfoExtractor):
'ext': 'm4v',
'title': 'Color Measurement with Ocean Optics Spectrometers',
'description': 'md5:240369cde69d8bed61349a199c5fb153',
- 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
}
}, {
'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
@@ -31,7 +31,7 @@ class ScreencastIE(InfoExtractor):
'ext': 'mov',
'title': 'The Amadeus Spectrometer',
'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
- 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
}
}, {
'url': 'http://www.screencast.com/t/aAB3iowa',
@@ -41,7 +41,7 @@ class ScreencastIE(InfoExtractor):
'ext': 'mp4',
'title': 'Google Earth Export',
'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
- 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
}
}, {
'url': 'http://www.screencast.com/t/X3ddTrYh',
@@ -51,7 +51,7 @@ class ScreencastIE(InfoExtractor):
'ext': 'wmv',
'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
- 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
}
}, {
'url': 'http://screencast.com/t/aAB3iowa',
diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py
index 7a88a42..94a2a37 100644
--- a/youtube_dl/extractor/screencastomatic.py
+++ b/youtube_dl/extractor/screencastomatic.py
@@ -14,7 +14,7 @@ class ScreencastOMaticIE(JWPlatformBaseIE):
'id': 'c2lD3BeOPl',
'ext': 'mp4',
'title': 'Welcome to 3-4 Philosophy @ DECV!',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
'duration': 369.163,
}
diff --git a/youtube_dl/extractor/screenjunkies.py b/youtube_dl/extractor/screenjunkies.py
deleted file mode 100644
index 02e574c..0000000
--- a/youtube_dl/extractor/screenjunkies.py
+++ /dev/null
@@ -1,138 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_age_limit,
-)
-
-
-class ScreenJunkiesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?screenjunkies\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
- _TESTS = [{
- 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
- 'md5': '5c2b686bec3d43de42bde9ec047536b0',
- 'info_dict': {
- 'id': '2841915',
- 'display_id': 'best-quentin-tarantino-movie',
- 'ext': 'mp4',
- 'title': 'Best Quentin Tarantino Movie',
- 'thumbnail': 're:^https?://.*\.jpg',
- 'duration': 3671,
- 'age_limit': 13,
- 'tags': list,
- },
- }, {
- 'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight',
- 'info_dict': {
- 'id': '2348808',
- 'display_id': 'honest-trailers-the-dark-knight',
- 'ext': 'mp4',
- 'title': "Honest Trailers: 'The Dark Knight'",
- 'thumbnail': 're:^https?://.*\.jpg',
- 'age_limit': 10,
- 'tags': list,
- },
- }, {
- # requires subscription but worked around
- 'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285',
- 'info_dict': {
- 'id': '3003285',
- 'display_id': 'knocking-dead-ep-1-the-show-so-far',
- 'ext': 'mp4',
- 'title': 'Knocking Dead Ep 1: State of The Dead Recap',
- 'thumbnail': 're:^https?://.*\.jpg',
- 'duration': 3307,
- 'age_limit': 13,
- 'tags': list,
- },
- }]
-
- _DEFAULT_BITRATES = (48, 150, 496, 864, 2240)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- if not video_id:
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(
- (r'src=["\']/embed/(\d+)', r'data-video-content-id=["\'](\d+)'),
- webpage, 'video id')
-
- webpage = self._download_webpage(
- 'http://www.screenjunkies.com/embed/%s' % video_id,
- display_id, 'Downloading video embed page')
- embed_vars = self._parse_json(
- self._search_regex(
- r'(?s)embedVars\s*=\s*({.+?})\s*</script>', webpage, 'embed vars'),
- display_id)
-
- title = embed_vars['contentName']
-
- formats = []
- bitrates = []
- for f in embed_vars.get('media', []):
- if not f.get('uri') or f.get('mediaPurpose') != 'play':
- continue
- bitrate = int_or_none(f.get('bitRate'))
- if bitrate:
- bitrates.append(bitrate)
- formats.append({
- 'url': f['uri'],
- 'format_id': 'http-%d' % bitrate if bitrate else 'http',
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height')),
- 'tbr': bitrate,
- 'format': 'mp4',
- })
-
- if not bitrates:
- # When subscriptionLevel > 0, i.e. plus subscription is required
- # media list will be empty. However, hds and hls uris are still
- # available. We can grab them assuming bitrates to be default.
- bitrates = self._DEFAULT_BITRATES
-
- auth_token = embed_vars.get('AuthToken')
-
- def construct_manifest_url(base_url, ext):
- pieces = [base_url]
- pieces.extend([compat_str(b) for b in bitrates])
- pieces.append('_kbps.mp4.%s?%s' % (ext, auth_token))
- return ','.join(pieces)
-
- if bitrates and auth_token:
- hds_url = embed_vars.get('hdsUri')
- if hds_url:
- f4m_formats = self._extract_f4m_formats(
- construct_manifest_url(hds_url, 'f4m'),
- display_id, f4m_id='hds', fatal=False)
- if len(f4m_formats) == len(bitrates):
- for f, bitrate in zip(f4m_formats, bitrates):
- if not f.get('tbr'):
- f['format_id'] = 'hds-%d' % bitrate
- f['tbr'] = bitrate
- # TODO: fix f4m downloader to handle manifests without bitrates if possible
- # formats.extend(f4m_formats)
-
- hls_url = embed_vars.get('hlsUri')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- construct_manifest_url(hls_url, 'm3u8'),
- display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'thumbnail': embed_vars.get('thumbUri'),
- 'duration': int_or_none(embed_vars.get('videoLengthInSeconds')) or None,
- 'age_limit': parse_age_limit(embed_vars.get('audienceRating')),
- 'tags': embed_vars.get('tags', '').split(','),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py
index 35540c0..387a4f7 100644
--- a/youtube_dl/extractor/senateisvp.py
+++ b/youtube_dl/extractor/senateisvp.py
@@ -55,7 +55,7 @@ class SenateISVPIE(InfoExtractor):
'id': 'judiciary031715',
'ext': 'mp4',
'title': 'Integrated Senate Video Player',
- 'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py
index 2dbe490..9880a5a 100644
--- a/youtube_dl/extractor/sendtonews.py
+++ b/youtube_dl/extractor/sendtonews.py
@@ -8,6 +8,9 @@ from ..utils import (
float_or_none,
parse_iso8601,
update_url_query,
+ int_or_none,
+ determine_protocol,
+ unescapeHTML,
)
@@ -20,18 +23,18 @@ class SendtoNewsIE(JWPlatformBaseIE):
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588'
},
- 'playlist_count': 9,
+ 'playlist_count': 8,
# test the first video only to prevent lengthy tests
'playlist': [{
'info_dict': {
- 'id': '198180',
+ 'id': '240385',
'ext': 'mp4',
- 'title': 'Recap: CLE 5, LAA 4',
- 'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win',
- 'duration': 57.343,
- 'thumbnail': 're:https?://.*\.jpg$',
- 'upload_date': '20160815',
- 'timestamp': 1471221961,
+ 'title': 'Indians introduce Encarnacion',
+ 'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
+ 'duration': 137.898,
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20170105',
+ 'timestamp': 1483649762,
},
}],
'params': {
@@ -64,7 +67,20 @@ class SendtoNewsIE(JWPlatformBaseIE):
for video in playlist_data['playlistData'][0]:
info_dict = self._parse_jwplayer_data(
video['jwconfiguration'],
- require_title=False, rtmp_params={'no_resume': True})
+ require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})
+
+ for f in info_dict['formats']:
+ if f.get('tbr'):
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'/(\d+)k/', f['url'], 'bitrate', default=None))
+ if not tbr:
+ continue
+ f.update({
+ 'format_id': '%s-%d' % (determine_protocol(f), tbr),
+ 'tbr': tbr,
+ })
+ self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id'))
thumbnails = []
if video.get('thumbnailUrl'):
@@ -78,8 +94,8 @@ class SendtoNewsIE(JWPlatformBaseIE):
'url': video['smThumbnailUrl'],
})
info_dict.update({
- 'title': video['S_headLine'],
- 'description': video.get('S_fullStory'),
+ 'title': video['S_headLine'].strip(),
+ 'description': unescapeHTML(video.get('S_fullStory')),
'thumbnails': thumbnails,
'duration': float_or_none(video.get('SM_length')),
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
diff --git a/youtube_dl/extractor/sexu.py b/youtube_dl/extractor/sexu.py
index a99b2a8..5e22ea7 100644
--- a/youtube_dl/extractor/sexu.py
+++ b/youtube_dl/extractor/sexu.py
@@ -14,7 +14,7 @@ class SexuIE(InfoExtractor):
'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
'categories': list, # NSFW
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
}
}
diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py
deleted file mode 100644
index 9cce5ce..0000000
--- a/youtube_dl/extractor/sharesix.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class ShareSixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
- _TESTS = [
- {
- 'url': 'http://sharesix.com/f/OXjQ7Y6',
- 'md5': '9e8e95d8823942815a7d7c773110cc93',
- 'info_dict': {
- 'id': 'OXjQ7Y6',
- 'ext': 'mp4',
- 'title': 'big_buck_bunny_480p_surround-fix.avi',
- 'duration': 596,
- 'width': 854,
- 'height': 480,
- },
- },
- {
- 'url': 'http://sharesix.com/lfrwoxp35zdd',
- 'md5': 'dd19f1435b7cec2d7912c64beeee8185',
- 'info_dict': {
- 'id': 'lfrwoxp35zdd',
- 'ext': 'flv',
- 'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
- 'duration': 65,
- 'width': 1280,
- 'height': 720,
- },
- }
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- fields = {
- 'method_free': 'Free'
- }
- post = urlencode_postdata(fields)
- req = sanitized_Request(url, post)
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
- webpage = self._download_webpage(req, video_id,
- 'Downloading video page')
-
- video_url = self._search_regex(
- r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
- title = self._html_search_regex(
- r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
- duration = parse_duration(
- self._search_regex(
- r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
- webpage,
- 'duration',
- fatal=False
- )
- )
-
- m = re.search(
- r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
- <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
- webpage
- )
- width = height = None
- if m:
- width, height = int(m.group('width')), int(m.group('height'))
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- 'width': width,
- 'height': height,
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'duration': duration,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/showroomlive.py b/youtube_dl/extractor/showroomlive.py
new file mode 100644
index 0000000..efd9d56
--- /dev/null
+++ b/youtube_dl/extractor/showroomlive.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urljoin,
+)
+
+
+class ShowRoomLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.showroom-live.com/48_Nana_Okada',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ broadcaster_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, broadcaster_id)
+
+ room_id = self._search_regex(
+ (r'SrGlobal\.roomId\s*=\s*(\d+)',
+ r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id')
+
+ room = self._download_json(
+ urljoin(url, '/api/room/profile?room_id=%s' % room_id),
+ broadcaster_id)
+
+ is_live = room.get('is_onlive')
+ if is_live is not True:
+ raise ExtractorError('%s is offline' % broadcaster_id, expected=True)
+
+ uploader = room.get('performer_name') or broadcaster_id
+ title = room.get('room_name') or room.get('main_name') or uploader
+
+ streaming_url_list = self._download_json(
+ urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id),
+ broadcaster_id)['streaming_url_list']
+
+ formats = []
+ for stream in streaming_url_list:
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ stream_type = stream.get('type')
+ if stream_type == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ stream_url, broadcaster_id, ext='mp4', m3u8_id='hls',
+ live=True)
+ for f in m3u8_formats:
+ f['quality'] = int_or_none(stream.get('quality', 100))
+ formats.extend(m3u8_formats)
+ elif stream_type == 'rtmp':
+ stream_name = stream.get('stream_name')
+ if not stream_name:
+ continue
+ formats.append({
+ 'url': stream_url,
+ 'play_path': stream_name,
+ 'page_url': url,
+ 'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf',
+ 'rtmp_live': True,
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ 'format_note': stream.get('label'),
+ 'quality': int_or_none(stream.get('quality', 100)),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': compat_str(room.get('live_id') or broadcaster_id),
+ 'title': self._live_title(title),
+ 'description': room.get('description'),
+ 'timestamp': int_or_none(room.get('current_live_started_at')),
+ 'uploader': uploader,
+ 'uploader_id': broadcaster_id,
+ 'view_count': int_or_none(room.get('view_num')),
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/youtube_dl/extractor/skysports.py b/youtube_dl/extractor/skysports.py
index 9dc78c7..4ca9f6b 100644
--- a/youtube_dl/extractor/skysports.py
+++ b/youtube_dl/extractor/skysports.py
@@ -2,18 +2,19 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import strip_or_none
class SkySportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
- 'md5': 'c44a1db29f27daf9a0003e010af82100',
+ 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
'info_dict': {
'id': '10328419',
- 'ext': 'flv',
- 'title': 'Bale: Its our time to shine',
- 'description': 'md5:9fd1de3614d525f5addda32ac3c482c9',
+ 'ext': 'mp4',
+ 'title': 'Bale: It\'s our time to shine',
+ 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
},
'add_ie': ['Ooyala'],
}
@@ -28,6 +29,6 @@ class SkySportsIE(InfoExtractor):
'url': 'ooyala:%s' % self._search_regex(
r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
+ 'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',
}
diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py
index 18cc772..7145d28 100644
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -13,7 +13,7 @@ class SlutloadIE(InfoExtractor):
'ext': 'mp4',
'title': 'virginie baisee en cam',
'age_limit': 18,
- 'thumbnail': 're:https?://.*?\.jpg'
+ 'thumbnail': r're:https?://.*?\.jpg'
}
}
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index def46ab..370fa88 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -81,7 +81,7 @@ class SmotriIE(InfoExtractor):
'uploader': 'psavari1',
'uploader_id': 'psavari1',
'upload_date': '20081103',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'videopassword': '223322',
@@ -117,7 +117,7 @@ class SmotriIE(InfoExtractor):
'uploader': 'вАся',
'uploader_id': 'asya_prosto',
'upload_date': '20081218',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 18,
},
'params': {
diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py
index 4819fe5..f773547 100644
--- a/youtube_dl/extractor/snotr.py
+++ b/youtube_dl/extractor/snotr.py
@@ -22,7 +22,7 @@ class SnotrIE(InfoExtractor):
'duration': 248,
'filesize_approx': 40700000,
'description': 'A drone flying through Fourth of July Fireworks',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'expected_warnings': ['description'],
}, {
@@ -34,7 +34,7 @@ class SnotrIE(InfoExtractor):
'duration': 126,
'filesize_approx': 8500000,
'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}]
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 5a201ea..b3aa4ce 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -173,46 +173,54 @@ class SoundcloudIE(InfoExtractor):
})
# We have to retrieve the url
- streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
- 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
format_dict = self._download_json(
- streams_url,
- track_id, 'Downloading track url')
+ 'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
+ track_id, 'Downloading track url', query={
+ 'client_id': self._CLIENT_ID,
+ 'secret_token': secret_token,
+ })
for key, stream_url in format_dict.items():
+ abr = int_or_none(self._search_regex(
+ r'_(\d+)_url', key, 'audio bitrate', default=None))
if key.startswith('http'):
- formats.append({
+ stream_formats = [{
'format_id': key,
'ext': ext,
'url': stream_url,
- 'vcodec': 'none',
- })
+ }]
elif key.startswith('rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
- formats.append({
+ stream_formats = [{
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
'ext': 'flv',
- 'vcodec': 'none',
- })
-
- if not formats:
- # We fallback to the stream_url in the original info, this
- # cannot be always used, sometimes it can give an HTTP 404 error
- formats.append({
- 'format_id': 'fallback',
- 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
- 'ext': ext,
- 'vcodec': 'none',
- })
-
- for f in formats:
- if f['format_id'].startswith('http'):
- f['protocol'] = 'http'
- if f['format_id'].startswith('rtmp'):
- f['protocol'] = 'rtmp'
+ }]
+ elif key.startswith('hls'):
+ stream_formats = self._extract_m3u8_formats(
+ stream_url, track_id, 'mp3', entry_protocol='m3u8_native',
+ m3u8_id=key, fatal=False)
+ else:
+ continue
+
+ for f in stream_formats:
+ f['abr'] = abr
+
+ formats.extend(stream_formats)
+
+ if not formats:
+ # We fallback to the stream_url in the original info, this
+ # cannot be always used, sometimes it can give an HTTP 404 error
+ formats.append({
+ 'format_id': 'fallback',
+ 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+ 'ext': ext,
+ })
+
+ for f in formats:
+ f['vcodec'] = 'none'
self._check_formats(formats, track_id)
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py
index 3a4ddf5..e004e2c 100644
--- a/youtube_dl/extractor/soundgasm.py
+++ b/youtube_dl/extractor/soundgasm.py
@@ -27,7 +27,7 @@ class SoundgasmIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
audio_url = self._html_search_regex(
r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
- audio_id = re.split('\/|\.', audio_url)[-2]
+ audio_id = re.split(r'\/|\.', audio_url)[-2]
description = self._html_search_regex(
r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
fatal=False)
diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py
index 08f8c57..d8ce416 100644
--- a/youtube_dl/extractor/southpark.py
+++ b/youtube_dl/extractor/southpark.py
@@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
class SouthParkIE(MTVServicesInfoExtractor):
IE_NAME = 'southpark.cc.com'
- _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
@@ -75,7 +75,7 @@ class SouthParkDeIE(SouthParkIE):
class SouthParkNlIE(SouthParkIE):
IE_NAME = 'southpark.nl'
- _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
_TESTS = [{
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index 186d22b..123c33a 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor):
'ext': 'mp4',
'title': 'fantasy solo',
'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'silly2587',
'age_limit': 18,
}
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
index 92a7120..44d8fa5 100644
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -85,7 +85,7 @@ class SpankwireIE(InfoExtractor):
r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
heights = [int(video[0]) for video in videos]
video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
- if webpage.find('flashvars\.encrypted = "true"') != -1:
+ if webpage.find(r'flashvars\.encrypted = "true"') != -1:
password = self._search_regex(
r'flashvars\.video_title = "([^"]+)',
webpage, 'password').replace('+', ' ')
diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py
index 034bd47..e1cfb86 100644
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@@ -18,7 +18,7 @@ class SpiegeltvIE(InfoExtractor):
'ext': 'm4v',
'title': 'Flug MH370',
'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
- 'thumbnail': 're:http://.*\.jpg$',
+ 'thumbnail': r're:http://.*\.jpg$',
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
index abfee3e..c59896a 100644
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@@ -46,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor):
_CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
def _extract_mgid(self, webpage):
- mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None)
+ mgid = super(SpikeIE, self)._extract_mgid(webpage)
if mgid is None:
url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id')
video_type, episode_id = url_parts.split('/', 1)
diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py
index 7e67833..a417b5a 100644
--- a/youtube_dl/extractor/sport5.py
+++ b/youtube_dl/extractor/sport5.py
@@ -41,7 +41,7 @@ class Sport5IE(InfoExtractor):
webpage = self._download_webpage(url, media_id)
- video_id = self._html_search_regex('clipId=([\w-]+)', webpage, 'video id')
+ video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id')
metadata = self._download_xml(
'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id,
diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py
index e5c28ae..e7bd5bf 100644
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@@ -4,65 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- js_to_json,
- unified_strdate,
-)
-
-
-class SportBoxIE(InfoExtractor):
- _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)'
- _TESTS = [{
- 'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S',
- 'md5': 'ff56a598c2cf411a9a38a69709e97079',
- 'info_dict': {
- 'id': '80822',
- 'ext': 'mp4',
- 'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
- 'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'upload_date': '20140928',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4',
- 'only_matching': True,
- }, {
- 'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- player = self._search_regex(
- r'src="/?(vdl/player/[^"]+)"', webpage, 'player')
-
- title = self._html_search_regex(
- [r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'],
- webpage, 'title')
- description = self._og_search_description(webpage) or self._html_search_meta(
- 'description', webpage, 'description')
- thumbnail = self._og_search_thumbnail(webpage)
- upload_date = unified_strdate(self._html_search_meta(
- 'dateCreated', webpage, 'upload date'))
-
- return {
- '_type': 'url_transparent',
- 'url': compat_urlparse.urljoin(url, '/%s' % player),
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- }
+from ..utils import js_to_json
class SportBoxEmbedIE(InfoExtractor):
@@ -73,7 +15,7 @@ class SportBoxEmbedIE(InfoExtractor):
'id': '211355',
'ext': 'mp4',
'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
index a9927f6..a3c35a8 100644
--- a/youtube_dl/extractor/sportdeutschland.py
+++ b/youtube_dl/extractor/sportdeutschland.py
@@ -20,8 +20,8 @@ class SportDeutschlandIE(InfoExtractor):
'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
'categories': ['Badminton'],
'view_count': int,
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': r're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV',
'timestamp': int,
'upload_date': 're:^201408[23][0-9]$',
},
@@ -38,7 +38,7 @@ class SportDeutschlandIE(InfoExtractor):
'timestamp': 1408976060,
'duration': 2732,
'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'view_count': int,
'categories': ['Li-Ning Badminton WM 2014'],
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
index 246970c..319a48a 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
parse_iso8601,
@@ -23,6 +24,16 @@ class SRGSSRIE(InfoExtractor):
'STARTDATE': 'This video is not yet available. Please try again later.',
}
+ def _get_tokenized_src(self, url, video_id, format_id):
+ sp = compat_urllib_parse_urlparse(url).path.split('/')
+ token = self._download_json(
+ 'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (sp[1], sp[2]),
+ video_id, 'Downloading %s token' % format_id, fatal=False) or {}
+ auth_params = token.get('token', {}).get('authparams')
+ if auth_params:
+ url += '?' + auth_params
+ return url
+
def get_media_data(self, bu, media_type, media_id):
media_data = self._download_json(
'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id),
@@ -37,9 +48,6 @@ class SRGSSRIE(InfoExtractor):
def _real_extract(self, url):
bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
- if bu == 'rts':
- return self.url_result('rts:%s' % media_id, 'RTS')
-
media_data = self.get_media_data(bu, media_type, media_id)
metadata = media_data['AssetMetadatas']['AssetMetadata'][0]
@@ -61,14 +69,16 @@ class SRGSSRIE(InfoExtractor):
asset_url = asset['text']
quality = asset['@quality']
format_id = '%s-%s' % (protocol, quality)
- if protocol == 'HTTP-HDS':
- formats.extend(self._extract_f4m_formats(
- asset_url + '?hdcore=3.4.0', media_id,
- f4m_id=format_id, fatal=False))
- elif protocol == 'HTTP-HLS':
- formats.extend(self._extract_m3u8_formats(
- asset_url, media_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False))
+ if protocol.startswith('HTTP-HDS') or protocol.startswith('HTTP-HLS'):
+ asset_url = self._get_tokenized_src(asset_url, media_id, format_id)
+ if protocol.startswith('HTTP-HDS'):
+ formats.extend(self._extract_f4m_formats(
+ asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0',
+ media_id, f4m_id=format_id, fatal=False))
+ elif protocol.startswith('HTTP-HLS'):
+ formats.extend(self._extract_m3u8_formats(
+ asset_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
else:
formats.append({
'format_id': format_id,
@@ -94,10 +104,10 @@ class SRGSSRPlayIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
- 'md5': '4cd93523723beff51bb4bee974ee238d',
+ 'md5': 'da6b5b3ac9fa4761a942331cef20fcb3',
'info_dict': {
'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
- 'ext': 'm4v',
+ 'ext': 'mp4',
'upload_date': '20130701',
'title': 'Snowden beantragt Asyl in Russland',
'timestamp': 1372713995,
@@ -140,7 +150,7 @@ class SRGSSRPlayIE(InfoExtractor):
'uploader': '19h30',
'upload_date': '20141201',
'timestamp': 1417458600,
- 'thumbnail': 're:^https?://.*\.image',
+ 'thumbnail': r're:^https?://.*\.image',
'view_count': int,
},
'params': {
diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py
index b03272f..28baf90 100644
--- a/youtube_dl/extractor/srmediathek.py
+++ b/youtube_dl/extractor/srmediathek.py
@@ -20,7 +20,7 @@ class SRMediathekIE(ARDMediathekIE):
'ext': 'mp4',
'title': 'sportarena (26.10.2014)',
'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'no longer available',
}, {
diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py
index 4a3d8bb..cce65fb 100644
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
r'(?s)<description>([^<]+)</description>',
coursepage, 'description', fatal=False)
- links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
+ links = orderedSet(re.findall(r'<a href="(VideoPage.php\?[^"]+)">', coursepage))
info['entries'] = [self.url_result(
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
) for l in links]
@@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
rootpage = self._download_webpage(rootURL, info['id'],
errnote='Unable to download course info page')
- links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
+ links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage))
info['entries'] = [self.url_result(
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
) for l in links]
diff --git a/youtube_dl/extractor/stitcher.py b/youtube_dl/extractor/stitcher.py
index 0f8782d..97d1ff6 100644
--- a/youtube_dl/extractor/stitcher.py
+++ b/youtube_dl/extractor/stitcher.py
@@ -22,7 +22,7 @@ class StitcherIE(InfoExtractor):
'title': 'Machine Learning Mastery and Cancer Clusters',
'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3',
'duration': 1604,
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
@@ -33,7 +33,7 @@ class StitcherIE(InfoExtractor):
'title': "The CW's 'Crazy Ex-Girlfriend'",
'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
'duration': 2235,
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
'skip_download': True,
diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py
index 2c26fa6..e973c86 100644
--- a/youtube_dl/extractor/streamable.py
+++ b/youtube_dl/extractor/streamable.py
@@ -21,7 +21,7 @@ class StreamableIE(InfoExtractor):
'id': 'dnd1',
'ext': 'mp4',
'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'uploader': 'teabaker',
'timestamp': 1454964157.35115,
'upload_date': '20160208',
@@ -37,7 +37,7 @@ class StreamableIE(InfoExtractor):
'id': 'moo',
'ext': 'mp4',
'title': '"Please don\'t eat me!"',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'timestamp': 1426115495,
'upload_date': '20150311',
'duration': 12,
diff --git a/youtube_dl/extractor/streetvoice.py b/youtube_dl/extractor/streetvoice.py
index e529051..91612c7 100644
--- a/youtube_dl/extractor/streetvoice.py
+++ b/youtube_dl/extractor/streetvoice.py
@@ -16,7 +16,7 @@ class StreetVoiceIE(InfoExtractor):
'ext': 'mp3',
'title': '輸',
'description': 'Crispy脆樂團 - 輸',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 260,
'upload_date': '20091018',
'uploader': 'Crispy脆樂團',
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
index ef9be79..6805116 100644
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@@ -21,7 +21,7 @@ class SunPornoIE(InfoExtractor):
'ext': 'mp4',
'title': 'md5:0a400058e8105d39e35c35e7c5184164',
'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 302,
'age_limit': 18,
}
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index fb0a4b2..10cf808 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -129,7 +129,7 @@ class SVTPlayIE(SVTBaseIE):
'ext': 'mp4',
'title': 'Flygplan till Haile Selassie',
'duration': 3527,
- 'thumbnail': 're:^https?://.*[\.-]jpg$',
+ 'thumbnail': r're:^https?://.*[\.-]jpg$',
'age_limit': 0,
'subtitles': {
'sv': [{
diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py
index 6d69f76..0f61597 100644
--- a/youtube_dl/extractor/swrmediathek.py
+++ b/youtube_dl/extractor/swrmediathek.py
@@ -1,10 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import parse_duration
+from ..utils import (
+ parse_duration,
+ int_or_none,
+ determine_protocol,
+)
class SWRMediathekIE(InfoExtractor):
@@ -18,7 +20,7 @@ class SWRMediathekIE(InfoExtractor):
'ext': 'mp4',
'title': 'SWR odysso',
'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
- 'thumbnail': 're:^http:.*\.jpg$',
+ 'thumbnail': r're:^http:.*\.jpg$',
'duration': 2602,
'upload_date': '20140515',
'uploader': 'SWR Fernsehen',
@@ -32,12 +34,13 @@ class SWRMediathekIE(InfoExtractor):
'ext': 'mp4',
'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'duration': 5305,
'upload_date': '20140516',
'uploader': 'SWR Fernsehen',
'uploader_id': '990030',
},
+ 'skip': 'redirect to http://swrmediathek.de/index.htm?hinweis=swrlink',
}, {
'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6',
'md5': '4382e4ef2c9d7ce6852535fa867a0dd3',
@@ -46,59 +49,67 @@ class SWRMediathekIE(InfoExtractor):
'ext': 'mp3',
'title': 'Saša Stanišic: Vor dem Fest',
'description': 'md5:5b792387dc3fbb171eb709060654e8c9',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'duration': 3366,
'upload_date': '20140520',
'uploader': 'SWR 2',
'uploader_id': '284670',
- }
+ },
+ 'skip': 'redirect to http://swrmediathek.de/index.htm?hinweis=swrlink',
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
video = self._download_json(
- 'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON')
+ 'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id,
+ video_id, 'Downloading video JSON')
attr = video['attr']
- media_type = attr['entry_etype']
+ title = attr['entry_title']
+ media_type = attr.get('entry_etype')
formats = []
- for entry in video['sub']:
- if entry['name'] != 'entry_media':
+ for entry in video.get('sub', []):
+ if entry.get('name') != 'entry_media':
continue
- entry_attr = entry['attr']
- codec = entry_attr['val0']
- quality = int(entry_attr['val1'])
-
- fmt = {
- 'url': entry_attr['val2'],
- 'quality': quality,
- }
-
- if media_type == 'Video':
- fmt.update({
- 'format_note': ['144p', '288p', '544p', '720p'][quality - 1],
- 'vcodec': codec,
- })
- elif media_type == 'Audio':
- fmt.update({
- 'acodec': codec,
+ entry_attr = entry.get('attr', {})
+ f_url = entry_attr.get('val2')
+ if not f_url:
+ continue
+ codec = entry_attr.get('val0')
+ if codec == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif codec == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ f_url + '?hdcore=3.7.0', video_id,
+ f4m_id='hds', fatal=False))
+ else:
+ formats.append({
+ 'format_id': determine_protocol({'url': f_url}),
+ 'url': f_url,
+ 'quality': int_or_none(entry_attr.get('val1')),
+ 'vcodec': codec if media_type == 'Video' else 'none',
+ 'acodec': codec if media_type == 'Audio' else None,
})
- formats.append(fmt)
-
self._sort_formats(formats)
+ upload_date = None
+ entry_pdatet = attr.get('entry_pdatet')
+ if entry_pdatet:
+ upload_date = entry_pdatet[:-4]
+
return {
'id': video_id,
- 'title': attr['entry_title'],
- 'description': attr['entry_descl'],
- 'thumbnail': attr['entry_image_16_9'],
- 'duration': parse_duration(attr['entry_durat']),
- 'upload_date': attr['entry_pdatet'][:-4],
- 'uploader': attr['channel_title'],
- 'uploader_id': attr['channel_idkey'],
+ 'title': title,
+ 'description': attr.get('entry_descl'),
+ 'thumbnail': attr.get('entry_image_16_9'),
+ 'duration': parse_duration(attr.get('entry_durat')),
+ 'upload_date': upload_date,
+ 'uploader': attr.get('channel_title'),
+ 'uploader_id': attr.get('channel_idkey'),
'formats': formats,
}
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index 8670cee..c351b75 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -23,7 +23,7 @@ class TagesschauPlayerIE(InfoExtractor):
'id': '179517',
'ext': 'mp4',
'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
- 'thumbnail': 're:^https?:.*\.jpg$',
+ 'thumbnail': r're:^https?:.*\.jpg$',
'formats': 'mincount:6',
},
}, {
@@ -33,7 +33,7 @@ class TagesschauPlayerIE(InfoExtractor):
'id': '29417',
'ext': 'mp3',
'title': 'Trabi - Bye, bye Rennpappe',
- 'thumbnail': 're:^https?:.*\.jpg$',
+ 'thumbnail': r're:^https?:.*\.jpg$',
'formats': 'mincount:2',
},
}, {
@@ -135,7 +135,7 @@ class TagesschauIE(InfoExtractor):
'ext': 'mp4',
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
'description': '18.07.2015 20:10 Uhr',
- 'thumbnail': 're:^https?:.*\.jpg$',
+ 'thumbnail': r're:^https?:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
@@ -145,7 +145,7 @@ class TagesschauIE(InfoExtractor):
'ext': 'mp4',
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
'description': 'md5:695c01bfd98b7e313c501386327aea59',
- 'thumbnail': 're:^https?:.*\.jpg$',
+ 'thumbnail': r're:^https?:.*\.jpg$',
},
}, {
# exclusive audio
@@ -156,7 +156,7 @@ class TagesschauIE(InfoExtractor):
'ext': 'mp3',
'title': 'Trabi - Bye, bye Rennpappe',
'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
- 'thumbnail': 're:^https?:.*\.jpg$',
+ 'thumbnail': r're:^https?:.*\.jpg$',
},
}, {
# audio in article
@@ -167,7 +167,7 @@ class TagesschauIE(InfoExtractor):
'ext': 'mp3',
'title': 'Viele Baustellen für neuen BND-Chef',
'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
- 'thumbnail': 're:^https?:.*\.jpg$',
+ 'thumbnail': r're:^https?:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
diff --git a/youtube_dl/extractor/tass.py b/youtube_dl/extractor/tass.py
index 5293393..6d336da 100644
--- a/youtube_dl/extractor/tass.py
+++ b/youtube_dl/extractor/tass.py
@@ -21,7 +21,7 @@ class TassIE(InfoExtractor):
'ext': 'mp4',
'title': 'Посетителям московского зоопарка показали красную панду',
'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
diff --git a/youtube_dl/extractor/tdslifeway.py b/youtube_dl/extractor/tdslifeway.py
index 4d1f5c8..101c6ee 100644
--- a/youtube_dl/extractor/tdslifeway.py
+++ b/youtube_dl/extractor/tdslifeway.py
@@ -13,7 +13,7 @@ class TDSLifewayIE(InfoExtractor):
'id': '3453494717001',
'ext': 'mp4',
'title': 'The Gospel by Numbers',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'upload_date': '20140410',
'description': 'Coming soon from T4G 2014!',
'uploader_id': '2034960640001',
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py
index df5d555..f14713a 100644
--- a/youtube_dl/extractor/teachertube.py
+++ b/youtube_dl/extractor/teachertube.py
@@ -24,7 +24,7 @@ class TeacherTubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Measures of dispersion from a frequency table',
'description': 'Measures of dispersion from a frequency table',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}, {
'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
@@ -34,7 +34,7 @@ class TeacherTubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'How to Make Paper Dolls _ Paper Art Projects',
'description': 'Learn how to make paper dolls in this simple',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}, {
'url': 'http://www.teachertube.com/music.php?music_id=8805',
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 451cde7..1b1afab 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -47,7 +47,7 @@ class TEDIE(InfoExtractor):
'id': 'tSVI8ta_P4w',
'ext': 'mp4',
'title': 'Vishal Sikka: The beauty and power of algorithms',
- 'thumbnail': 're:^https?://.+\.jpg',
+ 'thumbnail': r're:^https?://.+\.jpg',
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
'upload_date': '20140122',
'uploader_id': 'TEDInstitute',
@@ -189,7 +189,7 @@ class TEDIE(InfoExtractor):
'format_id': '%s-%sk' % (format_id, bitrate),
'tbr': bitrate,
})
- if re.search('\d+k', h264_url):
+ if re.search(r'\d+k', h264_url):
http_url = h264_url
elif format_id == 'rtmp':
streamer = talk_info.get('streamer')
diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py
index eefecc4..5886e9c 100644
--- a/youtube_dl/extractor/telebruxelles.py
+++ b/youtube_dl/extractor/telebruxelles.py
@@ -7,33 +7,30 @@ from .common import InfoExtractor
class TeleBruxellesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)'
_TESTS = [{
- 'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
- 'md5': '59439e568c9ee42fb77588b2096b214f',
+ 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/',
+ 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9',
'info_dict': {
- 'id': '11942',
- 'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus',
- 'ext': 'flv',
- 'title': 'Parlement : Francken et Galant répondent aux interpellations de l’opposition',
- 'description': 're:Les auditions des ministres se poursuivent*'
- },
- 'params': {
- 'skip_download': 'requires rtmpdump'
+ 'id': '158856',
+ 'display_id': 'que-risque-lauteur-dune-fausse-alerte-a-la-bombe',
+ 'ext': 'mp4',
+ 'title': 'Que risque l’auteur d’une fausse alerte à la bombe ?',
+ 'description': 'md5:3cf8df235d44ebc5426373050840e466',
},
}, {
- 'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/',
- 'md5': '181d3fbdcf20b909309e5aef5c6c6047',
+ 'url': 'http://bx1.be/sport/futsal-schaerbeek-sincline-5-3-a-thulin/',
+ 'md5': 'dfe07ecc9c153ceba8582ac912687675',
'info_dict': {
- 'id': '10091',
- 'display_id': 'basket-brussels-bat-mons-80-74',
- 'ext': 'flv',
- 'title': 'Basket : le Brussels bat Mons 80-74',
- 'description': 're:^Ils l\u2019on fait ! En basket, le B*',
- },
- 'params': {
- 'skip_download': 'requires rtmpdump'
+ 'id': '158433',
+ 'display_id': 'futsal-schaerbeek-sincline-5-3-a-thulin',
+ 'ext': 'mp4',
+ 'title': 'Futsal : Schaerbeek s’incline 5-3 à Thulin',
+ 'description': 'md5:fd013f1488d5e2dceb9cebe39e2d569b',
},
+ }, {
+ 'url': 'http://bx1.be/emission/bxenf1-gastronomie/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -50,13 +47,13 @@ class TeleBruxellesIE(InfoExtractor):
r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
webpage, 'RTMP url')
rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
+ formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
+ self._sort_formats(formats)
return {
'id': article_id or display_id,
'display_id': display_id,
'title': title,
'description': description,
- 'url': rtmp_url,
- 'ext': 'flv',
- 'rtmp_live': True # if rtmpdump is not called with "--live" argument, the download is blocked and can be completed
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py
index 58078c5..0f576c1 100644
--- a/youtube_dl/extractor/telegraaf.py
+++ b/youtube_dl/extractor/telegraaf.py
@@ -17,7 +17,7 @@ class TelegraafIE(InfoExtractor):
'ext': 'mp4',
'title': 'Tikibad ontruimd wegens brand',
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 33,
},
'params': {
diff --git a/youtube_dl/extractor/telemb.py b/youtube_dl/extractor/telemb.py
index 1bbd0e7..9bcac4e 100644
--- a/youtube_dl/extractor/telemb.py
+++ b/youtube_dl/extractor/telemb.py
@@ -19,7 +19,7 @@ class TeleMBIE(InfoExtractor):
'ext': 'mp4',
'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages',
'description': 'md5:bc5225f47b17c309761c856ad4776265',
- 'thumbnail': 're:^http://.*\.(?:jpg|png)$',
+ 'thumbnail': r're:^http://.*\.(?:jpg|png)$',
}
},
{
@@ -32,7 +32,7 @@ class TeleMBIE(InfoExtractor):
'ext': 'mp4',
'title': 'Havré - Incendie mortel - Les reportages',
'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a',
- 'thumbnail': 're:^http://.*\.(?:jpg|png)$',
+ 'thumbnail': r're:^http://.*\.(?:jpg|png)$',
}
},
]
diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py
index 7786b28..1207b1a 100644
--- a/youtube_dl/extractor/telewebion.py
+++ b/youtube_dl/extractor/telewebion.py
@@ -13,7 +13,7 @@ class TelewebionIE(InfoExtractor):
'id': '1263668',
'ext': 'mp4',
'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
},
'params': {
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index cfbf7f4..192d8fa 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -33,7 +33,9 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformBaseIE(OnceIE):
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
- meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
+ meta = self._download_xml(
+ smil_url, video_id, note=note, query={'format': 'SMIL'},
+ headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith(
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
@@ -154,7 +156,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
'title': 'iPhone Siri’s sassy response to a math question has people talking',
'description': 'md5:a565d1deadd5086f3331d57298ec6333',
'duration': 83.0,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1435752600,
'upload_date': '20150701',
'uploader': 'NBCU-NEWS',
@@ -295,7 +297,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
'ext': 'mp4',
'title': 'The Biden factor: will Joe run in 2016?',
'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140208',
'timestamp': 1391824260,
'duration': 467.0,
diff --git a/youtube_dl/extractor/thisamericanlife.py b/youtube_dl/extractor/thisamericanlife.py
index 36493a5..91e45f2 100644
--- a/youtube_dl/extractor/thisamericanlife.py
+++ b/youtube_dl/extractor/thisamericanlife.py
@@ -13,7 +13,7 @@ class ThisAmericanLifeIE(InfoExtractor):
'ext': 'm4a',
'title': '487: Harper High School, Part One',
'description': 'md5:ee40bdf3fb96174a9027f76dbecea655',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'http://www.thisamericanlife.org/play_full.php?play=487',
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
index 7629f0d..197258d 100644
--- a/youtube_dl/extractor/thisoldhouse.py
+++ b/youtube_dl/extractor/thisoldhouse.py
@@ -5,10 +5,10 @@ from .common import InfoExtractor
class ThisOldHouseIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
- 'md5': '568acf9ca25a639f0c4ff905826b662f',
+ 'md5': '946f05bbaa12a33f9ae35580d2dfcfe3',
'info_dict': {
'id': '2REGtUDQ',
'ext': 'mp4',
@@ -20,6 +20,9 @@ class ThisOldHouseIE(InfoExtractor):
}, {
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/tinypic.py b/youtube_dl/extractor/tinypic.py
index c43cace..bc2def5 100644
--- a/youtube_dl/extractor/tinypic.py
+++ b/youtube_dl/extractor/tinypic.py
@@ -34,7 +34,7 @@ class TinyPicIE(InfoExtractor):
webpage = self._download_webpage(url, video_id, 'Downloading page')
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
- '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
+ r'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
if mobj is None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py
index 77d56b8..7e6ec34 100644
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -91,7 +91,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
formats = []
def extract_video_url(vl):
- return re.sub('speed=\d+', 'speed=', unescapeHTML(vl.text))
+ return re.sub(r'speed=\d+', 'speed=', unescapeHTML(vl.text))
video_link = cfg_xml.find('./videoLink')
if video_link is not None:
@@ -174,7 +174,7 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
'display_id': '6538',
'ext': 'mp4',
'title': 'Educational xxx video',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
},
'params': {
@@ -209,7 +209,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
'display_id': 'Carmella-Decesare-striptease',
'ext': 'mp4',
'title': 'Carmella Decesare - striptease',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'duration': 91,
'age_limit': 18,
'categories': ['Porn Stars'],
@@ -224,7 +224,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
'ext': 'mp4',
'title': 'Educational xxx video',
'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'duration': 164,
'age_limit': 18,
'uploader': 'bobwhite39',
@@ -250,7 +250,7 @@ class EMPFlixIE(TNAFlixNetworkBaseIE):
'ext': 'mp4',
'title': 'Amateur Finger Fuck',
'description': 'Amateur solo finger fucking.',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'duration': 83,
'age_limit': 18,
'uploader': 'cwbike',
@@ -280,7 +280,7 @@ class MovieFapIE(TNAFlixNetworkBaseIE):
'ext': 'mp4',
'title': 'Experienced MILF Amazing Handjob',
'description': 'Experienced MILF giving an Amazing Handjob',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
'uploader': 'darvinfred06',
'view_count': int,
@@ -298,7 +298,7 @@ class MovieFapIE(TNAFlixNetworkBaseIE):
'ext': 'flv',
'title': 'Jeune Couple Russe',
'description': 'Amateur',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
'uploader': 'whiskeyjar',
'view_count': int,
diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py
index bb8b8e2..2aae55e 100644
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dl/extractor/tudou.py
@@ -23,7 +23,7 @@ class TudouIE(InfoExtractor):
'id': '159448201',
'ext': 'f4v',
'title': '卡马乔国足开大脚长传冲吊集锦',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1372113489000,
'description': '卡马乔卡家军,开大脚先进战术不完全集锦!',
'duration': 289.04,
@@ -36,7 +36,7 @@ class TudouIE(InfoExtractor):
'id': '117049447',
'ext': 'f4v',
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1349207518000,
'description': 'md5:294612423894260f2dcd5c6c04fe248b',
'duration': 5478.33,
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index ebe411e..7861435 100644
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -17,7 +17,7 @@ class TumblrIE(InfoExtractor):
'ext': 'mp4',
'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
}
}, {
'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
@@ -27,7 +27,7 @@ class TumblrIE(InfoExtractor):
'ext': 'mp4',
'title': '5SOS STRUM ;]',
'description': 'md5:dba62ac8639482759c8eb10ce474586a',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
}
}, {
'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video',
@@ -37,7 +37,7 @@ class TumblrIE(InfoExtractor):
'ext': 'mp4',
'title': 'HD Video Testing \u2014 Test description for my HD video',
'description': 'md5:97cc3ab5fcd27ee4af6356701541319c',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
'params': {
'format': 'hd',
@@ -92,7 +92,7 @@ class TumblrIE(InfoExtractor):
'title': 'Video by victoriassecret',
'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat',
'uploader_id': 'victoriassecret',
- 'thumbnail': 're:^https?://.*\.jpg'
+ 'thumbnail': r're:^https?://.*\.jpg'
},
'add_ie': ['Instagram'],
}]
diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py
index ae4cfae..7e51de8 100644
--- a/youtube_dl/extractor/tunein.py
+++ b/youtube_dl/extractor/tunein.py
@@ -11,6 +11,12 @@ from ..compat import compat_urlparse
class TuneInBaseIE(InfoExtractor):
_API_BASE_URL = 'http://tunein.com/tuner/tune/'
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/[pst]\d+)',
+ webpage)
+
def _real_extract(self, url):
content_id = self._match_id(url)
@@ -69,82 +75,83 @@ class TuneInClipIE(TuneInBaseIE):
_VALID_URL = r'https?://(?:www\.)?tunein\.com/station/.*?audioClipId\=(?P<id>\d+)'
_API_URL_QUERY = '?tuneType=AudioClip&audioclipId=%s'
- _TESTS = [
- {
- 'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816',
- 'md5': '99f00d772db70efc804385c6b47f4e77',
- 'info_dict': {
- 'id': '816',
- 'title': '32m',
- 'ext': 'mp3',
- },
+ _TESTS = [{
+ 'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816',
+ 'md5': '99f00d772db70efc804385c6b47f4e77',
+ 'info_dict': {
+ 'id': '816',
+ 'title': '32m',
+ 'ext': 'mp3',
},
- ]
+ }]
class TuneInStationIE(TuneInBaseIE):
IE_NAME = 'tunein:station'
- _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId\=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId=|embed/player/s)(?P<id>\d+)'
_API_URL_QUERY = '?tuneType=Station&stationId=%s'
@classmethod
def suitable(cls, url):
return False if TuneInClipIE.suitable(url) else super(TuneInStationIE, cls).suitable(url)
- _TESTS = [
- {
- 'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
- 'info_dict': {
- 'id': '34682',
- 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
- 'ext': 'mp3',
- 'location': 'Tacoma, WA',
- },
- 'params': {
- 'skip_download': True, # live stream
- },
+ _TESTS = [{
+ 'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
+ 'info_dict': {
+ 'id': '34682',
+ 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
+ 'ext': 'mp3',
+ 'location': 'Tacoma, WA',
+ },
+ 'params': {
+ 'skip_download': True, # live stream
},
- ]
+ }, {
+ 'url': 'http://tunein.com/embed/player/s6404/',
+ 'only_matching': True,
+ }]
class TuneInProgramIE(TuneInBaseIE):
IE_NAME = 'tunein:program'
- _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId\=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId=|embed/player/p)(?P<id>\d+)'
_API_URL_QUERY = '?tuneType=Program&programId=%s'
- _TESTS = [
- {
- 'url': 'http://tunein.com/radio/Jazz-24-p2506/',
- 'info_dict': {
- 'id': '2506',
- 'title': 'Jazz 24 on 91.3 WUKY-HD3',
- 'ext': 'mp3',
- 'location': 'Lexington, KY',
- },
- 'params': {
- 'skip_download': True, # live stream
- },
+ _TESTS = [{
+ 'url': 'http://tunein.com/radio/Jazz-24-p2506/',
+ 'info_dict': {
+ 'id': '2506',
+ 'title': 'Jazz 24 on 91.3 WUKY-HD3',
+ 'ext': 'mp3',
+ 'location': 'Lexington, KY',
},
- ]
+ 'params': {
+ 'skip_download': True, # live stream
+ },
+ }, {
+ 'url': 'http://tunein.com/embed/player/p191660/',
+ 'only_matching': True,
+ }]
class TuneInTopicIE(TuneInBaseIE):
IE_NAME = 'tunein:topic'
- _VALID_URL = r'https?://(?:www\.)?tunein\.com/topic/.*?TopicId\=(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:topic/.*?TopicId=|embed/player/t)(?P<id>\d+)'
_API_URL_QUERY = '?tuneType=Topic&topicId=%s'
- _TESTS = [
- {
- 'url': 'http://tunein.com/topic/?TopicId=101830576',
- 'md5': 'c31a39e6f988d188252eae7af0ef09c9',
- 'info_dict': {
- 'id': '101830576',
- 'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)',
- 'ext': 'mp3',
- 'location': 'Belgium',
- },
+ _TESTS = [{
+ 'url': 'http://tunein.com/topic/?TopicId=101830576',
+ 'md5': 'c31a39e6f988d188252eae7af0ef09c9',
+ 'info_dict': {
+ 'id': '101830576',
+ 'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)',
+ 'ext': 'mp3',
+ 'location': 'Belgium',
},
- ]
+ }, {
+ 'url': 'http://tunein.com/embed/player/t101830576/',
+ 'only_matching': True,
+ }]
class TuneInShortenerIE(InfoExtractor):
diff --git a/youtube_dl/extractor/turbo.py b/youtube_dl/extractor/turbo.py
index 7ae63a4..25aa9c5 100644
--- a/youtube_dl/extractor/turbo.py
+++ b/youtube_dl/extractor/turbo.py
@@ -24,7 +24,7 @@ class TurboIE(InfoExtractor):
'duration': 3715,
'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py
index 57ffedb..1c0be9f 100644
--- a/youtube_dl/extractor/turner.py
+++ b/youtube_dl/extractor/turner.py
@@ -100,9 +100,13 @@ class TurnerBaseIE(AdobePassIE):
formats.extend(self._extract_smil_formats(
video_url, video_id, fatal=False))
elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4',
- m3u8_id=format_id or 'hls', fatal=False))
+ m3u8_id=format_id or 'hls', fatal=False)
+ if '/secure/' in video_url and '?hdnea=' in video_url:
+ for f in m3u8_formats:
+ f['_seekable'] = False
+ formats.extend(m3u8_formats)
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
update_url_query(video_url, {'hdcore': '3.7.0'}),
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
index bd28267..d5071e8 100644
--- a/youtube_dl/extractor/tv2.py
+++ b/youtube_dl/extractor/tv2.py
@@ -126,7 +126,7 @@ class TV2ArticleIE(InfoExtractor):
if not assets:
# New embed pattern
- for v in re.findall('TV2ContentboxVideo\(({.+?})\)', webpage):
+ for v in re.findall(r'TV2ContentboxVideo\(({.+?})\)', webpage):
video = self._parse_json(
v, playlist_id, transform_source=js_to_json, fatal=False)
if not video:
diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py
index 5d2d8f1..ad79db9 100644
--- a/youtube_dl/extractor/tv4.py
+++ b/youtube_dl/extractor/tv4.py
@@ -4,11 +4,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- ExtractorError,
int_or_none,
parse_iso8601,
try_get,
- update_url_query,
+ determine_ext,
)
@@ -28,24 +27,24 @@ class TV4IE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
- 'md5': '909d6454b87b10a25aa04c4bdd416a9b',
+ 'md5': 'cb837212f342d77cec06e6dad190e96d',
'info_dict': {
'id': '2491650',
'ext': 'mp4',
'title': 'Kalla Fakta 5 (english subtitles)',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': int,
'upload_date': '20131125',
},
},
{
'url': 'http://www.tv4play.se/iframe/video/3054113',
- 'md5': '77f851c55139ffe0ebd41b6a5552489b',
+ 'md5': 'cb837212f342d77cec06e6dad190e96d',
'info_dict': {
'id': '3054113',
'ext': 'mp4',
'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
'timestamp': int,
'upload_date': '20150130',
@@ -75,11 +74,10 @@ class TV4IE(InfoExtractor):
# If is_geo_restricted is true, it doesn't necessarily mean we can't download it
if info.get('is_geo_restricted'):
self.report_warning('This content might not be available in your country due to licensing restrictions.')
- if info.get('requires_subscription'):
- raise ExtractorError('This content requires subscription.', expected=True)
title = info['title']
+ subtitles = {}
formats = []
# http formats are linked with unresolvable host
for kind in ('hls', ''):
@@ -87,26 +85,41 @@ class TV4IE(InfoExtractor):
'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
video_id, 'Downloading sources JSON', query={
'protocol': kind,
- 'videoFormat': 'MP4+WEBVTTS+WEBVTT',
+ 'videoFormat': 'MP4+WEBVTT',
})
- item = try_get(data, lambda x: x['playback']['items']['item'], dict)
- manifest_url = item.get('url')
- if not isinstance(manifest_url, compat_str):
+ items = try_get(data, lambda x: x['playback']['items']['item'])
+ if not items:
continue
- if kind == 'hls':
- formats.extend(self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=kind, fatal=False))
- else:
- formats.extend(self._extract_f4m_formats(
- update_url_query(manifest_url, {'hdcore': '3.8.0'}),
- video_id, f4m_id='hds', fatal=False))
+ if isinstance(items, dict):
+ items = [items]
+ for item in items:
+ manifest_url = item.get('url')
+ if not isinstance(manifest_url, compat_str):
+ continue
+ ext = determine_ext(manifest_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=kind, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_akamai_formats(
+ manifest_url, video_id, {
+ 'hls': 'tv4play-i.akamaihd.net',
+ }))
+ elif ext == 'webvtt':
+ subtitles = self._merge_subtitles(
+ subtitles, {
+ 'sv': [{
+ 'url': manifest_url,
+ 'ext': 'vtt',
+ }]})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
+ 'subtitles': subtitles,
'description': info.get('description'),
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py
new file mode 100644
index 0000000..3ced098
--- /dev/null
+++ b/youtube_dl/extractor/tva.py
@@ -0,0 +1,54 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ smuggle_url,
+)
+
+
+class TVAIE(InfoExtractor):
+ _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://videos.tva.ca/episode/85538',
+ 'info_dict': {
+ 'id': '85538',
+ 'ext': 'mp4',
+ 'title': 'Épisode du 25 janvier 2017',
+ 'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98',
+ 'upload_date': '20170126',
+ 'timestamp': 1485442329,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id,
+ video_id, query={
+ '$expand': 'Metadata,CustomId',
+ '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName',
+ '$format': 'json',
+ })
+ metadata = video_data.get('Metadata', {})
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': video_data['Title'],
+ 'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}),
+ 'description': video_data.get('LongDescription') or video_data.get('ShortDescription'),
+ 'series': video_data.get('ShowName'),
+ 'episode': metadata.get('EpisodeTitle'),
+ 'episode_number': int_or_none(metadata.get('EpisodeNumber')),
+ 'categories': video_data.get('Categories'),
+ 'average_rating': video_data.get('AverageUserRating'),
+ 'timestamp': parse_iso8601(video_data.get('CreatedDate')),
+ 'ie_key': 'Ooyala',
+ }
diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py
index 4065354..008f64c 100644
--- a/youtube_dl/extractor/tvc.py
+++ b/youtube_dl/extractor/tvc.py
@@ -19,7 +19,7 @@ class TVCIE(InfoExtractor):
'id': '74622',
'ext': 'mp4',
'title': 'События. "События". Эфир от 22.05.2015 14:30',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1122,
},
}
@@ -72,7 +72,7 @@ class TVCArticleIE(InfoExtractor):
'ext': 'mp4',
'title': 'События. "События". Эфир от 22.05.2015 14:30',
'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1122,
},
}, {
@@ -82,7 +82,7 @@ class TVCArticleIE(InfoExtractor):
'ext': 'mp4',
'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках',
'description': 'md5:f2098f71e21f309e89f69b525fd9846e',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 278,
},
}, {
@@ -92,7 +92,7 @@ class TVCArticleIE(InfoExtractor):
'ext': 'mp4',
'title': 'Ещё не поздно. Эфир от 03.08.2013',
'description': 'md5:51fae9f3f8cfe67abce014e428e5b027',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3316,
},
}]
diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py
index 7a9386c..2b10d9b 100644
--- a/youtube_dl/extractor/tweakers.py
+++ b/youtube_dl/extractor/tweakers.py
@@ -18,7 +18,7 @@ class TweakersIE(InfoExtractor):
'ext': 'mp4',
'title': 'New Nintendo 3DS XL - Op alle fronten beter',
'description': 'md5:3789b21fed9c0219e9bcaacd43fab280',
- 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
'duration': 386,
'uploader_id': 's7JeEm',
}
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
index af92b71..a983ebf 100644
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -12,7 +12,7 @@ from ..utils import (
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
- _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.24video.net/video/view/1044982',
@@ -22,7 +22,7 @@ class TwentyFourVideoIE(InfoExtractor):
'ext': 'mp4',
'title': 'Эротика каменного века',
'description': 'Как смотрели порно в каменном веке.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'SUPERTELO',
'duration': 31,
'timestamp': 1275937857,
@@ -43,7 +43,7 @@ class TwentyFourVideoIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(
- 'http://www.24video.net/video/view/%s' % video_id, video_id)
+ 'http://www.24video.sex/video/view/%s' % video_id, video_id)
title = self._og_search_title(webpage)
description = self._html_search_regex(
@@ -69,11 +69,11 @@ class TwentyFourVideoIE(InfoExtractor):
# Sets some cookies
self._download_xml(
- r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
+ r'http://www.24video.sex/video/xml/%s?mode=init' % video_id,
video_id, 'Downloading init XML')
video_xml = self._download_xml(
- 'http://www.24video.net/video/xml/%s?mode=play' % video_id,
+ 'http://www.24video.sex/video/xml/%s?mode=play' % video_id,
video_id, 'Downloading video XML')
video = xpath_element(video_xml, './/video', 'video', fatal=True)
diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index b721ecb..4fd1aa4 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -4,91 +4,88 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import remove_end
+from ..utils import (
+ int_or_none,
+ try_get,
+)
class TwentyMinutenIE(InfoExtractor):
IE_NAME = '20min'
- _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?20min\.ch/
+ (?:
+ videotv/*\?.*?\bvid=|
+ videoplayer/videoplayer\.html\?.*?\bvideoId@
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
- # regular video
'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
- 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
+ 'md5': 'e7264320db31eed8c38364150c12496e',
'info_dict': {
'id': '469148',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': '85 000 Franken für 15 perfekte Minuten',
- 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
- 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
- }
- }, {
- # news article with video
- 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
- 'md5': 'cd4cbb99b94130cff423e967cd275e5e',
- 'info_dict': {
- 'id': '469408',
- 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469',
- 'ext': 'flv',
- 'title': '«Wir müssen mutig nach vorne schauen»',
- 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
- 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
+ 'thumbnail': r're:https?://.*\.jpg$',
},
- 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
}, {
- # YouTube embed
- 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
- 'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
+ 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
'info_dict': {
- 'id': 'ivM7A7SpDOs',
+ 'id': '523629',
'ext': 'mp4',
- 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
- 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
- 'upload_date': '20160424',
- 'uploader': 'RTVCM Castilla-La Mancha',
- 'uploader_id': 'RTVCM',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
},
- 'add_ie': ['Youtube'],
}, {
'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
'only_matching': True,
- }, {
- 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411',
- 'only_matching': True,
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
+ webpage)]
+
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id') or video_id
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://api.20min.ch/video/%s/show' % video_id,
+ video_id)['content']
- webpage = self._download_webpage(url, display_id)
+ title = video['title']
- youtube_url = self._html_search_regex(
- r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
- webpage, 'YouTube embed URL', default=None)
- if youtube_url is not None:
- return self.url_result(youtube_url, 'Youtube')
+ formats = [{
+ 'format_id': format_id,
+ 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
+ 'quality': quality,
+ } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
+ self._sort_formats(formats)
- title = self._html_search_regex(
- r'<h1>.*?<span>(.+?)</span></h1>',
- webpage, 'title', default=None)
- if not title:
- title = remove_end(re.sub(
- r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
+ description = video.get('lead')
+ thumbnail = video.get('thumbnail')
- if not video_id:
- video_id = self._search_regex(
- r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
+ def extract_count(kind):
+ return try_get(
+ video,
+ lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))
- description = self._html_search_meta(
- 'description', webpage, 'description')
- thumbnail = self._og_search_thumbnail(webpage)
+ like_count = extract_count('up')
+ dislike_count = extract_count('down')
return {
'id': video_id,
- 'display_id': display_id,
- 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 77414a2..bbba394 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -22,6 +22,7 @@ from ..utils import (
orderedSet,
parse_duration,
parse_iso8601,
+ update_url_query,
urlencode_postdata,
)
@@ -205,7 +206,14 @@ class TwitchChapterIE(TwitchItemBaseIE):
class TwitchVodIE(TwitchItemBaseIE):
IE_NAME = 'twitch:vod'
- _VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/|
+ player\.twitch\.tv/\?.*?\bvideo=v
+ )
+ (?P<id>\d+)
+ '''
_ITEM_TYPE = 'vod'
_ITEM_SHORTCUT = 'v'
@@ -215,7 +223,7 @@ class TwitchVodIE(TwitchItemBaseIE):
'id': 'v6528877',
'ext': 'mp4',
'title': 'LCK Summer Split - Week 6 Day 1',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 17208,
'timestamp': 1435131709,
'upload_date': '20150624',
@@ -235,7 +243,7 @@ class TwitchVodIE(TwitchItemBaseIE):
'id': 'v11230755',
'ext': 'mp4',
'title': 'Untitled Broadcast',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1638,
'timestamp': 1439746708,
'upload_date': '20150816',
@@ -248,6 +256,12 @@ class TwitchVodIE(TwitchItemBaseIE):
'skip_download': True,
},
'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/videos/6528877',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -279,6 +293,18 @@ class TwitchVodIE(TwitchItemBaseIE):
if 't' in query:
info['start_time'] = parse_duration(query['t'][0])
+ if info.get('timestamp') is not None:
+ info['subtitles'] = {
+ 'rechat': [{
+ 'url': update_url_query(
+ 'https://rechat.twitch.tv/rechat-messages', {
+ 'video_id': 'v%s' % item_id,
+ 'start': info['timestamp'],
+ }),
+ 'ext': 'json',
+ }],
+ }
+
return info
@@ -300,7 +326,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
response = self._call_api(
self._PLAYLIST_PATH % (channel_id, offset, limit),
channel_id,
- 'Downloading %s videos JSON page %s'
+ 'Downloading %s JSON page %s'
% (self._PLAYLIST_TYPE, counter_override or counter))
page_entries = self._extract_playlist_page(response)
if not page_entries:
@@ -350,25 +376,85 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
}
-class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
- IE_NAME = 'twitch:past_broadcasts'
- _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
- _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true'
+class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
+ _VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
+ _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
+
+
+class TwitchAllVideosIE(TwitchVideosBaseIE):
+ IE_NAME = 'twitch:videos:all'
+ _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
+ _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
+ _PLAYLIST_TYPE = 'all videos'
+
+ _TEST = {
+ 'url': 'https://www.twitch.tv/spamfish/videos/all',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'Spamfish',
+ },
+ 'playlist_mincount': 869,
+ }
+
+
+class TwitchUploadsIE(TwitchVideosBaseIE):
+ IE_NAME = 'twitch:videos:uploads'
+ _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
+ _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
+ _PLAYLIST_TYPE = 'uploads'
+
+ _TEST = {
+ 'url': 'https://www.twitch.tv/spamfish/videos/uploads',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'Spamfish',
+ },
+ 'playlist_mincount': 0,
+ }
+
+
+class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
+ IE_NAME = 'twitch:videos:past-broadcasts'
+ _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
+ _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
_PLAYLIST_TYPE = 'past broadcasts'
_TEST = {
- 'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
+ 'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'Spamfish',
+ },
+ 'playlist_mincount': 0,
+ }
+
+
+class TwitchHighlightsIE(TwitchVideosBaseIE):
+ IE_NAME = 'twitch:videos:highlights'
+ _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
+ _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
+ _PLAYLIST_TYPE = 'highlights'
+
+ _TEST = {
+ 'url': 'https://www.twitch.tv/spamfish/videos/highlights',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
- 'playlist_mincount': 54,
+ 'playlist_mincount': 805,
}
class TwitchStreamIE(TwitchBaseIE):
IE_NAME = 'twitch:stream'
- _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?twitch\.tv/|
+ player\.twitch\.tv/\?.*?\bchannel=
+ )
+ (?P<id>[^/#?]+)
+ '''
_TESTS = [{
'url': 'http://www.twitch.tv/shroomztv',
@@ -392,8 +478,25 @@ class TwitchStreamIE(TwitchBaseIE):
}, {
'url': 'http://www.twitch.tv/miracle_doto#profile-0',
'only_matching': True,
+ }, {
+ 'url': 'https://player.twitch.tv/?channel=lotsofs',
+ 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if any(ie.suitable(url) for ie in (
+ TwitchVideoIE,
+ TwitchChapterIE,
+ TwitchVodIE,
+ TwitchProfileIE,
+ TwitchAllVideosIE,
+ TwitchUploadsIE,
+ TwitchPastBroadcastsIE,
+ TwitchHighlightsIE))
+ else super(TwitchStreamIE, cls).suitable(url))
+
def _real_extract(self, url):
channel_id = self._match_id(url)
@@ -474,7 +577,7 @@ class TwitchClipsIE(InfoExtractor):
'id': 'AggressiveCobraPoooound',
'ext': 'mp4',
'title': 'EA Play 2016 Live from the Novo Theatre',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'creator': 'EA',
'uploader': 'stereotype_',
'uploader_id': 'stereotype_',
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index ac0b221..37e3bc4 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -34,7 +34,7 @@ class TwitterCardIE(TwitterBaseIE):
'id': '560070183650213889',
'ext': 'mp4',
'title': 'Twitter Card',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 30.033,
}
},
@@ -45,7 +45,7 @@ class TwitterCardIE(TwitterBaseIE):
'id': '623160978427936768',
'ext': 'mp4',
'title': 'Twitter Card',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 80.155,
},
},
@@ -82,7 +82,7 @@ class TwitterCardIE(TwitterBaseIE):
'id': '705235433198714880',
'ext': 'mp4',
'title': 'Twitter web player',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://twitter.com/i/videos/752274308186120192',
@@ -201,7 +201,7 @@ class TwitterIE(InfoExtractor):
'id': '643211948184596480',
'ext': 'mp4',
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple',
@@ -217,7 +217,7 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4',
'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
- 'thumbnail': 're:^https?://.*\.png',
+ 'thumbnail': r're:^https?://.*\.png',
'uploader': 'Gifs',
'uploader_id': 'giphz',
},
@@ -257,7 +257,7 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4',
'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'JG',
'uploader_id': 'jaydingeer',
},
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index 57dd73a..daf45d0 100644
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dl/extractor/udn.py
@@ -23,7 +23,7 @@ class UDNEmbedIE(InfoExtractor):
'id': '300040',
'ext': 'mp4',
'title': '生物老師男變女 全校挺"做自己"',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/uktvplay.py b/youtube_dl/extractor/uktvplay.py
new file mode 100644
index 0000000..2137502
--- /dev/null
+++ b/youtube_dl/extractor/uktvplay.py
@@ -0,0 +1,33 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class UKTVPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
+ 'md5': '',
+ 'info_dict': {
+ 'id': '2117008346001',
+ 'ext': 'mp4',
+ 'title': 'Pincers',
+ 'description': 'Pincers',
+ 'uploader_id': '1242911124001',
+ 'upload_date': '20130124',
+ 'timestamp': 1359049267,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest']
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % video_id,
+ 'BrightcoveNew', video_id)
diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py
index c27c643..e670830 100644
--- a/youtube_dl/extractor/uol.py
+++ b/youtube_dl/extractor/uol.py
@@ -84,12 +84,27 @@ class UOLIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- if not video_id.isdigit():
- embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id)
- video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id')
+ media_id = None
+
+ if video_id.isdigit():
+ media_id = video_id
+
+ if not media_id:
+ embed_page = self._download_webpage(
+ 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
+ video_id, 'Downloading embed page', fatal=False)
+ if embed_page:
+ media_id = self._search_regex(
+ (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
+ embed_page, 'media id', default=None)
+
+ if not media_id:
+ webpage = self._download_webpage(url, video_id)
+ media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')
+
video_data = self._download_json(
- 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id,
- video_id)['item']
+ 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id,
+ media_id)['item']
title = video_data['title']
query = {
@@ -118,7 +133,7 @@ class UOLIE(InfoExtractor):
tags.append(tag_description)
return {
- 'id': video_id,
+ 'id': media_id,
'title': title,
'description': clean_html(video_data.get('desMedia')),
'thumbnail': video_data.get('thumbnail'),
diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py
index 2cd22cf..f06bf5b 100644
--- a/youtube_dl/extractor/uplynk.py
+++ b/youtube_dl/extractor/uplynk.py
@@ -30,7 +30,9 @@ class UplynkIE(InfoExtractor):
def _extract_uplynk_info(self, uplynk_content_url):
path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
display_id = video_id or external_id
- formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4')
+ formats = self._extract_m3u8_formats(
+ 'http://content.uplynk.com/%s.m3u8' % path,
+ display_id, 'mp4', 'm3u8_native')
if session_id:
for f in formats:
f['extra_param_to_segment_url'] = 'pbs=' + session_id
diff --git a/youtube_dl/extractor/urort.py b/youtube_dl/extractor/urort.py
index 8872cfc..8f6edab 100644
--- a/youtube_dl/extractor/urort.py
+++ b/youtube_dl/extractor/urort.py
@@ -21,7 +21,7 @@ class UrortIE(InfoExtractor):
'id': '33124-24',
'ext': 'mp3',
'title': 'The Bomb',
- 'thumbnail': 're:^https?://.+\.jpg',
+ 'thumbnail': r're:^https?://.+\.jpg',
'uploader': 'Gerilja',
'uploader_id': 'Gerilja',
'upload_date': '20100323',
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index 0c06bf3..5737d4d 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor):
},
}]
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return mobj.group('url')
+
def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
def num_to_hex(n):
return hex(n)[2:]
diff --git a/youtube_dl/extractor/ustudio.py b/youtube_dl/extractor/ustudio.py
index 3484a20..56509be 100644
--- a/youtube_dl/extractor/ustudio.py
+++ b/youtube_dl/extractor/ustudio.py
@@ -22,7 +22,7 @@ class UstudioIE(InfoExtractor):
'ext': 'mp4',
'title': 'San Francisco: Golden Gate Bridge',
'description': 'md5:23925500697f2c6d4830e387ba51a9be',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20111107',
'uploader': 'Tony Farley',
}
diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py
index 8469837..f474ed7 100644
--- a/youtube_dl/extractor/varzesh3.py
+++ b/youtube_dl/extractor/varzesh3.py
@@ -22,7 +22,7 @@ class Varzesh3IE(InfoExtractor):
'ext': 'mp4',
'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
'description': 'فصل ۲۰۱۵-۲۰۱۴',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'HTTP 404 Error',
}, {
@@ -67,7 +67,7 @@ class Varzesh3IE(InfoExtractor):
webpage, display_id, default=None)
if video_id is None:
video_id = self._search_regex(
- 'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id',
+ r'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id',
default=display_id)
return {
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index a1e0851..bef6394 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -4,11 +4,22 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import urlencode_postdata
+from ..utils import ExtractorError
class Vbox7IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P<id>[\da-fA-F]+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/]+\.)?vbox7\.com/
+ (?:
+ play:|
+ (?:
+ emb/external\.php|
+ player/ext\.swf
+ )\?.*?\bvid=
+ )
+ (?P<id>[\da-fA-F]+)
+ '''
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -16,6 +27,14 @@ class Vbox7IE(InfoExtractor):
'id': '0946fff23c',
'ext': 'mp4',
'title': 'Борисов: Притеснен съм за бъдещето на България',
+ 'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1470982814,
+ 'upload_date': '20160812',
+ 'uploader': 'zdraveibulgaria',
+ },
+ 'params': {
+ 'proxy': '127.0.0.1:8118',
},
}, {
'url': 'http://vbox7.com/play:249bb972c2',
@@ -29,12 +48,15 @@ class Vbox7IE(InfoExtractor):
}, {
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
'only_matching': True,
+ }, {
+ 'url': 'http://i49.vbox7.com/player/ext.swf?vid=0946fff23c&autoplay=1',
+ 'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
- '<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
+ r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
webpage)
if mobj:
return mobj.group('url')
@@ -42,33 +64,41 @@ class Vbox7IE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'http://vbox7.com/play:%s' % video_id, video_id)
-
- title = self._html_search_regex(
- r'<title>(.+?)</title>', webpage, 'title').split('/')[0].strip()
+ response = self._download_json(
+ 'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
+ video_id)
- video_url = self._search_regex(
- r'src\s*:\s*(["\'])(?P<url>.+?.mp4.*?)\1',
- webpage, 'video url', default=None, group='url')
+ if 'error' in response:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
- thumbnail_url = self._og_search_thumbnail(webpage)
+ video = response['options']
- if not video_url:
- info_response = self._download_webpage(
- 'http://vbox7.com/play/magare.do', video_id,
- 'Downloading info webpage',
- data=urlencode_postdata({'as3': '1', 'vid': video_id}),
- headers={'Content-Type': 'application/x-www-form-urlencoded'})
- final_url, thumbnail_url = map(
- lambda x: x.split('=')[1], info_response.split('&'))
+ title = video['title']
+ video_url = video['src']
if '/na.mp4' in video_url:
self.raise_geo_restricted()
- return {
+ uploader = video.get('uploader')
+
+ webpage = self._download_webpage(
+ 'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
+
+ info = {}
+
+ if webpage:
+ info = self._search_json_ld(
+ webpage.replace('"/*@context"', '"@context"'), video_id,
+ fatal=False)
+
+ info.update({
'id': video_id,
- 'url': self._proto_relative_url(video_url, 'http:'),
'title': title,
- 'thumbnail': thumbnail_url,
- }
+ 'url': video_url,
+ 'uploader': uploader,
+ 'thumbnail': self._proto_relative_url(
+ info.get('thumbnail') or self._og_search_thumbnail(webpage),
+ 'http:'),
+ })
+ return info
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
index 6b9c227..80a643d 100644
--- a/youtube_dl/extractor/vessel.py
+++ b/youtube_dl/extractor/vessel.py
@@ -24,7 +24,7 @@ class VesselIE(InfoExtractor):
'id': 'HDN7G5UMs',
'ext': 'mp4',
'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20150317',
'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
'timestamp': int,
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index d82261e..c4e37f6 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -4,9 +4,9 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_etree_fromstring,
compat_str,
compat_urlparse,
+ compat_HTTPError,
)
from ..utils import (
ExtractorError,
@@ -140,21 +140,6 @@ class VevoIE(VevoBaseIE):
'url': 'http://www.vevo.com/watch/INS171400764',
'only_matching': True,
}]
- _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
- _SOURCE_TYPES = {
- 0: 'youtube',
- 1: 'brightcove',
- 2: 'http',
- 3: 'hls_ios',
- 4: 'hls',
- 5: 'smil', # http
- 7: 'f4m_cc',
- 8: 'f4m_ak',
- 9: 'f4m_l3',
- 10: 'ism',
- 13: 'smil', # rtmp
- 18: 'dash',
- }
_VERSIONS = {
0: 'youtube', # only in AuthenticateVideo videoVersions
1: 'level3',
@@ -163,41 +148,6 @@ class VevoIE(VevoBaseIE):
4: 'amazon',
}
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
- formats = []
- els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
- for el in els:
- src = el.attrib['src']
- m = re.match(r'''(?xi)
- (?P<ext>[a-z0-9]+):
- (?P<path>
- [/a-z0-9]+ # The directory and main part of the URL
- _(?P<tbr>[0-9]+)k
- _(?P<width>[0-9]+)x(?P<height>[0-9]+)
- _(?P<vcodec>[a-z0-9]+)
- _(?P<vbr>[0-9]+)
- _(?P<acodec>[a-z0-9]+)
- _(?P<abr>[0-9]+)
- \.[a-z0-9]+ # File extension
- )''', src)
- if not m:
- continue
-
- format_url = self._SMIL_BASE_URL + m.group('path')
- formats.append({
- 'url': format_url,
- 'format_id': 'smil_' + m.group('tbr'),
- 'vcodec': m.group('vcodec'),
- 'acodec': m.group('acodec'),
- 'tbr': int(m.group('tbr')),
- 'vbr': int(m.group('vbr')),
- 'abr': int(m.group('abr')),
- 'ext': m.group('ext'),
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
- return formats
-
def _initialize_api(self, video_id):
req = sanitized_Request(
'http://www.vevo.com/auth', data=b'')
@@ -206,7 +156,7 @@ class VevoIE(VevoBaseIE):
note='Retrieving oauth token',
errnote='Unable to retrieve oauth token')
- if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
+ if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
self.raise_geo_restricted(
'%s said: This page is currently unavailable in your region' % self.IE_NAME)
@@ -214,148 +164,91 @@ class VevoIE(VevoBaseIE):
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
def _call_api(self, path, *args, **kwargs):
- return self._download_json(self._api_url_template % path, *args, **kwargs)
+ try:
+ data = self._download_json(self._api_url_template % path, *args, **kwargs)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ errors = self._parse_json(e.cause.read().decode(), None)['errors']
+ error_message = ', '.join([error['message'] for error in errors])
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+ raise
+ return data
def _real_extract(self, url):
video_id = self._match_id(url)
- json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
- response = self._download_json(
- json_url, video_id, 'Downloading video info',
- 'Unable to download info', fatal=False) or {}
- video_info = response.get('video') or {}
+ self._initialize_api(video_id)
+
+ video_info = self._call_api(
+ 'video/%s' % video_id, video_id, 'Downloading api video info',
+ 'Failed to download video info')
+
+ video_versions = self._call_api(
+ 'video/%s/streams' % video_id, video_id,
+ 'Downloading video versions info',
+ 'Failed to download video versions info',
+ fatal=False)
+
+ # Some videos are only available via webpage (e.g.
+ # https://github.com/rg3/youtube-dl/issues/9366)
+ if not video_versions:
+ webpage = self._download_webpage(url, video_id)
+ video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
+
+ uploader = None
artist = None
featured_artist = None
- uploader = None
- view_count = None
+ artists = video_info.get('artists')
+ for curr_artist in artists:
+ if curr_artist.get('role') == 'Featured':
+ featured_artist = curr_artist['name']
+ else:
+ artist = uploader = curr_artist['name']
+
formats = []
+ for video_version in video_versions:
+ version = self._VERSIONS.get(video_version['version'])
+ version_url = video_version.get('url')
+ if not version_url:
+ continue
- if not video_info:
- try:
- self._initialize_api(video_id)
- except ExtractorError:
- ytid = response.get('errorInfo', {}).get('ytid')
- if ytid:
- self.report_warning(
- 'Video is geoblocked, trying with the YouTube video %s' % ytid)
- return self.url_result(ytid, 'Youtube', ytid)
-
- raise
-
- video_info = self._call_api(
- 'video/%s' % video_id, video_id, 'Downloading api video info',
- 'Failed to download video info')
-
- video_versions = self._call_api(
- 'video/%s/streams' % video_id, video_id,
- 'Downloading video versions info',
- 'Failed to download video versions info',
- fatal=False)
-
- # Some videos are only available via webpage (e.g.
- # https://github.com/rg3/youtube-dl/issues/9366)
- if not video_versions:
- webpage = self._download_webpage(url, video_id)
- video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
-
- timestamp = parse_iso8601(video_info.get('releaseDate'))
- artists = video_info.get('artists')
- for curr_artist in artists:
- if curr_artist.get('role') == 'Featured':
- featured_artist = curr_artist['name']
- else:
- artist = uploader = curr_artist['name']
- view_count = int_or_none(video_info.get('views', {}).get('total'))
-
- for video_version in video_versions:
- version = self._VERSIONS.get(video_version['version'])
- version_url = video_version.get('url')
- if not version_url:
+ if '.ism' in version_url:
+ continue
+ elif '.mpd' in version_url:
+ formats.extend(self._extract_mpd_formats(
+ version_url, video_id, mpd_id='dash-%s' % version,
+ note='Downloading %s MPD information' % version,
+ errnote='Failed to download %s MPD information' % version,
+ fatal=False))
+ elif '.m3u8' in version_url:
+ formats.extend(self._extract_m3u8_formats(
+ version_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls-%s' % version,
+ note='Downloading %s m3u8 information' % version,
+ errnote='Failed to download %s m3u8 information' % version,
+ fatal=False))
+ else:
+ m = re.search(r'''(?xi)
+ _(?P<width>[0-9]+)x(?P<height>[0-9]+)
+ _(?P<vcodec>[a-z0-9]+)
+ _(?P<vbr>[0-9]+)
+ _(?P<acodec>[a-z0-9]+)
+ _(?P<abr>[0-9]+)
+ \.(?P<ext>[a-z0-9]+)''', version_url)
+ if not m:
continue
- if '.ism' in version_url:
- continue
- elif '.mpd' in version_url:
- formats.extend(self._extract_mpd_formats(
- version_url, video_id, mpd_id='dash-%s' % version,
- note='Downloading %s MPD information' % version,
- errnote='Failed to download %s MPD information' % version,
- fatal=False))
- elif '.m3u8' in version_url:
- formats.extend(self._extract_m3u8_formats(
- version_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls-%s' % version,
- note='Downloading %s m3u8 information' % version,
- errnote='Failed to download %s m3u8 information' % version,
- fatal=False))
- else:
- m = re.search(r'''(?xi)
- _(?P<width>[0-9]+)x(?P<height>[0-9]+)
- _(?P<vcodec>[a-z0-9]+)
- _(?P<vbr>[0-9]+)
- _(?P<acodec>[a-z0-9]+)
- _(?P<abr>[0-9]+)
- \.(?P<ext>[a-z0-9]+)''', version_url)
- if not m:
- continue
-
- formats.append({
- 'url': version_url,
- 'format_id': 'http-%s-%s' % (version, video_version['quality']),
- 'vcodec': m.group('vcodec'),
- 'acodec': m.group('acodec'),
- 'vbr': int(m.group('vbr')),
- 'abr': int(m.group('abr')),
- 'ext': m.group('ext'),
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
- else:
- timestamp = int_or_none(self._search_regex(
- r'/Date\((\d+)\)/',
- video_info['releaseDate'], 'release date', fatal=False),
- scale=1000)
- artists = video_info.get('mainArtists')
- if artists:
- artist = uploader = artists[0]['artistName']
-
- featured_artists = video_info.get('featuredArtists')
- if featured_artists:
- featured_artist = featured_artists[0]['artistName']
-
- smil_parsed = False
- for video_version in video_info['videoVersions']:
- version = self._VERSIONS.get(video_version['version'])
- if version == 'youtube':
- continue
- else:
- source_type = self._SOURCE_TYPES.get(video_version['sourceType'])
- renditions = compat_etree_fromstring(video_version['data'])
- if source_type == 'http':
- for rend in renditions.findall('rendition'):
- attr = rend.attrib
- formats.append({
- 'url': attr['url'],
- 'format_id': 'http-%s-%s' % (version, attr['name']),
- 'height': int_or_none(attr.get('frameheight')),
- 'width': int_or_none(attr.get('frameWidth')),
- 'tbr': int_or_none(attr.get('totalBitrate')),
- 'vbr': int_or_none(attr.get('videoBitrate')),
- 'abr': int_or_none(attr.get('audioBitrate')),
- 'vcodec': attr.get('videoCodec'),
- 'acodec': attr.get('audioCodec'),
- })
- elif source_type == 'hls':
- formats.extend(self._extract_m3u8_formats(
- renditions.find('rendition').attrib['url'], video_id,
- 'mp4', 'm3u8_native', m3u8_id='hls-%s' % version,
- note='Downloading %s m3u8 information' % version,
- errnote='Failed to download %s m3u8 information' % version,
- fatal=False))
- elif source_type == 'smil' and version == 'level3' and not smil_parsed:
- formats.extend(self._extract_smil_formats(
- renditions.find('rendition').attrib['url'], video_id, False))
- smil_parsed = True
+ formats.append({
+ 'url': version_url,
+ 'format_id': 'http-%s-%s' % (version, video_version['quality']),
+ 'vcodec': m.group('vcodec'),
+ 'acodec': m.group('acodec'),
+ 'vbr': int(m.group('vbr')),
+ 'abr': int(m.group('abr')),
+ 'ext': m.group('ext'),
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
self._sort_formats(formats)
track = video_info['title']
@@ -376,17 +269,15 @@ class VevoIE(VevoBaseIE):
else:
age_limit = None
- duration = video_info.get('duration')
-
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
- 'timestamp': timestamp,
+ 'timestamp': parse_iso8601(video_info.get('releaseDate')),
'uploader': uploader,
- 'duration': duration,
- 'view_count': view_count,
+ 'duration': int_or_none(video_info.get('duration')),
+ 'view_count': int_or_none(video_info.get('views', {}).get('total')),
'age_limit': age_limit,
'track': track,
'artist': uploader,
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 3b38ac7..8a574bc 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -61,7 +61,7 @@ class VGTVIE(XstreamIE):
'ext': 'mp4',
'title': 'Hevnen er søt: Episode 10 - Abu',
'description': 'md5:e25e4badb5f544b04341e14abdc72234',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 648.000,
'timestamp': 1404626400,
'upload_date': '20140706',
@@ -76,7 +76,7 @@ class VGTVIE(XstreamIE):
'ext': 'flv',
'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 9103.0,
'timestamp': 1410113864,
'upload_date': '20140907',
@@ -96,7 +96,7 @@ class VGTVIE(XstreamIE):
'ext': 'mp4',
'title': 'V75 fra Solvalla 30.05.15',
'description': 'md5:b3743425765355855f88e096acc93231',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 25966,
'timestamp': 1432975582,
'upload_date': '20150530',
@@ -200,7 +200,7 @@ class VGTVIE(XstreamIE):
format_info = {
'url': mp4_url,
}
- mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url)
+ mobj = re.search(r'(\d+)_(\d+)_(\d+)', mp4_url)
if mobj:
tbr = int(mobj.group(3))
format_info.update({
@@ -246,7 +246,7 @@ class BTArticleIE(InfoExtractor):
'ext': 'mp4',
'title': 'Alrekstad internat',
'description': 'md5:dc81a9056c874fedb62fc48a300dac58',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 191,
'timestamp': 1289991323,
'upload_date': '20101117',
diff --git a/youtube_dl/extractor/vidbit.py b/youtube_dl/extractor/vidbit.py
index e7ac5a8..91f45b7 100644
--- a/youtube_dl/extractor/vidbit.py
+++ b/youtube_dl/extractor/vidbit.py
@@ -20,7 +20,7 @@ class VidbitIE(InfoExtractor):
'ext': 'mp4',
'title': 'Intro to VidBit',
'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'upload_date': '20160618',
'view_count': int,
'comment_count': int,
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py
index 8d92aee..67808e7 100644
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@@ -26,7 +26,7 @@ class ViddlerIE(InfoExtractor):
'timestamp': 1335371429,
'upload_date': '20120425',
'duration': 100.89,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'view_count': int,
'comment_count': int,
'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py
new file mode 100644
index 0000000..311df58
--- /dev/null
+++ b/youtube_dl/extractor/videa.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ mimetype2ext,
+ parse_codecs,
+ xpath_element,
+ xpath_text,
+)
+
+
+class VideaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ videa\.hu/
+ (?:
+ videok/(?:[^/]+/)*[^?#&]+-|
+ player\?.*?\bv=|
+ player/v/
+ )
+ (?P<id>[^?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
+ 'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+ 'info_dict': {
+ 'id': '8YfIAjxwWGwT8HVQ',
+ 'ext': 'mp4',
+ 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+ 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
+ 'duration': 21,
+ },
+ }, {
+ 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_xml(
+ 'http://videa.hu/videaplayer_get_xml.php', video_id,
+ query={'v': video_id})
+
+ video = xpath_element(info, './/video', 'video', fatal=True)
+ sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
+
+ title = xpath_text(video, './title', fatal=True)
+
+ formats = []
+ for source in sources.findall('./video_source'):
+ source_url = source.text
+ if not source_url:
+ continue
+ f = parse_codecs(source.get('codecs'))
+ f.update({
+ 'url': source_url,
+ 'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
+ 'format_id': source.get('name'),
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ thumbnail = xpath_text(video, './poster_src')
+ duration = int_or_none(xpath_text(video, './duration'))
+
+ age_limit = None
+ is_adult = xpath_text(video, './is_adult_content', default=None)
+ if is_adult:
+ age_limit = 18 if is_adult == '1' else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
index 4f0dcd1..c02830d 100644
--- a/youtube_dl/extractor/videomega.py
+++ b/youtube_dl/extractor/videomega.py
@@ -19,7 +19,7 @@ class VideoMegaIE(InfoExtractor):
'id': 'AOSQBJYKIDDIKYJBQSOA',
'ext': 'mp4',
'title': '1254207',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA&width=1070&height=600',
diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py
index 7f25665..9b56630 100644
--- a/youtube_dl/extractor/videomore.py
+++ b/youtube_dl/extractor/videomore.py
@@ -23,7 +23,7 @@ class VideomoreIE(InfoExtractor):
'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук',
'series': 'Кино в деталях',
'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 2910,
'view_count': int,
'comment_count': int,
@@ -37,7 +37,7 @@ class VideomoreIE(InfoExtractor):
'title': 'Молодежка 2 сезон 40 серия',
'series': 'Молодежка',
'episode': '40 серия',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 2809,
'view_count': int,
'comment_count': int,
@@ -53,7 +53,7 @@ class VideomoreIE(InfoExtractor):
'ext': 'flv',
'title': 'Промо Команда проиграла из-за Бакина?',
'episode': 'Команда проиграла из-за Бакина?',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 29,
'age_limit': 16,
'view_count': int,
@@ -145,7 +145,7 @@ class VideomoreVideoIE(InfoExtractor):
'ext': 'flv',
'title': 'Ёлки 3',
'description': '',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 5579,
'age_limit': 6,
'view_count': int,
@@ -168,7 +168,7 @@ class VideomoreVideoIE(InfoExtractor):
'ext': 'flv',
'title': '1 серия. Здравствуй, Аквавилль!',
'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 754,
'age_limit': 6,
'view_count': int,
diff --git a/youtube_dl/extractor/videopress.py b/youtube_dl/extractor/videopress.py
new file mode 100644
index 0000000..049db25
--- /dev/null
+++ b/youtube_dl/extractor/videopress.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ parse_age_limit,
+ qualities,
+ try_get,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class VideoPressIE(InfoExtractor):
+ _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://videopress.com/embed/kUJmAcSf',
+ 'md5': '706956a6c875873d51010921310e4bc6',
+ 'info_dict': {
+ 'id': 'kUJmAcSf',
+ 'ext': 'mp4',
+ 'title': 'VideoPress Demo',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 634.6,
+ 'timestamp': 1434983935,
+ 'upload_date': '20150622',
+ 'age_limit': 0,
+ },
+ }, {
+ # 17+, requires birth_* params
+ 'url': 'https://videopress.com/embed/iH3gstfZ',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
+ video_id, query={
+ 'birth_month': random.randint(1, 12),
+ 'birth_day': random.randint(1, 31),
+ 'birth_year': random.randint(1950, 1995),
+ })
+
+ title = video['title']
+
+ def base_url(scheme):
+ return try_get(
+ video, lambda x: x['file_url_base'][scheme], compat_str)
+
+ base_url = base_url('https') or base_url('http')
+
+ QUALITIES = ('std', 'dvd', 'hd')
+ quality = qualities(QUALITIES)
+
+ formats = []
+ for format_id, f in video['files'].items():
+ if not isinstance(f, dict):
+ continue
+ for ext, path in f.items():
+ if ext in ('mp4', 'ogg'):
+ formats.append({
+ 'url': urljoin(base_url, path),
+ 'format_id': '%s-%s' % (format_id, ext),
+ 'ext': determine_ext(path, ext),
+ 'quality': quality(format_id),
+ })
+ original_url = try_get(video, lambda x: x['original'], compat_str)
+ if original_url:
+ formats.append({
+ 'url': original_url,
+ 'format_id': 'original',
+ 'quality': len(QUALITIES),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('poster'),
+ 'duration': float_or_none(video.get('duration'), 1000),
+ 'timestamp': unified_timestamp(video.get('upload_date')),
+ 'age_limit': parse_age_limit(video.get('rating')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py
deleted file mode 100644
index 0f79871..0000000
--- a/youtube_dl/extractor/videott.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import base64
-
-from .common import InfoExtractor
-from ..utils import (
- unified_strdate,
- int_or_none,
-)
-
-
-class VideoTtIE(InfoExtractor):
- _WORKING = False
- ID_NAME = 'video.tt'
- IE_DESC = 'video.tt - Your True Tube'
- _VALID_URL = r'https?://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
-
- _TESTS = [{
- 'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
- 'md5': 'b13aa9e2f267effb5d1094443dff65ba',
- 'info_dict': {
- 'id': 'amd5YujV8',
- 'ext': 'flv',
- 'title': 'Motivational video Change your mind in just 2.50 mins',
- 'description': '',
- 'upload_date': '20130827',
- 'uploader': 'joseph313',
- }
- }, {
- 'url': 'http://video.tt/embed/amd5YujV8',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- settings = self._download_json(
- 'http://www.video.tt/player_control/settings.php?v=%s' % video_id, video_id,
- 'Downloading video JSON')['settings']
-
- video = settings['video_details']['video']
-
- formats = [
- {
- 'url': base64.b64decode(res['u'].encode('utf-8')).decode('utf-8'),
- 'ext': 'flv',
- 'format_id': res['l'],
- } for res in settings['res'] if res['u']
- ]
-
- return {
- 'id': video_id,
- 'title': video['title'],
- 'description': video['description'],
- 'thumbnail': settings['config']['thumbnail'],
- 'upload_date': unified_strdate(video['added']),
- 'uploader': video['owner'],
- 'view_count': int_or_none(video['view_count']),
- 'comment_count': None if video.get('comment_count') == '--' else int_or_none(video['comment_count']),
- 'like_count': int_or_none(video['liked']),
- 'dislike_count': int_or_none(video['disliked']),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py
index 6898042..4e4b4e3 100644
--- a/youtube_dl/extractor/vidio.py
+++ b/youtube_dl/extractor/vidio.py
@@ -18,7 +18,7 @@ class VidioIE(InfoExtractor):
'ext': 'mp4',
'title': 'DJ_AMBRED - Booyah (Live 2015)',
'description': 'md5:27dc15f819b6a78a626490881adbadf8',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 149,
'like_count': int,
},
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
index b1156d5..e9ff336 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -23,7 +23,7 @@ class VidmeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Fishing for piranha - the easy way',
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1406313244,
'upload_date': '20140725',
'age_limit': 0,
@@ -39,7 +39,7 @@ class VidmeIE(InfoExtractor):
'id': 'Gc6M',
'ext': 'mp4',
'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1441211642,
'upload_date': '20150902',
'uploader': 'SunshineM',
@@ -61,7 +61,7 @@ class VidmeIE(InfoExtractor):
'ext': 'mp4',
'title': 'The Carver',
'description': 'md5:e9c24870018ae8113be936645b93ba3c',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1433203629,
'upload_date': '20150602',
'uploader': 'Thomas',
@@ -82,7 +82,7 @@ class VidmeIE(InfoExtractor):
'id': 'Wmur',
'ext': 'mp4',
'title': 'naked smoking & stretching',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1430931613,
'upload_date': '20150506',
'uploader': 'naked-yogi',
@@ -115,7 +115,7 @@ class VidmeIE(InfoExtractor):
'id': 'e5g',
'ext': 'mp4',
'title': 'Video upload (e5g)',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1401480195,
'upload_date': '20140530',
'uploader': None,
diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py
index 19500eb..18735cf 100644
--- a/youtube_dl/extractor/viewlift.py
+++ b/youtube_dl/extractor/viewlift.py
@@ -14,7 +14,7 @@ from ..utils import (
class ViewLiftBaseIE(InfoExtractor):
- _DOMAINS_REGEX = '(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
+ _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
class ViewLiftEmbedIE(ViewLiftBaseIE):
@@ -110,7 +110,7 @@ class ViewLiftIE(ViewLiftBaseIE):
'ext': 'mp4',
'title': 'Lost for Life',
'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 4489,
'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
}
@@ -123,7 +123,7 @@ class ViewLiftIE(ViewLiftBaseIE):
'ext': 'mp4',
'title': 'India',
'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 979,
'categories': ['Documentary', 'Sports', 'Politics']
}
@@ -160,7 +160,7 @@ class ViewLiftIE(ViewLiftBaseIE):
snag = self._parse_json(
self._search_regex(
- 'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
+ r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
display_id)
for item in snag:
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
index a93196a..52dd95e 100644
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -157,7 +157,7 @@ class ViewsterIE(InfoExtractor):
formats.extend(m3u8_formats)
else:
qualities_basename = self._search_regex(
- '/([^/]+)\.csmil/',
+ r'/([^/]+)\.csmil/',
manifest_url, 'qualities basename', default=None)
if not qualities_basename:
continue
diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py
index a4f914d..4adcd18 100644
--- a/youtube_dl/extractor/viidea.py
+++ b/youtube_dl/extractor/viidea.py
@@ -40,7 +40,7 @@ class ViideaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Automatics, robotics and biocybernetics',
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'timestamp': 1372349289,
'upload_date': '20130627',
'duration': 565,
@@ -58,7 +58,7 @@ class ViideaIE(InfoExtractor):
'ext': 'flv',
'title': 'NLP at Google',
'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'timestamp': 1284375600,
'upload_date': '20100913',
'duration': 5352,
@@ -74,7 +74,7 @@ class ViideaIE(InfoExtractor):
'id': '23181',
'title': 'Deep Learning Summer School, Montreal 2015',
'description': 'md5:0533a85e4bd918df52a01f0e1ebe87b7',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'timestamp': 1438560000,
},
'playlist_count': 30,
@@ -85,7 +85,7 @@ class ViideaIE(InfoExtractor):
'id': '9737',
'display_id': 'mlss09uk_bishop_ibi',
'title': 'Introduction To Bayesian Inference',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'timestamp': 1251622800,
},
'playlist': [{
@@ -94,7 +94,7 @@ class ViideaIE(InfoExtractor):
'display_id': 'mlss09uk_bishop_ibi_part1',
'ext': 'wmv',
'title': 'Introduction To Bayesian Inference (Part 1)',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'duration': 4622,
'timestamp': 1251622800,
'upload_date': '20090830',
@@ -105,7 +105,7 @@ class ViideaIE(InfoExtractor):
'display_id': 'mlss09uk_bishop_ibi_part2',
'ext': 'wmv',
'title': 'Introduction To Bayesian Inference (Part 2)',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
'duration': 5641,
'timestamp': 1251622800,
'upload_date': '20090830',
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 51c69a8..61cc469 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -21,12 +21,12 @@ from ..utils import (
sanitized_Request,
smuggle_url,
std_headers,
- unified_strdate,
+ try_get,
+ unified_timestamp,
unsmuggle_url,
urlencode_postdata,
unescapeHTML,
parse_filesize,
- try_get,
)
@@ -92,29 +92,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
def _vimeo_sort_formats(self, formats):
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
# at the same time without actual units specified. This lead to wrong sorting.
- self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
def _parse_config(self, config, video_id):
+ video_data = config['video']
# Extract title
- video_title = config['video']['title']
+ video_title = video_data['title']
# Extract uploader, uploader_url and uploader_id
- video_uploader = config['video'].get('owner', {}).get('name')
- video_uploader_url = config['video'].get('owner', {}).get('url')
+ video_uploader = video_data.get('owner', {}).get('name')
+ video_uploader_url = video_data.get('owner', {}).get('url')
video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
# Extract video thumbnail
- video_thumbnail = config['video'].get('thumbnail')
+ video_thumbnail = video_data.get('thumbnail')
if video_thumbnail is None:
- video_thumbs = config['video'].get('thumbs')
+ video_thumbs = video_data.get('thumbs')
if video_thumbs and isinstance(video_thumbs, dict):
_, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
# Extract video duration
- video_duration = int_or_none(config['video'].get('duration'))
+ video_duration = int_or_none(video_data.get('duration'))
formats = []
- config_files = config['video'].get('files') or config['request'].get('files', {})
+ config_files = video_data.get('files') or config['request'].get('files', {})
for f in config_files.get('progressive', []):
video_url = f.get('url')
if not video_url:
@@ -127,10 +128,33 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'fps': int_or_none(f.get('fps')),
'tbr': int_or_none(f.get('bitrate')),
})
- m3u8_url = config_files.get('hls', {}).get('url')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ for files_type in ('hls', 'dash'):
+ for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
+ manifest_url = cdn_data.get('url')
+ if not manifest_url:
+ continue
+ format_id = '%s-%s' % (files_type, cdn_name)
+ if files_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id=format_id,
+ note='Downloading %s m3u8 information' % cdn_name,
+ fatal=False))
+ elif files_type == 'dash':
+ mpd_pattern = r'/%s/(?:sep/)?video/' % video_id
+ mpd_manifest_urls = []
+ if re.search(mpd_pattern, manifest_url):
+ for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
+ mpd_manifest_urls.append((format_id + suffix, re.sub(
+ mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url)))
+ else:
+ mpd_manifest_urls = [(format_id, manifest_url)]
+ for f_id, m_url in mpd_manifest_urls:
+ formats.extend(self._extract_mpd_formats(
+ m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
+ 'Downloading %s MPD information' % cdn_name,
+ fatal=False))
subtitles = {}
text_tracks = config['request'].get('text_tracks')
@@ -189,11 +213,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
'description': 'md5:2d3305bad981a06ff79f027f19865021',
+ 'timestamp': 1355990239,
'upload_date': '20121220',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user7108434',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
'uploader_id': 'user7108434',
'uploader': 'Filippo Valsorda',
'duration': 10,
+ 'license': 'by-sa',
},
},
{
@@ -203,7 +229,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'info_dict': {
'id': '68093876',
'ext': 'mp4',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
'uploader_id': 'openstreetmapus',
'uploader': 'OpenStreetMap US',
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
@@ -220,7 +246,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
'uploader': 'The BLN & Business of Software',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
'uploader_id': 'theblnbusinessofsoftware',
'duration': 3610,
'description': None,
@@ -234,12 +260,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '68375962',
'ext': 'mp4',
'title': 'youtube-dl password protected test video',
+ 'timestamp': 1371200155,
'upload_date': '20130614',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
- 'description': 'This is "youtube-dl password protected test video" by on Vimeo, the home for high quality videos and the people who love them.',
+ 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
},
'params': {
'videopassword': 'youtube-dl',
@@ -253,10 +280,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'Key & Peele: Terrorist Interrogation',
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/atencio',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
'uploader_id': 'atencio',
'uploader': 'Peter Atencio',
- 'upload_date': '20130927',
+ 'timestamp': 1380339469,
+ 'upload_date': '20130928',
'duration': 187,
},
},
@@ -268,8 +296,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
+ 'timestamp': 1381846109,
'upload_date': '20131015',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/staff',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
'duration': 62,
@@ -284,21 +313,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'Pier Solar OUYA Official Trailer',
'uploader': 'Tulio Gonçalves',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user28849593',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
'uploader_id': 'user28849593',
},
},
{
# contains original format
'url': 'https://vimeo.com/33951933',
- 'md5': '2d9f5475e0537f013d0073e812ab89e6',
+ 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
'info_dict': {
'id': '33951933',
'ext': 'mp4',
'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
'uploader': 'The DMCI',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/dmci',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
'uploader_id': 'dmci',
+ 'timestamp': 1324343742,
'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
},
@@ -309,11 +339,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
'url': 'https://vimeo.com/channels/tributes/6213729',
'info_dict': {
'id': '6213729',
- 'ext': 'mp4',
+ 'ext': 'mov',
'title': 'Vimeo Tribute: The Shining',
'uploader': 'Casey Donahue',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
'uploader_id': 'caseydonahue',
+ 'timestamp': 1250886430,
'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
},
@@ -323,7 +354,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'expected_warnings': ['Unable to download JSON metadata'],
},
{
- # redirects to ondemand extractor and should be passed throught it
+ # redirects to ondemand extractor and should be passed through it
# for successful extraction
'url': 'https://vimeo.com/73445910',
'info_dict': {
@@ -331,7 +362,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'The Reluctant Revolutionary',
'uploader': '10Ft Films',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
'uploader_id': 'tenfootfilms',
},
'params': {
@@ -462,6 +493,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
'%s said: %s' % (self.IE_NAME, seed_status['title']),
expected=True)
+ cc_license = None
+ timestamp = None
+
# Extract the config JSON
try:
try:
@@ -475,14 +509,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
vimeo_clip_page_config = self._search_regex(
r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
'vimeo clip page config')
- config_url = self._parse_json(
- vimeo_clip_page_config, video_id)['player']['config_url']
+ page_config = self._parse_json(vimeo_clip_page_config, video_id)
+ config_url = page_config['player']['config_url']
+ cc_license = page_config.get('cc_license')
+ timestamp = try_get(
+ page_config, lambda x: x['clip']['uploaded_on'],
+ compat_str)
config_json = self._download_webpage(config_url, video_id)
config = json.loads(config_json)
except RegexNotFoundError:
# For pro videos or player.vimeo.com urls
# We try to find out to which variable is assigned the config dic
- m_variable_name = re.search('(\w)\.video\.id', webpage)
+ m_variable_name = re.search(r'(\w)\.video\.id', webpage)
if m_variable_name is not None:
config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
else:
@@ -545,10 +583,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
self._downloader.report_warning('Cannot find video description')
# Extract upload date
- video_upload_date = None
- mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
- if mobj is not None:
- video_upload_date = unified_strdate(mobj.group(1))
+ if not timestamp:
+ timestamp = self._search_regex(
+ r'<time[^>]+datetime="([^"]+)"', webpage,
+ 'timestamp', default=None)
try:
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
@@ -585,15 +623,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
info_dict = self._parse_config(config, video_id)
formats.extend(info_dict['formats'])
self._vimeo_sort_formats(formats)
+
+ if not cc_license:
+ cc_license = self._search_regex(
+ r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
+ webpage, 'license', default=None, group='license')
+
info_dict.update({
'id': video_id,
'formats': formats,
- 'upload_date': video_upload_date,
+ 'timestamp': unified_timestamp(timestamp),
'description': video_description,
'webpage_url': url,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
+ 'license': cc_license,
})
return info_dict
@@ -611,9 +656,12 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'המעבדה - במאי יותם פלדמן',
'uploader': 'גם סרטים',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
'uploader_id': 'gumfilms',
},
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
}, {
# requires Referer to be passed along with og:video:url
'url': 'https://vimeo.com/ondemand/36938/126682985',
@@ -622,7 +670,7 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'Rävlock, rätt läte på rätt plats',
'uploader': 'Lindroth & Norin',
- 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user14430847',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user14430847',
'uploader_id': 'user14430847',
},
'params': {
@@ -712,12 +760,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
# Try extracting href first since not all videos are available via
# short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
clips = re.findall(
- r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
+ r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
if clips:
- for video_id, video_url in clips:
+ for video_id, video_url, video_title in clips:
yield self.url_result(
compat_urlparse.urljoin(base_url, video_url),
- VimeoIE.ie_key(), video_id=video_id)
+ VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
# More relaxed fallback
else:
for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
@@ -842,7 +890,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
'uploader': 'DevWeek Events',
'duration': 2773,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader_id': 'user22258446',
}
}, {
@@ -866,10 +914,14 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
webpage = self._download_webpage(webpage_url, video_id)
- data = self._parse_json(self._search_regex(
- r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
- default=NO_DEFAULT if video_password_verified else '{}'), video_id)
- config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
+ config_url = self._html_search_regex(
+ r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'config URL', default=None, group='url')
+ if not config_url:
+ data = self._parse_json(self._search_regex(
+ r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
+ default=NO_DEFAULT if video_password_verified else '{}'), video_id)
+ config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
if config_url is None:
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py
index 7fd9b77..c74b437 100644
--- a/youtube_dl/extractor/vimple.py
+++ b/youtube_dl/extractor/vimple.py
@@ -37,7 +37,7 @@ class VimpleIE(SprutoBaseIE):
'ext': 'mp4',
'title': 'Sunset',
'duration': 20,
- 'thumbnail': 're:https?://.*?\.jpg',
+ 'thumbnail': r're:https?://.*?\.jpg',
},
}, {
'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 0183f05..4957a07 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -6,8 +6,9 @@ import itertools
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
int_or_none,
- unified_strdate,
+ unified_timestamp,
)
@@ -20,9 +21,10 @@ class VineIE(InfoExtractor):
'id': 'b9KOOWX7HUx',
'ext': 'mp4',
'title': 'Chicken.',
- 'alt_title': 'Vine by Jack Dorsey',
+ 'alt_title': 'Vine by Jack',
+ 'timestamp': 1368997951,
'upload_date': '20130519',
- 'uploader': 'Jack Dorsey',
+ 'uploader': 'Jack',
'uploader_id': '76',
'view_count': int,
'like_count': int,
@@ -30,47 +32,13 @@ class VineIE(InfoExtractor):
'repost_count': int,
},
}, {
- 'url': 'https://vine.co/v/MYxVapFvz2z',
- 'md5': '7b9a7cbc76734424ff942eb52c8f1065',
- 'info_dict': {
- 'id': 'MYxVapFvz2z',
- 'ext': 'mp4',
- 'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
- 'alt_title': 'Vine by Mars Ruiz',
- 'upload_date': '20140815',
- 'uploader': 'Mars Ruiz',
- 'uploader_id': '1102363502380728320',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- }, {
- 'url': 'https://vine.co/v/bxVjBbZlPUH',
- 'md5': 'ea27decea3fa670625aac92771a96b73',
- 'info_dict': {
- 'id': 'bxVjBbZlPUH',
- 'ext': 'mp4',
- 'title': '#mw3 #ac130 #killcam #angelofdeath',
- 'alt_title': 'Vine by Z3k3',
- 'upload_date': '20130430',
- 'uploader': 'Z3k3',
- 'uploader_id': '936470460173008896',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- }, {
- 'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
- 'only_matching': True,
- }, {
'url': 'https://vine.co/v/e192BnZnZ9V',
'info_dict': {
'id': 'e192BnZnZ9V',
'ext': 'mp4',
'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2',
'alt_title': 'Vine by Pimry_zaa',
+ 'timestamp': 1436057405,
'upload_date': '20150705',
'uploader': 'Pimry_zaa',
'uploader_id': '1135760698325307392',
@@ -82,43 +50,60 @@ class VineIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://vine.co/v/MYxVapFvz2z',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vine.co/v/bxVjBbZlPUH',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
-
- data = self._parse_json(
- self._search_regex(
- r'window\.POST_DATA\s*=\s*({.+?});\s*</script>',
- webpage, 'vine data'),
- video_id)
-
- data = data[list(data.keys())[0]]
-
- formats = [{
- 'format_id': '%(format)s-%(rate)s' % f,
- 'vcodec': f.get('format'),
- 'quality': f.get('rate'),
- 'url': f['videoUrl'],
- } for f in data['videoUrls'] if f.get('videoUrl')]
+ data = self._download_json(
+ 'https://archive.vine.co/posts/%s.json' % video_id, video_id)
+
+ def video_url(kind):
+ for url_suffix in ('Url', 'URL'):
+ format_url = data.get('video%s%s' % (kind, url_suffix))
+ if format_url:
+ return format_url
+
+ formats = []
+ for quality, format_id in enumerate(('low', '', 'dash')):
+ format_url = video_url(format_id.capitalize())
+ if not format_url:
+ continue
+ # DASH link returns plain mp4
+ if format_id == 'dash' and determine_ext(format_url) == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id or 'standard',
+ 'quality': quality,
+ })
self._sort_formats(formats)
username = data.get('username')
return {
'id': video_id,
- 'title': data.get('description') or self._og_search_title(webpage),
- 'alt_title': 'Vine by %s' % username if username else self._og_search_description(webpage, default=None),
+ 'title': data.get('description'),
+ 'alt_title': 'Vine by %s' % username if username else None,
'thumbnail': data.get('thumbnailUrl'),
- 'upload_date': unified_strdate(data.get('created')),
+ 'timestamp': unified_timestamp(data.get('created')),
'uploader': username,
'uploader_id': data.get('userIdStr'),
- 'view_count': int_or_none(data.get('loops', {}).get('count')),
- 'like_count': int_or_none(data.get('likes', {}).get('count')),
- 'comment_count': int_or_none(data.get('comments', {}).get('count')),
- 'repost_count': int_or_none(data.get('reposts', {}).get('count')),
+ 'view_count': int_or_none(data.get('loops')),
+ 'like_count': int_or_none(data.get('likes')),
+ 'comment_count': int_or_none(data.get('comments')),
+ 'repost_count': int_or_none(data.get('reposts')),
'formats': formats,
}
diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py
new file mode 100644
index 0000000..3fd889c
--- /dev/null
+++ b/youtube_dl/extractor/viu.py
@@ -0,0 +1,249 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class ViuBaseIE(InfoExtractor):
+ def _real_initialize(self):
+ viu_auth_res = self._request_webpage(
+ 'https://www.viu.com/api/apps/v2/authenticate', None,
+ 'Requesting Viu auth', query={
+ 'acct': 'test',
+ 'appid': 'viu_desktop',
+ 'fmt': 'json',
+ 'iid': 'guest',
+ 'languageid': 'default',
+ 'platform': 'desktop',
+ 'userid': 'guest',
+ 'useridtype': 'guest',
+ 'ver': '1.0'
+ }, headers=self.geo_verification_headers())
+ self._auth_token = viu_auth_res.info()['X-VIU-AUTH']
+
+ def _call_api(self, path, *args, **kwargs):
+ headers = self.geo_verification_headers()
+ headers.update({
+ 'X-VIU-AUTH': self._auth_token
+ })
+ headers.update(kwargs.get('headers', {}))
+ kwargs['headers'] = headers
+ response = self._download_json(
+ 'https://www.viu.com/api/' + path, *args, **kwargs)['response']
+ if response.get('status') != 'success':
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, response['message']), expected=True)
+ return response
+
+
+class ViuIE(ViuBaseIE):
+ _VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
+ 'info_dict': {
+ 'id': '1116705532',
+ 'ext': 'mp4',
+ 'title': 'Citizen Khan - Ep 1',
+ 'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to India',
+ }, {
+ 'url': 'https://www.viu.com/en/media/1130599965',
+ 'info_dict': {
+ 'id': '1130599965',
+ 'ext': 'mp4',
+ 'title': 'Jealousy Incarnate - Episode 1',
+ 'description': 'md5:d3d82375cab969415d2720b6894361e9',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to Indonesia',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._call_api(
+ 'clip/load', video_id, 'Downloading video data', query={
+ 'appid': 'viu_desktop',
+ 'fmt': 'json',
+ 'id': video_id
+ })['item'][0]
+
+ title = video_data['title']
+
+ m3u8_url = None
+ url_path = video_data.get('urlpathd') or video_data.get('urlpath')
+ tdirforwhole = video_data.get('tdirforwhole')
+ # #EXT-X-BYTERANGE is not supported by native hls downloader
+ # and ffmpeg (#10955)
+ # hls_file = video_data.get('hlsfile')
+ hls_file = video_data.get('jwhlsfile')
+ if url_path and tdirforwhole and hls_file:
+ m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file)
+ else:
+ # m3u8_url = re.sub(
+ # r'(/hlsc_)[a-z]+(\d+\.m3u8)',
+ # r'\1whe\2', video_data['href'])
+ m3u8_url = video_data['href']
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for key, value in video_data.items():
+ mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
+ if not mobj:
+ continue
+ subtitles.setdefault(mobj.group('lang'), []).append({
+ 'url': value,
+ 'ext': mobj.group('ext')
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'series': video_data.get('moviealbumshowname'),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('episodeno')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ViuPlaylistIE(ViuBaseIE):
+ IE_NAME = 'viu:playlist'
+ _VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.viu.com/en/listing/playlist-22461380',
+ 'info_dict': {
+ 'id': '22461380',
+ 'title': 'The Good Wife',
+ },
+ 'playlist_count': 16,
+ 'skip': 'Geo-restricted to Indonesia',
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ playlist_data = self._call_api(
+ 'container/load', playlist_id,
+ 'Downloading playlist info', query={
+ 'appid': 'viu_desktop',
+ 'fmt': 'json',
+ 'id': 'playlist-' + playlist_id
+ })['container']
+
+ entries = []
+ for item in playlist_data.get('item', []):
+ item_id = item.get('id')
+ if not item_id:
+ continue
+ item_id = compat_str(item_id)
+ entries.append(self.url_result(
+ 'viu:' + item_id, 'Viu', item_id))
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_data.get('title'))
+
+
+class ViuOTTIE(InfoExtractor):
+ IE_NAME = 'viu:ott'
+ _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
+ 'info_dict': {
+ 'id': '3421',
+ 'ext': 'mp4',
+ 'title': 'A New Beginning',
+ 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to Singapore',
+ }, {
+ 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90',
+ 'info_dict': {
+ 'id': '7123',
+ 'ext': 'mp4',
+ 'title': '這就是我的生活之道',
+ 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to Hong Kong',
+ }]
+
+ def _real_extract(self, url):
+ country_code, video_id = re.match(self._VALID_URL, url).groups()
+
+ product_data = self._download_json(
+ 'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
+ 'Downloading video info', query={
+ 'r': 'vod/ajax-detail',
+ 'platform_flag_label': 'web',
+ 'product_id': video_id,
+ })['data']
+
+ video_data = product_data.get('current_product')
+ if not video_data:
+ raise ExtractorError('This video is not available in your region.', expected=True)
+
+ stream_data = self._download_json(
+ 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
+ video_id, 'Downloading stream info', query={
+ 'ccs_product_id': video_data['ccs_product_id'],
+ })['data']['stream']
+
+ stream_sizes = stream_data.get('size', {})
+ formats = []
+ for vid_format, stream_url in stream_data.get('url', {}).items():
+ height = int_or_none(self._search_regex(
+ r's(\d+)p', vid_format, 'height', default=None))
+ formats.append({
+ 'format_id': vid_format,
+ 'url': stream_url,
+ 'height': height,
+ 'ext': 'mp4',
+ 'filesize': int_or_none(stream_sizes.get(vid_format))
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for sub in video_data.get('subtitle', []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('name'), []).append({
+ 'url': sub_url,
+ 'ext': 'srt',
+ })
+
+ title = video_data['synopsis'].strip()
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'series': product_data.get('series', {}).get('name'),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('number')),
+ 'duration': int_or_none(stream_data.get('duration')),
+ 'thumbnail': video_data.get('cover_image_url'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 1990e70..7c42a4f 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -245,7 +245,7 @@ class VKIE(VKBaseIE):
},
},
{
- # finished live stream, live_mp4
+ # finished live stream, postlive_mp4
'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
'md5': '90d22d051fccbbe9becfccc615be6791',
'info_dict': {
@@ -258,7 +258,7 @@ class VKIE(VKBaseIE):
},
},
{
- # live stream, hls and rtmp links,most likely already finished live
+ # live stream, hls and rtmp links, most likely already finished live
# stream by the time you are reading this comment
'url': 'https://vk.com/video-140332_456239111',
'only_matching': True,
@@ -281,6 +281,11 @@ class VKIE(VKBaseIE):
{
'url': 'http://new.vk.com/video205387401_165548505',
'only_matching': True,
+ },
+ {
+ # This video is no longer available, because its author has been blocked.
+ 'url': 'https://vk.com/video-10639516_456240611',
+ 'only_matching': True,
}
]
@@ -328,6 +333,12 @@ class VKIE(VKBaseIE):
r'<!>Access denied':
'Access denied to video %s.',
+
+ r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
+ 'Video %s is no longer available, because its author has been blocked.',
+
+ r'<!>This video is no longer available, because its author has been blocked.':
+ 'Video %s is no longer available, because its author has been blocked.',
}
for error_re, error_msg in ERRORS.items():
@@ -378,12 +389,24 @@ class VKIE(VKBaseIE):
if not data:
data = self._parse_json(
self._search_regex(
- r'<!json>\s*({.+?})\s*<!>', info_page, 'json'),
- video_id)['player']['params'][0]
+ r'<!json>\s*({.+?})\s*<!>', info_page, 'json', default='{}'),
+ video_id)
+ if data:
+ data = data['player']['params'][0]
+
+ if not data:
+ data = self._parse_json(
+ self._search_regex(
+ r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
+ 'player params'),
+ video_id)['params'][0]
title = unescapeHTML(data['md_title'])
- if data.get('live') == 2:
+ # 2 = live
+ # 3 = post live (finished live)
+ is_live = data.get('live') == 2
+ if is_live:
title = self._live_title(title)
timestamp = unified_timestamp(self._html_search_regex(
@@ -398,7 +421,8 @@ class VKIE(VKBaseIE):
for format_id, format_url in data.items():
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
continue
- if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'):
+ if (format_id.startswith(('url', 'cache')) or
+ format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
height = int_or_none(self._search_regex(
r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
formats.append({
@@ -408,8 +432,9 @@ class VKIE(VKBaseIE):
})
elif format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id=format_id,
- fatal=False, live=True))
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8' if is_live else 'm3u8_native',
+ m3u8_id=format_id, fatal=False, live=is_live))
elif format_id == 'rtmp':
formats.append({
'format_id': format_id,
@@ -427,6 +452,7 @@ class VKIE(VKBaseIE):
'duration': data.get('duration'),
'timestamp': timestamp,
'view_count': view_count,
+ 'is_live': is_live,
}
diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index acf9fda..b971890 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -2,16 +2,23 @@
from __future__ import unicode_literals
import re
+import time
+import itertools
from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlencode,
+ compat_str,
+)
from ..utils import (
dict_get,
ExtractorError,
float_or_none,
int_or_none,
remove_start,
+ try_get,
+ urlencode_postdata,
)
-from ..compat import compat_urllib_parse_urlencode
class VLiveIE(InfoExtractor):
@@ -48,17 +55,23 @@ class VLiveIE(InfoExtractor):
webpage = self._download_webpage(
'http://www.vlive.tv/video/%s' % video_id, video_id)
- video_params = self._search_regex(
- r'\bvlive\.video\.init\(([^)]+)\)',
- webpage, 'video params')
- status, _, _, live_params, long_video_id, key = re.split(
- r'"\s*,\s*"', video_params)[2:8]
+ VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
+ VIDEO_PARAMS_FIELD = 'video params'
+
+ params = self._parse_json(self._search_regex(
+ VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
+ transform_source=lambda s: '[' + s + ']', fatal=False)
+
+ if not params or len(params) < 7:
+ params = self._search_regex(
+ VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
+ params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
+
+ status, long_video_id, key = params[2], params[5], params[6]
status = remove_start(status, 'PRODUCT_')
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
- live_params = self._parse_json('"%s"' % live_params, video_id)
- live_params = self._parse_json(live_params, video_id)
- return self._live(video_id, webpage, live_params)
+ return self._live(video_id, webpage)
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
if long_video_id and key:
return self._replay(video_id, webpage, long_video_id, key)
@@ -89,7 +102,22 @@ class VLiveIE(InfoExtractor):
'thumbnail': thumbnail,
}
- def _live(self, video_id, webpage, live_params):
+ def _live(self, video_id, webpage):
+ init_page = self._download_webpage(
+ 'http://www.vlive.tv/video/init/view',
+ video_id, note='Downloading live webpage',
+ data=urlencode_postdata({'videoSeq': video_id}),
+ headers={
+ 'Referer': 'http://www.vlive.tv/video/%s' % video_id,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
+ live_params = self._search_regex(
+ r'"liveStreamInfo"\s*:\s*(".*"),',
+ init_page, 'live stream info')
+ live_params = self._parse_json(live_params, video_id)
+ live_params = self._parse_json(live_params, video_id)
+
formats = []
for vid in live_params.get('resolutions', []):
formats.extend(self._extract_m3u8_formats(
@@ -98,10 +126,14 @@ class VLiveIE(InfoExtractor):
fatal=False, live=True))
self._sort_formats(formats)
- return dict(self._get_common_fields(webpage),
- id=video_id,
- formats=formats,
- is_live=True)
+ info = self._get_common_fields(webpage)
+ info.update({
+ 'title': self._live_title(info['title']),
+ 'id': video_id,
+ 'formats': formats,
+ 'is_live': True,
+ })
+ return info
def _replay(self, video_id, webpage, long_video_id, key):
playinfo = self._download_json(
@@ -135,8 +167,97 @@ class VLiveIE(InfoExtractor):
'ext': 'vtt',
'url': caption['source']}]
- return dict(self._get_common_fields(webpage),
- id=video_id,
- formats=formats,
- view_count=view_count,
- subtitles=subtitles)
+ info = self._get_common_fields(webpage)
+ info.update({
+ 'id': video_id,
+ 'formats': formats,
+ 'view_count': view_count,
+ 'subtitles': subtitles,
+ })
+ return info
+
+
+class VLiveChannelIE(InfoExtractor):
+ IE_NAME = 'vlive:channel'
+ _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
+ _TEST = {
+ 'url': 'http://channels.vlive.tv/FCD4B',
+ 'info_dict': {
+ 'id': 'FCD4B',
+ 'title': 'MAMAMOO',
+ },
+ 'playlist_mincount': 110
+ }
+ _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+
+ def _real_extract(self, url):
+ channel_code = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://channels.vlive.tv/%s/video' % channel_code, channel_code)
+
+ app_id = None
+
+ app_js_url = self._search_regex(
+ r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
+ webpage, 'app js', default=None, group='url')
+
+ if app_js_url:
+ app_js = self._download_webpage(
+ app_js_url, channel_code, 'Downloading app JS', fatal=False)
+ if app_js:
+ app_id = self._search_regex(
+ r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
+ app_js, 'app id', default=None)
+
+ app_id = app_id or self._APP_ID
+
+ channel_info = self._download_json(
+ 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
+ channel_code, note='Downloading decode channel code',
+ query={
+ 'app_id': app_id,
+ 'channelCode': channel_code,
+ '_': int(time.time())
+ })
+
+ channel_seq = channel_info['result']['channelSeq']
+ channel_name = None
+ entries = []
+
+ for page_num in itertools.count(1):
+ video_list = self._download_json(
+ 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
+ channel_code, note='Downloading channel list page #%d' % page_num,
+ query={
+ 'app_id': app_id,
+ 'channelSeq': channel_seq,
+ 'maxNumOfRows': 1000,
+ '_': int(time.time()),
+ 'pageNo': page_num
+ }
+ )
+
+ if not channel_name:
+ channel_name = try_get(
+ video_list,
+ lambda x: x['result']['channelInfo']['channelName'],
+ compat_str)
+
+ videos = try_get(
+ video_list, lambda x: x['result']['videoList'], list)
+ if not videos:
+ break
+
+ for video in videos:
+ video_id = video.get('videoSeq')
+ if not video_id:
+ continue
+ video_id = compat_str(video_id)
+ entries.append(
+ self.url_result(
+ 'http://www.vlive.tv/video/%s' % video_id,
+ ie=VLiveIE.ie_key(), video_id=video_id))
+
+ return self.playlist_result(
+ entries, channel_code, channel_name)
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
index bbfa6e5..02c9617 100644
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -20,7 +20,7 @@ class VodlockerIE(InfoExtractor):
'id': 'e8wvyzz4sl42',
'ext': 'mp4',
'title': 'Germany vs Brazil',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}]
diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py
index 4f1a99a..59e1359 100644
--- a/youtube_dl/extractor/voicerepublic.py
+++ b/youtube_dl/extractor/voicerepublic.py
@@ -26,7 +26,7 @@ class VoiceRepublicIE(InfoExtractor):
'ext': 'm4a',
'title': 'Watching the Watchers: Building a Sousveillance State',
'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
- 'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
+ 'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
'duration': 1800,
'view_count': int,
}
diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py
index 1557a0e..858ac9e 100644
--- a/youtube_dl/extractor/vporn.py
+++ b/youtube_dl/extractor/vporn.py
@@ -7,6 +7,7 @@ from ..utils import (
ExtractorError,
parse_duration,
str_to_int,
+ urljoin,
)
@@ -22,7 +23,7 @@ class VpornIE(InfoExtractor):
'ext': 'mp4',
'title': 'Violet on her 19th birthday',
'description': 'Violet dances in front of the camera which is sure to get you horny.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'kileyGrope',
'categories': ['Masturbation', 'Teen'],
'duration': 393,
@@ -40,7 +41,7 @@ class VpornIE(InfoExtractor):
'ext': 'mp4',
'title': 'Hana Shower',
'description': 'Hana showers at the bathroom.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Hmmmmm',
'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'],
'duration': 588,
@@ -66,10 +67,9 @@ class VpornIE(InfoExtractor):
description = self._html_search_regex(
r'class="(?:descr|description_txt)">(.*?)</div>',
webpage, 'description', fatal=False)
- thumbnail = self._html_search_regex(
- r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None)
- if thumbnail:
- thumbnail = 'http://www.vporn.com' + thumbnail
+ thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
+ r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
+ default=None))
uploader = self._html_search_regex(
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py
index 10ca6ac..8ce3a6b 100644
--- a/youtube_dl/extractor/vube.py
+++ b/youtube_dl/extractor/vube.py
@@ -26,7 +26,7 @@ class VubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Best Drummer Ever [HD]',
'description': 'md5:2d63c4b277b85c2277761c2cf7337d71',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'William',
'timestamp': 1406876915,
'upload_date': '20140801',
@@ -45,7 +45,7 @@ class VubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Chiara Grispo - Price Tag by Jessie J',
'description': 'md5:8ea652a1f36818352428cb5134933313',
- 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$',
+ 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$',
'uploader': 'Chiara.Grispo',
'timestamp': 1388743358,
'upload_date': '20140103',
@@ -65,7 +65,7 @@ class VubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'My 7 year old Sister and I singing "Alive" by Krewella',
'description': 'md5:40bcacb97796339f1690642c21d56f4a',
- 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$',
+ 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$',
'uploader': 'Seraina',
'timestamp': 1396492438,
'upload_date': '20140403',
@@ -84,7 +84,7 @@ class VubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Frozen - Let It Go Cover by Siren Gene',
'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
- 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
+ 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
'uploader': 'Siren',
'timestamp': 1395448018,
'upload_date': '20140322',
diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py
new file mode 100644
index 0000000..d44ec85
--- /dev/null
+++ b/youtube_dl/extractor/vvvvid.py
@@ -0,0 +1,140 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+)
+
+
+class VVVVIDIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ # video_type == 'video/vvvvid'
+ 'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
+ 'md5': 'b8d3cecc2e981adc3835adf07f6df91b',
+ 'info_dict': {
+ 'id': '489048',
+ 'ext': 'mp4',
+ 'title': 'Ping Pong',
+ },
+ }, {
+ # video_type == 'video/rcs'
+ 'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
+ 'md5': '33e0edfba720ad73a8782157fdebc648',
+ 'info_dict': {
+ 'id': '482493',
+ 'ext': 'mp4',
+ 'title': 'Episodio 01',
+ },
+ }]
+ _conn_id = None
+
+ def _real_initialize(self):
+ self._conn_id = self._download_json(
+ 'https://www.vvvvid.it/user/login',
+ None, headers=self.geo_verification_headers())['data']['conn_id']
+
+ def _real_extract(self, url):
+ show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
+ response = self._download_json(
+ 'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id),
+ video_id, headers=self.geo_verification_headers(), query={
+ 'conn_id': self._conn_id,
+ })
+ if response['result'] == 'error':
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, response['message']), expected=True)
+
+ vid = int(video_id)
+ video_data = list(filter(
+ lambda episode: episode.get('video_id') == vid, response['data']))[0]
+ formats = []
+
+ # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
+ def ds(h):
+ g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij"
+
+ def f(m):
+ l = []
+ o = 0
+ b = False
+ m_len = len(m)
+ while ((not b) and o < m_len):
+ n = m[o] << 2
+ o += 1
+ k = -1
+ j = -1
+ if o < m_len:
+ n += m[o] >> 4
+ o += 1
+ if o < m_len:
+ k = (m[o - 1] << 4) & 255
+ k += m[o] >> 2
+ o += 1
+ if o < m_len:
+ j = (m[o - 1] << 6) & 255
+ j += m[o]
+ o += 1
+ else:
+ b = True
+ else:
+ b = True
+ else:
+ b = True
+ l.append(n)
+ if k != -1:
+ l.append(k)
+ if j != -1:
+ l.append(j)
+ return l
+
+ c = []
+ for e in h:
+ c.append(g.index(e))
+
+ c_len = len(c)
+ for e in range(c_len * 2 - 1, -1, -1):
+ a = c[e % c_len] ^ c[(e + 1) % c_len]
+ c[e % c_len] = a
+
+ c = f(c)
+ d = ''
+ for e in c:
+ d += chr(e)
+
+ return d
+
+ for quality in ('_sd', ''):
+ embed_code = video_data.get('embed_info' + quality)
+ if not embed_code:
+ continue
+ embed_code = ds(embed_code)
+ video_type = video_data.get('video_type')
+ if video_type in ('video/rcs', 'video/kenc'):
+ formats.extend(self._extract_akamai_formats(
+ embed_code, video_id))
+ else:
+ formats.extend(self._extract_wowza_formats(
+ 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_data['title'],
+ 'formats': formats,
+ 'thumbnail': video_data.get('thumbnail'),
+ 'duration': int_or_none(video_data.get('length')),
+ 'series': video_data.get('show_title'),
+ 'season_id': season_id,
+ 'season_number': video_data.get('season_number'),
+ 'episode_id': str_or_none(video_data.get('id')),
+ 'epidode_number': int_or_none(video_data.get('number')),
+ 'episode_title': video_data['title'],
+ 'view_count': int_or_none(video_data.get('views')),
+ 'like_count': int_or_none(video_data.get('video_likes')),
+ }
diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py
index 8b94883..cbb5486 100644
--- a/youtube_dl/extractor/walla.py
+++ b/youtube_dl/extractor/walla.py
@@ -20,7 +20,7 @@ class WallaIE(InfoExtractor):
'ext': 'flv',
'title': 'וואן דיירקשן: ההיסטריה',
'description': 'md5:de9e2512a92442574cdb0913c49bc4d8',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 3600,
},
'params': {
diff --git a/youtube_dl/extractor/watchindianporn.py b/youtube_dl/extractor/watchindianporn.py
index 5d3b5bd..ed099be 100644
--- a/youtube_dl/extractor/watchindianporn.py
+++ b/youtube_dl/extractor/watchindianporn.py
@@ -22,7 +22,7 @@ class WatchIndianPornIE(InfoExtractor):
'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
'ext': 'mp4',
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'LoveJay',
'upload_date': '20160428',
'duration': 226,
diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py
index 7486cb3..e4b65f5 100644
--- a/youtube_dl/extractor/webcaster.py
+++ b/youtube_dl/extractor/webcaster.py
@@ -20,7 +20,7 @@ class WebcasterIE(InfoExtractor):
'id': 'c8cefd240aa593681c8d068cff59f407_hd',
'ext': 'mp4',
'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126',
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
index 7aea47e..1eb1f67 100644
--- a/youtube_dl/extractor/webofstories.py
+++ b/youtube_dl/extractor/webofstories.py
@@ -19,7 +19,7 @@ class WebOfStoriesIE(InfoExtractor):
'id': '4536',
'ext': 'mp4',
'title': 'The temperature of the sun',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'Hans Bethe talks about calculating the temperature of the sun',
'duration': 238,
}
@@ -30,7 +30,7 @@ class WebOfStoriesIE(InfoExtractor):
'id': '55908',
'ext': 'mp4',
'title': 'The story of Gemmata obscuriglobus',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
'duration': 169,
},
@@ -42,7 +42,7 @@ class WebOfStoriesIE(InfoExtractor):
'id': '54215',
'ext': 'mp4',
'title': '"A Leg to Stand On"',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'Oliver Sacks talks about the death and resurrection of a limb',
'duration': 97,
},
@@ -134,7 +134,7 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
entries = [
self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
- for video_number in set(re.findall('href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
+ for video_number in set(re.findall(r'href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
]
title = self._search_regex(
diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py
index 8e09156..7e0befd 100644
--- a/youtube_dl/extractor/weiqitv.py
+++ b/youtube_dl/extractor/weiqitv.py
@@ -37,11 +37,11 @@ class WeiqiTVIE(InfoExtractor):
page = self._download_webpage(url, media_id)
info_json_str = self._search_regex(
- 'var\s+video\s*=\s*(.+});', page, 'info json str')
+ r'var\s+video\s*=\s*(.+});', page, 'info json str')
info_json = self._parse_json(info_json_str, media_id)
letvcloud_url = self._search_regex(
- 'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url')
+ r'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url')
return {
'_type': 'url_transparent',
diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py
index e4a2baa..4c41e98 100644
--- a/youtube_dl/extractor/xbef.py
+++ b/youtube_dl/extractor/xbef.py
@@ -14,7 +14,7 @@ class XBefIE(InfoExtractor):
'ext': 'mp4',
'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
'age_limit': 18,
- 'thumbnail': 're:^http://.*\.jpg',
+ 'thumbnail': r're:^http://.*\.jpg',
}
}
diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py
index de344ba..e616adc 100644
--- a/youtube_dl/extractor/xfileshare.py
+++ b/youtube_dl/extractor/xfileshare.py
@@ -44,7 +44,7 @@ class XFileShareIE(InfoExtractor):
'id': '06y9juieqpmi',
'ext': 'mp4',
'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
}, {
'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
@@ -56,7 +56,7 @@ class XFileShareIE(InfoExtractor):
'id': '3rso4kdn6f9m',
'ext': 'mp4',
'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
}
}, {
'url': 'http://movpod.in/0wguyyxi1yca',
@@ -67,7 +67,7 @@ class XFileShareIE(InfoExtractor):
'id': '3ivfabn7573c',
'ext': 'mp4',
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
},
'skip': 'Video removed',
}, {
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index bd8e1af..36a8c98 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -5,8 +5,8 @@ import re
from .common import InfoExtractor
from ..utils import (
dict_get,
- float_or_none,
int_or_none,
+ parse_duration,
unified_strdate,
)
@@ -22,7 +22,7 @@ class XHamsterIE(InfoExtractor):
'title': 'FemaleAgent Shy beauty takes the bait',
'upload_date': '20121014',
'uploader': 'Ruseful2011',
- 'duration': 893.52,
+ 'duration': 893,
'age_limit': 18,
},
}, {
@@ -33,7 +33,7 @@ class XHamsterIE(InfoExtractor):
'title': 'Britney Spears Sexy Booty',
'upload_date': '20130914',
'uploader': 'jojo747400',
- 'duration': 200.48,
+ 'duration': 200,
'age_limit': 18,
},
'params': {
@@ -48,7 +48,7 @@ class XHamsterIE(InfoExtractor):
'title': '....',
'upload_date': '20160208',
'uploader': 'parejafree',
- 'duration': 72.0,
+ 'duration': 72,
'age_limit': 18,
},
'params': {
@@ -101,9 +101,9 @@ class XHamsterIE(InfoExtractor):
r'''<video[^>]+poster=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
webpage, 'thumbnail', fatal=False, group='thumbnail')
- duration = float_or_none(self._search_regex(
- r'(["\'])duration\1\s*:\s*(["\'])(?P<duration>.+?)\2',
- webpage, 'duration', fatal=False, group='duration'))
+ duration = parse_duration(self._search_regex(
+ r'Runtime:\s*</span>\s*([\d:]+)', webpage,
+ 'duration', fatal=False))
view_count = int_or_none(self._search_regex(
r'content=["\']User(?:View|Play)s:(\d+)',
diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py
index 86abef2..d017e03 100644
--- a/youtube_dl/extractor/xiami.py
+++ b/youtube_dl/extractor/xiami.py
@@ -16,7 +16,9 @@ class XiamiBaseIE(InfoExtractor):
return webpage
def _extract_track(self, track, track_id=None):
- title = track['title']
+ track_name = track.get('songName') or track.get('name') or track['subName']
+ artist = track.get('artist') or track.get('artist_name') or track.get('singers')
+ title = '%s - %s' % (artist, track_name) if artist else track_name
track_url = self._decrypt(track['location'])
subtitles = {}
@@ -31,9 +33,10 @@ class XiamiBaseIE(InfoExtractor):
'thumbnail': track.get('pic') or track.get('album_pic'),
'duration': int_or_none(track.get('length')),
'creator': track.get('artist', '').split(';')[0],
- 'track': title,
- 'album': track.get('album_name'),
- 'artist': track.get('artist'),
+ 'track': track_name,
+ 'track_number': int_or_none(track.get('track')),
+ 'album': track.get('album_name') or track.get('title'),
+ 'artist': artist,
'subtitles': subtitles,
}
@@ -68,14 +71,14 @@ class XiamiBaseIE(InfoExtractor):
class XiamiSongIE(XiamiBaseIE):
IE_NAME = 'xiami:song'
IE_DESC = '虾米音乐'
- _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.xiami.com/song/1775610518',
'md5': '521dd6bea40fd5c9c69f913c232cb57e',
'info_dict': {
'id': '1775610518',
'ext': 'mp3',
- 'title': 'Woman',
+ 'title': 'HONNE - Woman',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 265,
'creator': 'HONNE',
@@ -95,7 +98,7 @@ class XiamiSongIE(XiamiBaseIE):
'info_dict': {
'id': '1775256504',
'ext': 'mp3',
- 'title': '悟空',
+ 'title': '戴荃 - 悟空',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 200,
'creator': '戴荃',
@@ -109,6 +112,26 @@ class XiamiSongIE(XiamiBaseIE):
},
},
'skip': 'Georestricted',
+ }, {
+ 'url': 'http://www.xiami.com/song/1775953850',
+ 'info_dict': {
+ 'id': '1775953850',
+ 'ext': 'mp3',
+ 'title': 'До Скону - Чума Пожирает Землю',
+ 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+ 'duration': 683,
+ 'creator': 'До Скону',
+ 'track': 'Чума Пожирает Землю',
+ 'track_number': 7,
+ 'album': 'Ад',
+ 'artist': 'До Скону',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.xiami.com/song/xLHGwgd07a1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -124,7 +147,7 @@ class XiamiPlaylistBaseIE(XiamiBaseIE):
class XiamiAlbumIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:album'
IE_DESC = '虾米音乐 - 专辑'
- _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[^/?#&]+)'
_TYPE = '1'
_TESTS = [{
'url': 'http://www.xiami.com/album/2100300444',
@@ -136,28 +159,34 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE):
}, {
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
'only_matching': True,
+ }, {
+ 'url': 'http://www.xiami.com/album/URVDji2a506',
+ 'only_matching': True,
}]
class XiamiArtistIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:artist'
IE_DESC = '虾米音乐 - 歌手'
- _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[^/?#&]+)'
_TYPE = '2'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
'info_dict': {
'id': '2132',
},
'playlist_count': 20,
'skip': 'Georestricted',
- }
+ }, {
+ 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99',
+ 'only_matching': True,
+ }]
class XiamiCollectionIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:collection'
IE_DESC = '虾米音乐 - 精选集'
- _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[^/?#&]+)'
_TYPE = '3'
_TEST = {
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py
index 4b9c1ee..e081820 100644
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -24,7 +24,7 @@ class XuiteIE(InfoExtractor):
'id': '3860914',
'ext': 'mp3',
'title': '孤單南半球-歐德陽',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 247.246,
'timestamp': 1314932940,
'upload_date': '20110902',
@@ -40,7 +40,7 @@ class XuiteIE(InfoExtractor):
'id': '25925099',
'ext': 'mp4',
'title': 'BigBuckBunny_320x180',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 596.458,
'timestamp': 1454242500,
'upload_date': '20160131',
@@ -58,7 +58,7 @@ class XuiteIE(InfoExtractor):
'ext': 'mp4',
'title': '暗殺教室 02',
'description': '字幕:【極影字幕社】',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1384.907,
'timestamp': 1421481240,
'upload_date': '20150117',
diff --git a/youtube_dl/extractor/yesjapan.py b/youtube_dl/extractor/yesjapan.py
index 112a6c0..681338c 100644
--- a/youtube_dl/extractor/yesjapan.py
+++ b/youtube_dl/extractor/yesjapan.py
@@ -21,7 +21,7 @@ class YesJapanIE(InfoExtractor):
'ext': 'mp4',
'timestamp': 1416391590,
'upload_date': '20141119',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
}
}
diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dl/extractor/yinyuetai.py
index 834d860..1fd8d35 100644
--- a/youtube_dl/extractor/yinyuetai.py
+++ b/youtube_dl/extractor/yinyuetai.py
@@ -18,7 +18,7 @@ class YinYueTaiIE(InfoExtractor):
'title': '少女时代_PARTY_Music Video Teaser',
'creator': '少女时代',
'duration': 25,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'http://v.yinyuetai.com/video/h5/2322376',
diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py
index 0d943c3..c4ae4d8 100644
--- a/youtube_dl/extractor/ynet.py
+++ b/youtube_dl/extractor/ynet.py
@@ -17,7 +17,7 @@ class YnetIE(InfoExtractor):
'id': 'L-11659-99244',
'ext': 'flv',
'title': 'איש לא יודע מאיפה באנו',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
}
}, {
'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
@@ -25,7 +25,7 @@ class YnetIE(InfoExtractor):
'id': 'L-8859-84418',
'ext': 'flv',
'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין",
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg',
}
}
]
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index 0265a64..34ab878 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -24,7 +24,7 @@ class YouPornIE(InfoExtractor):
'ext': 'mp4',
'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Ask Dan And Jennifer',
'upload_date': '20101221',
'average_rating': int,
@@ -43,7 +43,7 @@ class YouPornIE(InfoExtractor):
'ext': 'mp4',
'title': 'Big Tits Awesome Brunette On amazing webcam show',
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Unknown',
'upload_date': '20111125',
'average_rating': int,
diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py
index 4e25d6f..9fa7728 100644
--- a/youtube_dl/extractor/yourupload.py
+++ b/youtube_dl/extractor/yourupload.py
@@ -2,44 +2,37 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import urljoin
class YourUploadIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?
- (?:yourupload\.com/watch|
- embed\.yourupload\.com|
- embed\.yucache\.net
- )/(?P<id>[A-Za-z0-9]+)
- '''
- _TESTS = [
- {
- 'url': 'http://yourupload.com/watch/14i14h',
- 'md5': '5e2c63385454c557f97c4c4131a393cd',
- 'info_dict': {
- 'id': '14i14h',
- 'ext': 'mp4',
- 'title': 'BigBuckBunny_320x180.mp4',
- 'thumbnail': 're:^https?://.*\.jpe?g',
- }
- },
- {
- 'url': 'http://embed.yourupload.com/14i14h',
- 'only_matching': True,
- },
- {
- 'url': 'http://embed.yucache.net/14i14h?client_file_id=803349',
- 'only_matching': True,
- },
- ]
+ _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://yourupload.com/watch/14i14h',
+ 'md5': '5e2c63385454c557f97c4c4131a393cd',
+ 'info_dict': {
+ 'id': '14i14h',
+ 'ext': 'mp4',
+ 'title': 'BigBuckBunny_320x180.mp4',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ }
+ }, {
+ 'url': 'http://www.yourupload.com/embed/14i14h',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://embed.yourupload.com/14i14h',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- embed_url = 'http://embed.yucache.net/{0:}'.format(video_id)
+ embed_url = 'http://www.yourupload.com/embed/%s' % video_id
+
webpage = self._download_webpage(embed_url, video_id)
title = self._og_search_title(webpage)
- video_url = self._og_search_video_url(webpage)
+ video_url = urljoin(embed_url, self._og_search_video_url(webpage))
thumbnail = self._og_search_thumbnail(webpage, default=None)
return {
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index bd24a28..7671093 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -40,6 +40,7 @@ from ..utils import (
sanitized_Request,
smuggle_url,
str_to_int,
+ try_get,
unescapeHTML,
unified_strdate,
unsmuggle_url,
@@ -316,6 +317,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
+ '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
@@ -327,6 +329,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
+ '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'},
+ '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'},
# Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
@@ -376,12 +380,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'upload_date': '20121002',
'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
+ 'duration': 10,
'like_count': int,
'dislike_count': int,
'start_time': 1,
@@ -401,9 +406,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'],
+ 'duration': 180,
'uploader': 'Icona Pop',
'uploader_id': 'IconaPop',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
'license': 'Standard YouTube License',
'creator': 'Icona Pop',
}
@@ -418,9 +424,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
'alt_title': 'Tunnel Vision',
'description': 'md5:64249768eec3bc4276236606ea996373',
+ 'duration': 419,
'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
'license': 'Standard YouTube License',
'creator': 'Justin Timberlake',
'age_limit': 18,
@@ -437,7 +444,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
'uploader': 'SET India',
'uploader_id': 'setindia',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
'license': 'Standard YouTube License',
'age_limit': 18,
}
@@ -451,12 +458,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'upload_date': '20121002',
'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
+ 'duration': 10,
'like_count': int,
'dislike_count': int,
},
@@ -472,7 +480,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'm4a',
'upload_date': '20121002',
'uploader_id': '8KVIDEO',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
'description': '',
'uploader': '8KVIDEO',
'license': 'Standard YouTube License',
@@ -492,6 +500,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'm4a',
'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
+ 'duration': 244,
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011',
@@ -511,6 +520,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Taylor Swift - Shake It Off',
'alt_title': 'Shake It Off',
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
+ 'duration': 242,
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
@@ -528,10 +538,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': 'T4XJQO3qol8',
'ext': 'mp4',
+ 'duration': 219,
'upload_date': '20100909',
'uploader': 'The Amazing Atheist',
'uploader_id': 'TheAmazingAtheist',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
'license': 'Standard YouTube License',
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
@@ -544,10 +555,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'HtVdAasjOgU',
'ext': 'mp4',
'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
- 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
+ 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
+ 'duration': 142,
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
'upload_date': '20140605',
'license': 'Standard YouTube License',
'age_limit': 18,
@@ -561,9 +573,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
+ 'duration': 247,
'uploader': 'LloydVEVO',
'uploader_id': 'LloydVEVO',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
'upload_date': '20110629',
'license': 'Standard YouTube License',
'age_limit': 18,
@@ -575,9 +588,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': '__2ABJjxzNo',
'ext': 'mp4',
+ 'duration': 266,
'upload_date': '20100430',
'uploader_id': 'deadmau5',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
'creator': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
@@ -595,9 +609,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': 'lqQg6PlCWgI',
'ext': 'mp4',
+ 'duration': 6085,
'upload_date': '20150827',
'uploader_id': 'olympic',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
'license': 'Standard YouTube License',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympic',
@@ -614,9 +629,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': '_b-2C3KPAM0',
'ext': 'mp4',
'stretched_ratio': 16 / 9.,
+ 'duration': 85,
'upload_date': '20110310',
'uploader_id': 'AllenMeow',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
'uploader': '孫艾倫',
'license': 'Standard YouTube License',
@@ -648,9 +664,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'title': 'md5:7b81415841e02ecd4313668cde88737a',
'description': 'md5:116377fd2963b81ec4ce64b542173306',
+ 'duration': 220,
'upload_date': '20150625',
'uploader_id': 'dorappi2000',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
'license': 'Standard YouTube License',
'formats': 'mincount:32',
@@ -690,10 +707,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7335,
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
@@ -702,10 +720,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7337,
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
@@ -714,10 +733,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7337,
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
@@ -726,10 +746,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (zim)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7334,
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}],
@@ -767,9 +788,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
'alt_title': 'Dark Walk',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+ 'duration': 133,
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
'uploader': 'IronSoulElf',
'license': 'Standard YouTube License',
'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
@@ -808,10 +830,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'title': 'md5:e41008789470fc2533a3252216f1c1d1',
'description': 'md5:a677553cf0840649b731a3024aeff4cc',
+ 'duration': 721,
'upload_date': '20150127',
'uploader_id': 'BerkmanCenter',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
- 'uploader': 'BerkmanCenter',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+ 'uploader': 'The Berkman Klein Center for Internet & Society',
'license': 'Creative Commons Attribution license (reuse allowed)',
},
'params': {
@@ -826,10 +849,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
'description': 'md5:dda0d780d5a6e120758d1711d062a867',
+ 'duration': 4060,
'upload_date': '20151119',
'uploader': 'Bernie 2016',
'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
'license': 'Creative Commons Attribution license (reuse allowed)',
},
'params': {
@@ -856,12 +880,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150811',
'uploader': 'FlixMatrix',
'uploader_id': 'FlixMatrixKaravan',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
'license': 'Standard YouTube License',
},
'params': {
'skip_download': True,
},
+ },
+ {
+ # YouTube Red video with episode data
+ 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
+ 'info_dict': {
+ 'id': 'iqKdEhx-dD4',
+ 'ext': 'mp4',
+ 'title': 'Isolation - Mind Field (Ep 1)',
+ 'description': 'md5:8013b7ddea787342608f63a13ddc9492',
+ 'duration': 2085,
+ 'upload_date': '20170118',
+ 'uploader': 'Vsauce',
+ 'uploader_id': 'Vsauce',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
+ 'license': 'Standard YouTube License',
+ 'series': 'Mind Field',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Skipping DASH manifest',
+ ],
+ },
+ {
+ # itag 212
+ 'url': '1t24XAntNCY',
+ 'only_matching': True,
}
]
@@ -976,8 +1030,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
- r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
- 'Initial JS player signature function name')
+ (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
+ jscode, 'Initial JS player signature function name', group='sig')
jsi = JSInterpreter(jscode)
initial_function = jsi.extract_function(funcname)
@@ -998,6 +1053,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if player_url.startswith('//'):
player_url = 'https:' + player_url
+ elif not re.match(r'https?://', player_url):
+ player_url = compat_urlparse.urljoin(
+ 'https://www.youtube.com', player_url)
try:
player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache:
@@ -1448,6 +1506,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
video_alt_title = video_creator = None
+ m_episode = re.search(
+ r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
+ video_webpage)
+ if m_episode:
+ series = m_episode.group('series')
+ season_number = int(m_episode.group('season'))
+ episode_number = int(m_episode.group('episode'))
+ else:
+ series = season_number = episode_number = None
+
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None)
@@ -1476,11 +1544,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_subtitles = self.extract_subtitles(video_id, video_webpage)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
- if 'length_seconds' not in video_info:
- self._downloader.report_warning('unable to extract video duration')
- video_duration = None
- else:
- video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
+ video_duration = try_get(
+ video_info, lambda x: int_or_none(x['length_seconds'][0]))
+ if not video_duration:
+ video_duration = parse_duration(self._html_search_meta(
+ 'duration', video_webpage, 'video duration'))
# annotations
video_annotations = None
@@ -1737,6 +1805,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'is_live': is_live,
'start_time': start_time,
'end_time': end_time,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
}
@@ -1788,13 +1859,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
)
(
- (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
+ (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
|
- ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
+ ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
@@ -1813,6 +1884,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'title': 'YDL_Empty_List',
},
'playlist_count': 0,
+ 'skip': 'This playlist is private',
}, {
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
@@ -1844,6 +1916,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
},
'playlist_count': 2,
+ 'skip': 'This playlist is private',
}, {
'note': 'embedded',
'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
@@ -1877,7 +1950,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'title': "Smiley's People 01 detective, Adventure Series, Action",
'uploader': 'STREEM',
'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
'upload_date': '20150526',
'license': 'Standard YouTube License',
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
@@ -1898,7 +1971,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'title': 'Small Scale Baler and Braiding Rugs',
'uploader': 'Backus-Page House Museum',
'uploader_id': 'backuspagemuseum',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
'upload_date': '20161008',
'license': 'Standard YouTube License',
'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
@@ -1914,6 +1987,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
}, {
'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
'only_matching': True,
+ }, {
+ 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
+ 'only_matching': True,
}]
def _real_initialize(self):
@@ -1955,14 +2031,18 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
- for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
+ # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
+ for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
match = match.strip()
# Check if the playlist exists or is private
- if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
- raise ExtractorError(
- 'The playlist doesn\'t exist or is private, use --username or '
- '--netrc to access it.',
- expected=True)
+ mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
+ if mobj:
+ reason = mobj.group('reason')
+ message = 'This playlist %s' % reason
+ if 'private' in reason:
+ message += ', use --username or --netrc to access it'
+ message += '.'
+ raise ExtractorError(message, expected=True)
elif re.match(r'[^<]*Invalid parameters[^<]*', match):
raise ExtractorError(
'Invalid parameters. Maybe URL is incorrect.',
@@ -2186,7 +2266,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
'title': 'The Young Turks - Live Main Show',
'uploader': 'The Young Turks',
'uploader_id': 'TheYoungTurks',
- 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
'upload_date': '20150715',
'license': 'Standard YouTube License',
'description': 'md5:438179573adcdff3c97ebb1ee632b891',
@@ -2270,18 +2350,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
videos = []
limit = n
+ url_query = {
+ 'search_query': query.encode('utf-8'),
+ }
+ url_query.update(self._EXTRA_QUERY_ARGS)
+ result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
+
for pagenum in itertools.count(1):
- url_query = {
- 'search_query': query.encode('utf-8'),
- 'page': pagenum,
- 'spf': 'navigate',
- }
- url_query.update(self._EXTRA_QUERY_ARGS)
- result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
data = self._download_json(
result_url, video_id='query "%s"' % query,
note='Downloading page %s' % pagenum,
- errnote='Unable to download API page')
+ errnote='Unable to download API page',
+ query={'spf': 'navigate'})
html_content = data[1]['body']['content']
if 'class="search-message' in html_content:
@@ -2293,6 +2373,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
videos += new_videos
if not new_videos or len(videos) > limit:
break
+ next_link = self._html_search_regex(
+ r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
+ html_content, 'next link', default=None)
+ if next_link is None:
+ break
+ result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
if len(videos) > n:
videos = videos[:n]
diff --git a/youtube_dl/extractor/zapiks.py b/youtube_dl/extractor/zapiks.py
index 22a9a57..bacb82e 100644
--- a/youtube_dl/extractor/zapiks.py
+++ b/youtube_dl/extractor/zapiks.py
@@ -24,7 +24,7 @@ class ZapiksIE(InfoExtractor):
'ext': 'mp4',
'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 528,
'timestamp': 1359044972,
'upload_date': '20130124',
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 2ef1772..a365923 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -1,262 +1,312 @@
# coding: utf-8
from __future__ import unicode_literals
-import functools
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- int_or_none,
- unified_strdate,
- OnDemandPagedList,
- xpath_text,
determine_ext,
+ int_or_none,
+ NO_DEFAULT,
+ orderedSet,
+ parse_codecs,
qualities,
- float_or_none,
- ExtractorError,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ urljoin,
)
-class ZDFIE(InfoExtractor):
- _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+class ZDFBaseIE(InfoExtractor):
+ def _call_api(self, url, player, referrer, video_id):
+ return self._download_json(
+ url, video_id, 'Downloading JSON content',
+ headers={
+ 'Referer': referrer,
+ 'Api-Auth': 'Bearer %s' % player['apiToken'],
+ })
+
+ def _extract_player(self, webpage, video_id, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
+ 'player JSON', default='{}' if not fatal else NO_DEFAULT,
+ group='json'),
+ video_id)
+
+
+class ZDFIE(ZDFBaseIE):
+ _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
+ _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
_TESTS = [{
- 'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
+ 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
'info_dict': {
- 'id': '2037704',
- 'ext': 'webm',
- 'title': 'ZDFspezial - Ende des Machtpokers',
- 'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
- 'duration': 1022,
- 'uploader': 'spezial',
- 'uploader_id': '225948',
- 'upload_date': '20131127',
- },
- 'skip': 'Videos on ZDF.de are depublicised in short order',
+ 'id': 'zdfmediathek-trailer-100',
+ 'ext': 'mp4',
+ 'title': 'Die neue ZDFmediathek',
+ 'description': 'md5:3003d36487fb9a5ea2d1ff60beb55e8d',
+ 'duration': 30,
+ 'timestamp': 1477627200,
+ 'upload_date': '20161028',
+ }
+ }, {
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
+ 'only_matching': True,
}]
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
- param_groups = {}
- for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
- group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace'))
- params = {}
- for param in param_group:
- params[param.get('name')] = param.get('value')
- param_groups[group_id] = params
+ @staticmethod
+ def _extract_subtitles(src):
+ subtitles = {}
+ for caption in try_get(src, lambda x: x['captions'], list) or []:
+ subtitle_url = caption.get('uri')
+ if subtitle_url and isinstance(subtitle_url, compat_str):
+ lang = caption.get('language', 'deu')
+ subtitles.setdefault(lang, []).append({
+ 'url': subtitle_url,
+ })
+ return subtitles
+
+ def _extract_format(self, video_id, formats, format_urls, meta):
+ format_url = meta.get('url')
+ if not format_url or not isinstance(format_url, compat_str):
+ return
+ if format_url in format_urls:
+ return
+ format_urls.add(format_url)
+ mime_type = meta.get('mimeType')
+ ext = determine_ext(format_url)
+ if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id='hls',
+ entry_protocol='m3u8_native', fatal=False))
+ elif mime_type == 'application/f4m+xml' or ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+ else:
+ f = parse_codecs(meta.get('mimeCodec'))
+ format_id = ['http']
+ for p in (meta.get('type'), meta.get('quality')):
+ if p and isinstance(p, compat_str):
+ format_id.append(p)
+ f.update({
+ 'url': format_url,
+ 'format_id': '-'.join(format_id),
+ 'format_note': meta.get('quality'),
+ 'language': meta.get('language'),
+ 'quality': qualities(self._QUALITIES)(meta.get('quality')),
+ 'preference': -10,
+ })
+ formats.append(f)
+
+ def _extract_entry(self, url, content, video_id):
+ title = content.get('title') or content['teaserHeadline']
+
+ t = content['mainVideoContent']['http://zdf.de/rels/target']
+
+ ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
+
+ if not ptmd_path:
+ ptmd_path = t[
+ 'http://zdf.de/rels/streams/ptmd-template'].replace(
+ '{playerId}', 'portal')
+
+ ptmd = self._download_json(urljoin(url, ptmd_path), video_id)
formats = []
- for video in smil.findall(self._xpath_ns('.//video', namespace)):
- src = video.get('src')
- if not src:
+ track_uris = set()
+ for p in ptmd['priorityList']:
+ formitaeten = p.get('formitaeten')
+ if not isinstance(formitaeten, list):
continue
- bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
- group_id = video.get('paramGroup')
- param_group = param_groups[group_id]
- for proto in param_group['protocols'].split(','):
- formats.append({
- 'url': '%s://%s' % (proto, param_group['host']),
- 'app': param_group['app'],
- 'play_path': src,
- 'ext': 'flv',
- 'format_id': '%s-%d' % (proto, bitrate),
- 'tbr': bitrate,
- })
+ for f in formitaeten:
+ f_qualities = f.get('qualities')
+ if not isinstance(f_qualities, list):
+ continue
+ for quality in f_qualities:
+ tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
+ if not tracks:
+ continue
+ for track in tracks:
+ self._extract_format(
+ video_id, formats, track_uris, {
+ 'url': track.get('uri'),
+ 'type': f.get('type'),
+ 'mimeType': f.get('mimeType'),
+ 'quality': quality.get('quality'),
+ 'language': track.get('language'),
+ })
self._sort_formats(formats)
- return formats
-
- def extract_from_xml_url(self, video_id, xml_url):
- doc = self._download_xml(
- xml_url, video_id,
- note='Downloading video info',
- errnote='Failed to download video info')
-
- status_code = doc.find('./status/statuscode')
- if status_code is not None and status_code.text != 'ok':
- code = status_code.text
- if code == 'notVisibleAnymore':
- message = 'Video %s is not available' % video_id
- else:
- message = '%s returned error: %s' % (self.IE_NAME, code)
- raise ExtractorError(message, expected=True)
-
- title = doc.find('.//information/title').text
- description = xpath_text(doc, './/information/detail', 'description')
- duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
- uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
- uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
- upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
- subtitles = {}
- captions_url = doc.find('.//caption/url')
- if captions_url is not None:
- subtitles['de'] = [{
- 'url': captions_url.text,
- 'ext': 'ttml',
- }]
-
- def xml_to_thumbnails(fnode):
- thumbnails = []
- for node in fnode:
- thumbnail_url = node.text
- if not thumbnail_url:
+
+ thumbnails = []
+ layouts = try_get(
+ content, lambda x: x['teaserImageRef']['layouts'], dict)
+ if layouts:
+ for layout_key, layout_url in layouts.items():
+ if not isinstance(layout_url, compat_str):
continue
thumbnail = {
- 'url': thumbnail_url,
+ 'url': layout_url,
+ 'format_id': layout_key,
}
- if 'key' in node.attrib:
- m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
- if m:
- thumbnail['width'] = int(m.group(1))
- thumbnail['height'] = int(m.group(2))
+ mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
+ if mobj:
+ thumbnail.update({
+ 'width': int(mobj.group('width')),
+ 'height': int(mobj.group('height')),
+ })
thumbnails.append(thumbnail)
- return thumbnails
- thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': content.get('leadParagraph') or content.get('teasertext'),
+ 'duration': int_or_none(t.get('duration')),
+ 'timestamp': unified_timestamp(content.get('editorialDate')),
+ 'thumbnails': thumbnails,
+ 'subtitles': self._extract_subtitles(ptmd),
+ 'formats': formats,
+ }
- format_nodes = doc.findall('.//formitaeten/formitaet')
- quality = qualities(['veryhigh', 'high', 'med', 'low'])
+ def _extract_regular(self, url, player, video_id):
+ content = self._call_api(player['content'], player, url, video_id)
+ return self._extract_entry(player['content'], content, video_id)
- def get_quality(elem):
- return quality(xpath_text(elem, 'quality'))
- format_nodes.sort(key=get_quality)
- format_ids = []
- formats = []
- for fnode in format_nodes:
- video_url = fnode.find('url').text
- is_available = 'http://www.metafilegenerator' not in video_url
- if not is_available:
- continue
- format_id = fnode.attrib['basetype']
- quality = xpath_text(fnode, './quality', 'quality')
- format_m = re.match(r'''(?x)
- (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
- (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
- ''', format_id)
-
- ext = determine_ext(video_url, None) or format_m.group('container')
- if ext not in ('smil', 'f4m', 'm3u8'):
- format_id = format_id + '-' + quality
- if format_id in format_ids:
- continue
+ def _extract_mobile(self, video_id):
+ document = self._download_json(
+ 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
+ video_id)['document']
- if ext == 'meta':
- continue
- elif ext == 'smil':
- formats.extend(self._extract_smil_formats(
- video_url, video_id, fatal=False))
- elif ext == 'm3u8':
- # the certificates are misconfigured (see
- # https://github.com/rg3/youtube-dl/issues/8665)
- if video_url.startswith('https://'):
- continue
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id=format_id, fatal=False))
- else:
- proto = format_m.group('proto').lower()
-
- abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
- vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
-
- width = int_or_none(xpath_text(fnode, './width', 'width'))
- height = int_or_none(xpath_text(fnode, './height', 'height'))
-
- filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
-
- format_note = ''
- if not format_note:
- format_note = None
-
- formats.append({
- 'format_id': format_id,
- 'url': video_url,
- 'ext': ext,
- 'acodec': format_m.group('acodec'),
- 'vcodec': format_m.group('vcodec'),
- 'abr': abr,
- 'vbr': vbr,
- 'width': width,
- 'height': height,
- 'filesize': filesize,
- 'format_note': format_note,
- 'protocol': proto,
- '_available': is_available,
- })
- format_ids.append(format_id)
+ title = document['titel']
+ formats = []
+ format_urls = set()
+ for f in document['formitaeten']:
+ self._extract_format(video_id, formats, format_urls, f)
self._sort_formats(formats)
+ thumbnails = []
+ teaser_bild = document.get('teaserBild')
+ if isinstance(teaser_bild, dict):
+ for thumbnail_key, thumbnail in teaser_bild.items():
+ thumbnail_url = try_get(
+ thumbnail, lambda x: x['url'], compat_str)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'id': thumbnail_key,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
return {
'id': video_id,
'title': title,
- 'description': description,
- 'duration': duration,
+ 'description': document.get('beschreibung'),
+ 'duration': int_or_none(document.get('length')),
+ 'timestamp': unified_timestamp(try_get(
+ document, lambda x: x['meta']['editorialDate'], compat_str)),
'thumbnails': thumbnails,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
+ 'subtitles': self._extract_subtitles(document),
'formats': formats,
- 'subtitles': subtitles,
}
def _real_extract(self, url):
video_id = self._match_id(url)
- xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
- return self.extract_from_xml_url(video_id, xml_url)
+ webpage = self._download_webpage(url, video_id, fatal=False)
+ if webpage:
+ player = self._extract_player(webpage, url, fatal=False)
+ if player:
+ return self._extract_regular(url, player, video_id)
+
+ return self._extract_mobile(video_id)
-class ZDFChannelIE(InfoExtractor):
- _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)'
+
+class ZDFChannelIE(ZDFBaseIE):
+ _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
- 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
+ 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
'info_dict': {
- 'id': '1586442',
+ 'id': 'das-aktuelle-sportstudio',
+ 'title': 'das aktuelle sportstudio | ZDF',
},
- 'playlist_count': 3,
- }, {
- 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332',
- 'only_matching': True,
+ 'playlist_count': 21,
}, {
- 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332',
- 'only_matching': True,
+ 'url': 'https://www.zdf.de/dokumentation/planet-e',
+ 'info_dict': {
+ 'id': 'planet-e',
+ 'title': 'planet e.',
+ },
+ 'playlist_count': 4,
}, {
- 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off',
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/',
'only_matching': True,
}]
- _PAGE_SIZE = 50
-
- def _fetch_page(self, channel_id, page):
- offset = page * self._PAGE_SIZE
- xml_url = (
- 'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
- % (offset, self._PAGE_SIZE, channel_id))
- doc = self._download_xml(
- xml_url, channel_id,
- note='Downloading channel info',
- errnote='Failed to download channel info')
-
- title = doc.find('.//information/title').text
- description = doc.find('.//information/detail').text
- for asset in doc.findall('.//teasers/teaser'):
- a_type = asset.find('./type').text
- a_id = asset.find('./details/assetId').text
- if a_type not in ('video', 'topic'):
- continue
- yield {
- '_type': 'url',
- 'playlist_title': title,
- 'playlist_description': description,
- 'url': 'zdf:%s:%s' % (a_type, a_id),
- }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
def _real_extract(self, url):
channel_id = self._match_id(url)
- entries = OnDemandPagedList(
- functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
- return {
- '_type': 'playlist',
- 'id': channel_id,
- 'entries': entries,
- }
+ webpage = self._download_webpage(url, channel_id)
+
+ entries = [
+ self.url_result(item_url, ie=ZDFIE.ie_key())
+ for item_url in orderedSet(re.findall(
+ r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
+
+ return self.playlist_result(
+ entries, channel_id, self._og_search_title(webpage, fatal=False))
+
+ r"""
+ player = self._extract_player(webpage, channel_id)
+
+ channel_id = self._search_regex(
+ r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
+ 'channel id', group='id')
+
+ channel = self._call_api(
+ 'https://api.zdf.de/content/documents/%s.json' % channel_id,
+ player, url, channel_id)
+
+ items = []
+ for module in channel['module']:
+ for teaser in try_get(module, lambda x: x['teaser'], list) or []:
+ t = try_get(
+ teaser, lambda x: x['http://zdf.de/rels/target'], dict)
+ if not t:
+ continue
+ items.extend(try_get(
+ t,
+ lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
+ list) or [])
+ items.extend(try_get(
+ module,
+ lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
+ list) or [])
+
+ entries = []
+ entry_urls = set()
+ for item in items:
+ t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
+ if not t:
+ continue
+ sharing_url = t.get('http://zdf.de/rels/sharing-url')
+ if not sharing_url or not isinstance(sharing_url, compat_str):
+ continue
+ if sharing_url in entry_urls:
+ continue
+ entry_urls.add(sharing_url)
+ entries.append(self.url_result(
+ sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
+
+ return self.playlist_result(entries, channel_id, channel.get('title'))
+ """
diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py
index 0f0e9d0..adfdcaa 100644
--- a/youtube_dl/extractor/zingmp3.py
+++ b/youtube_dl/extractor/zingmp3.py
@@ -95,7 +95,7 @@ class ZingMp3IE(ZingMp3BaseInfoExtractor):
'id': 'ZWZB9WAB',
'title': 'Xa Mãi Xa',
'ext': 'mp3',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'http://mp3.zing.vn/video-clip/Let-It-Go-Frozen-OST-Sungha-Jung/ZW6BAEA0.html',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a8df4ae..24cdec2 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -213,7 +213,7 @@ class JSInterpreter(object):
def extract_object(self, objname):
obj = {}
obj_m = re.search(
- (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
+ (r'(?<!this\.)%s\s*=\s*\{' % re.escape(objname)) +
r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
r'\}\s*;',
self.code)
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 53497fb..349f447 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -179,6 +179,10 @@ def parseOpts(overrideArguments=None):
'Do not read the user configuration in ~/.config/youtube-dl/config '
'(%APPDATA%/youtube-dl/config.txt on Windows)')
general.add_option(
+ '--config-location',
+ dest='config_location', metavar='PATH',
+ help='Location of the configuration file; either the path to the config or its containing directory.')
+ general.add_option(
'--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist',
default=False,
@@ -212,23 +216,23 @@ def parseOpts(overrideArguments=None):
network.add_option(
'--source-address',
metavar='IP', dest='source_address', default=None,
- help='Client-side IP address to bind to (experimental)',
+ help='Client-side IP address to bind to',
)
network.add_option(
'-4', '--force-ipv4',
action='store_const', const='0.0.0.0', dest='source_address',
- help='Make all connections via IPv4 (experimental)',
+ help='Make all connections via IPv4',
)
network.add_option(
'-6', '--force-ipv6',
action='store_const', const='::', dest='source_address',
- help='Make all connections via IPv6 (experimental)',
+ help='Make all connections via IPv6',
)
network.add_option(
'--geo-verification-proxy',
dest='geo_verification_proxy', default=None, metavar='URL',
help='Use this proxy to verify the IP address for some geo-restricted sites. '
- 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
+ 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.'
)
network.add_option(
'--cn-verification-proxy',
@@ -293,7 +297,7 @@ def parseOpts(overrideArguments=None):
'--match-filter',
metavar='FILTER', dest='match_filter', default=None,
help=(
- 'Generic video filter (experimental). '
+ 'Generic video filter. '
'Specify any key (see help for -o for a list of available keys) to'
' match if the key is present, '
'!key to check if the key is not present,'
@@ -341,7 +345,7 @@ def parseOpts(overrideArguments=None):
authentication.add_option(
'-2', '--twofactor',
dest='twofactor', metavar='TWOFACTOR',
- help='Two-factor auth code')
+ help='Two-factor authentication code')
authentication.add_option(
'-n', '--netrc',
action='store_true', dest='usenetrc', default=False,
@@ -446,7 +450,7 @@ def parseOpts(overrideArguments=None):
'--skip-unavailable-fragments',
action='store_true', dest='skip_unavailable_fragments', default=True,
help='Skip unavailable fragments (DASH and hlsnative only)')
- general.add_option(
+ downloader.add_option(
'--abort-on-unavailable-fragment',
action='store_false', dest='skip_unavailable_fragments',
help='Abort downloading when some fragment is not available')
@@ -467,9 +471,13 @@ def parseOpts(overrideArguments=None):
action='store_true',
help='Download playlist videos in reverse order')
downloader.add_option(
+ '--playlist-random',
+ action='store_true',
+ help='Download playlist videos in random order')
+ downloader.add_option(
'--xattr-set-filesize',
dest='xattr_set_filesize', action='store_true',
- help='Set file xattribute ytdl.filesize with expected filesize (experimental)')
+ help='Set file xattribute ytdl.filesize with expected file size (experimental)')
downloader.add_option(
'--hls-prefer-native',
dest='hls_prefer_native', action='store_true', default=None,
@@ -657,8 +665,12 @@ def parseOpts(overrideArguments=None):
help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
filesystem.add_option(
'--autonumber-size',
- dest='autonumber_size', metavar='NUMBER',
- help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
+ dest='autonumber_size', metavar='NUMBER', default=5, type=int,
+ help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given (default is %default)')
+ filesystem.add_option(
+ '--autonumber-start',
+ dest='autonumber_start', metavar='NUMBER', default=1, type=int,
+ help='Specify the start value for %(autonumber)s (default is %default)')
filesystem.add_option(
'--restrict-filenames',
action='store_true', dest='restrictfilenames', default=False,
@@ -747,7 +759,7 @@ def parseOpts(overrideArguments=None):
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
- help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
+ help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x')
postproc.add_option(
'--audio-quality', metavar='QUALITY',
dest='audioquality', default='5',
@@ -845,22 +857,32 @@ def parseOpts(overrideArguments=None):
return conf
command_line_conf = compat_conf(sys.argv[1:])
-
- if '--ignore-config' in command_line_conf:
- system_conf = []
- user_conf = []
+ opts, args = parser.parse_args(command_line_conf)
+
+ system_conf = user_conf = custom_conf = []
+
+ if '--config-location' in command_line_conf:
+ location = compat_expanduser(opts.config_location)
+ if os.path.isdir(location):
+ location = os.path.join(location, 'youtube-dl.conf')
+ if not os.path.exists(location):
+ parser.error('config-location %s does not exist.' % location)
+ custom_conf = _readOptions(location)
+ elif '--ignore-config' in command_line_conf:
+ pass
else:
system_conf = _readOptions('/etc/youtube-dl.conf')
- if '--ignore-config' in system_conf:
- user_conf = []
- else:
+ if '--ignore-config' not in system_conf:
user_conf = _readUserConf()
- argv = system_conf + user_conf + command_line_conf
+ argv = system_conf + user_conf + custom_conf + command_line_conf
opts, args = parser.parse_args(argv)
if opts.verbose:
- write_string('[debug] System config: ' + repr(_hide_login_info(system_conf)) + '\n')
- write_string('[debug] User config: ' + repr(_hide_login_info(user_conf)) + '\n')
- write_string('[debug] Command-line args: ' + repr(_hide_login_info(command_line_conf)) + '\n')
+ for conf_label, conf in (
+ ('System config', system_conf),
+ ('User config', user_conf),
+ ('Custom config', custom_conf),
+ ('Command-line args', command_line_conf)):
+ write_string('[debug] %s: %s\n' % (conf_label, repr(_hide_login_info(conf))))
return parser, opts, args
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
index 920573d..164edd3 100644
--- a/youtube_dl/postprocessor/metadatafromtitle.py
+++ b/youtube_dl/postprocessor/metadatafromtitle.py
@@ -12,7 +12,7 @@ class MetadataFromTitlePP(PostProcessor):
self._titleregex = self.format_to_regex(titleformat)
def format_to_regex(self, fmt):
- """
+ r"""
Converts a string like
'%(title)s - %(artist)s'
to a regex like
diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py
index 63d19b3..0f5d7bd 100644
--- a/youtube_dl/socks.py
+++ b/youtube_dl/socks.py
@@ -55,12 +55,12 @@ class Socks5AddressType(object):
ATYP_IPV6 = 0x04
-class ProxyError(IOError):
+class ProxyError(socket.error):
ERR_SUCCESS = 0x00
def __init__(self, code=None, msg=None):
if code is not None and msg is None:
- msg = self.CODES.get(code) and 'unknown error'
+ msg = self.CODES.get(code) or 'unknown error'
super(ProxyError, self).__init__(code, msg)
@@ -123,7 +123,7 @@ class sockssocket(socket.socket):
while len(data) < cnt:
cur = self.recv(cnt - len(data))
if not cur:
- raise IOError('{0} bytes missing'.format(cnt - len(data)))
+ raise EOFError('{0} bytes missing'.format(cnt - len(data)))
data += cur
return data
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 9595bcf..67a847e 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -86,6 +86,11 @@ std_headers = {
}
+USER_AGENTS = {
+ 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
+}
+
+
NO_DEFAULT = object()
ENGLISH_MONTH_NAMES = [
@@ -123,7 +128,13 @@ DATE_FORMATS = (
'%d %B %Y',
'%d %b %Y',
'%B %d %Y',
+ '%B %dst %Y',
+ '%B %dnd %Y',
+ '%B %dth %Y',
'%b %d %Y',
+ '%b %dst %Y',
+ '%b %dnd %Y',
+ '%b %dth %Y',
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
'%b %dth %Y %I:%M',
@@ -132,6 +143,7 @@ DATE_FORMATS = (
'%Y/%m/%d',
'%Y/%m/%d %H:%M',
'%Y/%m/%d %H:%M:%S',
+ '%Y-%m-%d %H:%M',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
@@ -496,7 +508,7 @@ def sanitize_path(s):
if drive_or_unc:
norm_path.pop(0)
sanitized_path = [
- path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part)
+ path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
for path_part in norm_path]
if drive_or_unc:
sanitized_path.insert(0, drive_or_unc + os.path.sep)
@@ -1178,7 +1190,7 @@ def date_from_str(date_str):
return today
if date_str == 'yesterday':
return today - datetime.timedelta(days=1)
- match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
+ match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
if match is not None:
sign = match.group('sign')
time = int(match.group('time'))
@@ -1695,6 +1707,16 @@ def base_url(url):
return re.match(r'https?://[^?#&]+/', url).group()
+def urljoin(base, path):
+ if not isinstance(path, compat_str) or not path:
+ return None
+ if re.match(r'^(?:https?:)?//', path):
+ return path
+ if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
+ return None
+ return compat_urlparse.urljoin(base, path)
+
+
class HEADRequest(compat_urllib_request.Request):
def get_method(self):
return 'HEAD'
@@ -1751,7 +1773,7 @@ def parse_duration(s):
s = s.strip()
days, hours, mins, secs, ms = [None] * 5
- m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s)
+ m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
if m:
days, hours, mins, secs, ms = m.groups()
else:
@@ -1768,11 +1790,11 @@ def parse_duration(s):
)?
(?:
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
- )?$''', s)
+ )?Z?$''', s)
if m:
days, hours, mins, secs, ms = m.groups()
else:
- m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s)
+ m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
if m:
hours, mins = m.groups()
else:
@@ -2081,11 +2103,18 @@ def strip_jsonp(code):
def js_to_json(code):
+ COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+ SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
+ INTEGER_TABLE = (
+ (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
+ (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+ )
+
def fix_kv(m):
v = m.group(0)
if v in ('true', 'false', 'null'):
return v
- elif v.startswith('/*') or v == ',':
+ elif v.startswith('/*') or v.startswith('//') or v == ',':
return ""
if v[0] in ("'", '"'):
@@ -2096,11 +2125,6 @@ def js_to_json(code):
'\\x': '\\u00',
}.get(m.group(0), m.group(0)), v[1:-1])
- INTEGER_TABLE = (
- (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
- (r'^(0+[0-7]+)\s*:?$', 8),
- )
-
for regex, base in INTEGER_TABLE:
im = re.match(regex, v)
if im:
@@ -2112,11 +2136,11 @@ def js_to_json(code):
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
- /\*.*?\*/|,(?=\s*[\]}])|
+ {comment}|,(?={skip}[\]}}])|
[a-zA-Z_][.a-zA-Z_0-9]*|
- \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
- [0-9]+(?=\s*:)
- ''', fix_kv, code)
+ \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
+ [0-9]+(?={skip}:)
+ '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
def qualities(quality_ids):
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 1acb630..a73e9d8 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.12.01'
+__version__ = '2017.02.07'