From: Jaime Marquínez Ferrándiz Date: Sun, 27 Oct 2013 13:40:25 +0000 (+0100) Subject: [generic] Detect bandcamp pages that use custom domains (closes #1662) X-Git-Tag: 2013.10.28~7 X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=commitdiff_plain;h=c19f7764a5499b0f1e1914dd5101619b8d57d7cf;p=youtube-dl.git [generic] Detect bandcamp pages that use custom domains (closes #1662) They embed the original url in the 'og:url' property. --- diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ab4a5b7de5..2c8fcf5ae5 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -41,7 +41,17 @@ class GenericIE(InfoExtractor): u"uploader_id": u"skillsmatter", u"uploader": u"Skills Matter", } - } + }, + # bandcamp page with custom domain + { + u'url': u'http://bronyrock.com/track/the-pony-mash', + u'file': u'3235767654.mp3', + u'info_dict': { + u'title': u'The Pony Mash', + u'uploader': u'M_Pallante', + }, + u'skip': u'There is a limit of 200 free downloads / month for the test song', + }, ] def report_download_webpage(self, video_id): @@ -155,6 +165,12 @@ class GenericIE(InfoExtractor): surl = unescapeHTML(mobj.group(1)) return self.url_result(surl, 'Youtube') + # Look for Bandcamp pages with custom domain + mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) + if mobj is not None: + burl = unescapeHTML(mobj.group(1)) + return self.url_result(burl, 'Bandcamp') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: