]> asedeno.scripts.mit.edu Git - youtube-dl.git/commitdiff
Merge remote-tracking branch 'nulloz/telebruxelles'
authorPhilipp Hagemeister <phihag@phihag.de>
Sun, 23 Nov 2014 08:38:18 +0000 (09:38 +0100)
committerPhilipp Hagemeister <phihag@phihag.de>
Sun, 23 Nov 2014 08:38:18 +0000 (09:38 +0100)
19 files changed:
youtube_dl/YoutubeDL.py
youtube_dl/compat.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/folketinget.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/goldenmoustache.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/stanfordoc.py
youtube_dl/extractor/sztvhu.py
youtube_dl/extractor/vh1.py
youtube_dl/jsinterp.py
youtube_dl/utils.py
youtube_dl/version.py

index 94c50903c737ea298b44e772b55500766b2f904b..fde026fbf695781ac22d936c87c0216e4c12cfd9 100755 (executable)
@@ -624,7 +624,7 @@ class YoutubeDL(object):
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
-        elif result_type == 'playlist':
+        elif result_type == 'playlist' or result_type == 'multi_video':
             # We process each entry in the playlist
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             self.to_screen('[download] Downloading playlist: %s' % playlist)
@@ -679,6 +679,9 @@ class YoutubeDL(object):
             ie_result['entries'] = playlist_results
             return ie_result
         elif result_type == 'compat_list':
+            self.report_warning(
+                'Extractor %s returned a compat_list result. '
+                'It needs to be updated.' % ie_result.get('extractor'))
             def _fixup(r):
                 self.add_extra_info(r,
                     {
index 64a97548933eadec42aa578583ce878ce2cc1e9e..9d33a8ec5fbd80a75a5d382e8621f6dc616c7aa0 100644 (file)
@@ -302,8 +302,10 @@ else:
 # Fix https://github.com/rg3/youtube-dl/issues/4223
 # See http://bugs.python.org/issue9161 for what is broken
 def workaround_optparse_bug9161():
+    op = optparse.OptionParser()
+    og = optparse.OptionGroup(op, 'foo')
     try:
-        optparse.OptionGroup('foo').add_option('-t')
+        og.add_option('-t')
     except TypeError:
         real_add_option = optparse.OptionGroup.add_option
 
index 7275d247ad200cde97e73390672a7561386f864e..7497a97f5c7ece071b0ddc5427dc3ed746d1421d 100644 (file)
@@ -115,6 +115,7 @@ from .fktv import (
     FKTVPosteckeIE,
 )
 from .flickr import FlickrIE
+from .folketinget import FolketingetIE
 from .fourtube import FourTubeIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
index b9a9440c09b85365a2997bd5feddbae017601c2d..3a57ce52785a7d4950ec694b71fb9bdff69d1e55 100644 (file)
@@ -5,13 +5,12 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     find_xpath_attr,
     unified_strdate,
-    determine_ext,
     get_element_by_id,
     get_element_by_attribute,
     int_or_none,
+    qualities,
 )
 
 # There are different sources of video in arte.tv, the extraction process 
@@ -102,79 +101,54 @@ class ArteTVPlus7IE(InfoExtractor):
             'upload_date': unified_strdate(upload_date_str),
             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
         }
+        qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
 
-        all_formats = []
+        formats = []
         for format_id, format_dict in player_info['VSR'].items():
-            fmt = dict(format_dict)
-            fmt['format_id'] = format_id
-            all_formats.append(fmt)
-        # Some formats use the m3u8 protocol
-        all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
-        def _match_lang(f):
-            if f.get('versionCode') is None:
-                return True
-            # Return true if that format is in the language of the url
-            if lang == 'fr':
-                l = 'F'
-            elif lang == 'de':
-                l = 'A'
-            else:
-                l = lang
-            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
-            return any(re.match(r, f['versionCode']) for r in regexes)
-        # Some formats may not be in the same language as the url
-        # TODO: Might want not to drop videos that does not match requested language
-        # but to process those formats with lower precedence
-        formats = filter(_match_lang, all_formats)
-        formats = list(formats)  # in python3 filter returns an iterator
-        if not formats:
-            # Some videos are only available in the 'Originalversion'
-            # they aren't tagged as being in French or German
-            # Sometimes there are neither videos of requested lang code
-            # nor original version videos available
-            # For such cases we just take all_formats as is
-            formats = all_formats
-            if not formats:
-                raise ExtractorError('The formats list is empty')
-
-        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
-            def sort_key(f):
-                return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
-        else:
-            def sort_key(f):
-                versionCode = f.get('versionCode')
-                if versionCode is None:
-                    versionCode = ''
-                return (
-                    # Sort first by quality
-                    int(f.get('height', -1)),
-                    int(f.get('bitrate', -1)),
-                    # The original version with subtitles has lower relevance
-                    re.match(r'VO-ST(F|A)', versionCode) is None,
-                    # The version with sourds/mal subtitles has also lower relevance
-                    re.match(r'VO?(F|A)-STM\1', versionCode) is None,
-                    # Prefer http downloads over m3u8
-                    0 if f['url'].endswith('m3u8') else 1,
-                )
-        formats = sorted(formats, key=sort_key)
-        def _format(format_info):
-            info = {
-                'format_id': format_info['format_id'],
-                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
-                'width': int_or_none(format_info.get('width')),
-                'height': int_or_none(format_info.get('height')),
-                'tbr': int_or_none(format_info.get('bitrate')),
+            f = dict(format_dict)
+            versionCode = f.get('versionCode')
+
+            langcode = {
+                'fr': 'F',
+                'de': 'A',
+            }.get(lang, lang)
+            lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode]
+            lang_pref = (
+                None if versionCode is None else (
+                    10 if any(re.match(r, versionCode) for r in lang_rexs)
+                    else -10))
+            source_pref = 0
+            if versionCode is not None:
+                # The original version with subtitles has lower relevance
+                if re.match(r'VO-ST(F|A)', versionCode):
+                    source_pref -= 10
+                # The version with sourds/mal subtitles has also lower relevance
+                elif re.match(r'VO?(F|A)-STM\1', versionCode):
+                    source_pref -= 9
+            format = {
+                'format_id': format_id,
+                'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
+                'language_preference': lang_pref,
+                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
+                'width': int_or_none(f.get('width')),
+                'height': int_or_none(f.get('height')),
+                'tbr': int_or_none(f.get('bitrate')),
+                'quality': qfunc(f['quality']),
+                'source_preference': source_pref,
             }
-            if format_info['mediaType'] == 'rtmp':
-                info['url'] = format_info['streamer']
-                info['play_path'] = 'mp4:' + format_info['url']
-                info['ext'] = 'flv'
+
+            if f.get('mediaType') == 'rtmp':
+                format['url'] = f['streamer']
+                format['play_path'] = 'mp4:' + f['url']
+                format['ext'] = 'flv'
             else:
-                info['url'] = format_info['url']
-                info['ext'] = determine_ext(info['url'])
-            return info
-        info_dict['formats'] = [_format(f) for f in formats]
+                format['url'] = f['url']
+
+            formats.append(format)
+
+        self._sort_formats(formats)
 
+        info_dict['formats'] = formats
         return info_dict
 
 
index 2370c24b06eb9b9547fbc822a413405b2223ae42..f2b02643de162ff1547bef47a62dca55f2a405a3 100644 (file)
@@ -166,9 +166,17 @@ class BlipTVIE(SubtitlesInfoExtractor):
 
 
 class BlipTVUserIE(InfoExtractor):
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
+    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
     _PAGE_SIZE = 12
     IE_NAME = 'blip.tv:user'
+    _TEST = {
+        'url': 'http://blip.tv/actone',
+        'info_dict': {
+            'id': 'actone',
+            'title': 'Act One: The Series',
+        },
+        'playlist_count': 5,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -179,6 +187,7 @@ class BlipTVUserIE(InfoExtractor):
         page = self._download_webpage(url, username, 'Downloading user page')
         mobj = re.search(r'data-users-id="([^"]+)"', page)
         page_base = page_base % mobj.group(1)
+        title = self._og_search_title(page)
 
         # Download video ids using BlipTV Ajax calls. Result size per
         # query is limited (currently to 12 videos) so we need to query
@@ -215,4 +224,5 @@ class BlipTVUserIE(InfoExtractor):
 
         urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
         url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
-        return [self.playlist_result(url_entries, playlist_title=username)]
+        return self.playlist_result(
+            url_entries, playlist_title=title, playlist_id=username)
index a6920685e1ff93aeaecb0e551215a8343a50e8b0..2db7f9fef4f50f4f7f95b812d5245c224dc7ed20 100644 (file)
@@ -111,6 +111,8 @@ class BrightcoveIE(InfoExtractor):
                             lambda m: m.group(1) + '/>', object_str)
         # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
         object_str = object_str.replace('<--', '<!--')
+        # remove namespace to simplify extraction
+        object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
         object_str = fix_xml_ampersands(object_str)
 
         object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
@@ -219,7 +221,7 @@ class BrightcoveIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id)
 
         error_msg = self._html_search_regex(
-            r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
+            r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
             'error message', default=None)
         if error_msg is not None:
             raise ExtractorError(
index 49b978b4eb0f17555f20e6c27f16880371610720..2e3ef3fdab4c25f2818f46a820702056ceb3f294 100644 (file)
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
 from .mtv import MTVServicesInfoExtractor
 from ..utils import (
     compat_str,
@@ -110,9 +109,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
+        mobj = re.match(self._VALID_URL, url)
 
         if mobj.group('shortname'):
             if mobj.group('shortname') in ('tds', 'thedailyshow'):
index b77f0e51904d1539a43c056dafbc82a587041744..93a5a3d57bf027cce5f6293c65f219c5d2cfe51d 100644 (file)
@@ -43,7 +43,11 @@ class InfoExtractor(object):
     information possibly downloading the video to the file system, among
     other possible outcomes.
 
-    The dictionaries must include the following fields:
+    The type field determines the the type of the result.
+    By far the most common value (and the default if _type is missing) is
+    "video", which indicates a single video.
+
+    For a video, the dictionaries must include the following fields:
 
     id:             Video identifier.
     title:          Video title, unescaped.
@@ -87,6 +91,11 @@ class InfoExtractor(object):
                                  by this field, regardless of all other values.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
+                    * language_preference  Is this in the correct requested
+                                 language?
+                                 10 if it's what the URL is about,
+                                 -1 for default (don't know),
+                                 -10 otherwise, other values reserved for now.
                     * quality    Order number of the video quality of this
                                  format, irrespective of the file format.
                                  -1 for default (order by other properties),
@@ -146,6 +155,38 @@ class InfoExtractor(object):
 
     Unless mentioned otherwise, None is equivalent to absence of information.
 
+
+    _type "playlist" indicates multiple videos.
+    There must be a key "entries", which is a list or a PagedList object, each
+    element of which is a valid dictionary under this specfication.
+
+    Additionally, playlists can have "title" and "id" attributes with the same
+    semantics as videos (see above).
+
+
+    _type "multi_video" indicates that there are multiple videos that
+    form a single show, for examples multiple acts of an opera or TV episode.
+    It must have an entries key like a playlist and contain all the keys
+    required for a video at the same time.
+
+
+    _type "url" indicates that the video must be extracted from another
+    location, possibly by a different extractor. Its only required key is:
+    "url" - the next URL to extract.
+
+    Additionally, it may have properties believed to be identical to the
+    resolved entity, for example "title" if the title of the referred video is
+    known ahead of time.
+
+
+    _type "url_transparent" entities have the same specification as "url", but
+    indicate that the given additional information is more precise than the one
+    associated with the resolved URL.
+    This is useful when a site employs a video service that hosts the video and
+    its technical metadata, but that video service does not embed a useful
+    title, description etc.
+
+
     Subclasses of this one should re-define the _real_initialize() and
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
@@ -615,6 +656,7 @@ class InfoExtractor(object):
 
             return (
                 preference,
+                f.get('language_preference') if f.get('language_preference') is not None else -1,
                 f.get('quality') if f.get('quality') is not None else -1,
                 f.get('height') if f.get('height') is not None else -1,
                 f.get('width') if f.get('width') is not None else -1,
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py
new file mode 100644 (file)
index 0000000..68e2db9
--- /dev/null
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class FolketingetIE(InfoExtractor):
+    IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
+    _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
+    _TEST = {
+        'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+        'info_dict': {
+            'id': '1165642',
+            'ext': 'mp4',
+            'title': 'Åbent samråd i Erhvervsudvalget',
+            'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
+            'view_count': int,
+            'width': 768,
+            'height': 432,
+            'tbr': 928000,
+            'timestamp': 1416493800,
+            'upload_date': '20141120',
+            'duration': 3960,
+        },
+        'params': {
+            'skip_download': 'rtmpdump required',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
+            webpage, 'description', fatal=False)
+
+        player_params = compat_parse_qs(self._search_regex(
+            r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
+            webpage, 'player params'))
+        xml_url = player_params['xml'][0]
+        doc = self._download_xml(xml_url, video_id)
+
+        timestamp = parse_iso8601(xpath_text(doc, './/date'))
+        duration = parse_duration(xpath_text(doc, './/duration'))
+        width = int_or_none(xpath_text(doc, './/width'))
+        height = int_or_none(xpath_text(doc, './/height'))
+        view_count = int_or_none(xpath_text(doc, './/views'))
+
+        formats = [{
+            'format_id': n.attrib['bitrate'],
+            'url': xpath_text(n, './url', fatal=True),
+            'tbr': int_or_none(n.attrib['bitrate']),
+        } for n in doc.findall('.//streams/stream')]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'timestamp': timestamp,
+            'width': width,
+            'height': height,
+            'duration': duration,
+            'view_count': view_count,
+        }
index af769ab619eac9cb9b3d20869ba4a9e009977f6c..c7a824c29b15187becb0bb9758ee94348f2309da 100644 (file)
@@ -979,7 +979,7 @@ class GenericIE(InfoExtractor):
                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
         if not found:
             # HTML5 video
-            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src="([^"]+)"', webpage)
+            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
         if not found:
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
index 7e13b131ba7702bc0db40781a0e9942f721afeee..10001d4d95f9895c5e965f88c967aa87f63ff7f2 100644 (file)
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals
 
-import re
 from .common import InfoExtractor
 from ..utils import (
-    parse_duration,
     int_or_none,
 )
 
index 474bdff7dcd50712b20c0cf1a70e20f960e0b1be..5f0f476b690579dbbc0aeb03171982944270409f 100644 (file)
@@ -145,7 +145,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
         idoc = self._download_xml(
             feed_url + '?' + data, video_id,
             'Downloading info', transform_source=fix_xml_ampersands)
-        return [self._get_video_info(item) for item in idoc.findall('.//item')]
+        return self.playlist_result(
+            [self._get_video_info(item) for item in idoc.findall('.//item')])
 
     def _real_extract(self, url):
         title = url_basename(url)
index 5daef2fc5656191ea2cc0c411d520b0b0f5cd455..4a188e5d46304ab8059d646cea667edc86cf7cb4 100644 (file)
@@ -28,9 +28,8 @@ class RtlXlIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         uuid = mobj.group('uuid')
 
-        # Use m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
         info = self._download_json(
-            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/d=pc/fmt=adaptive/' % uuid,
+            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
             uuid)
 
         material = info['material'][0]
@@ -39,12 +38,13 @@ class RtlXlIE(InfoExtractor):
         progname = info['abstracts'][0]['name']
         subtitle = material['title'] or info['episodes'][0]['name']
 
-        videopath = material['videopath']
+        # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
+        videopath = material['videopath'].replace('.f4m', '.m3u8')
         m3u8_url = 'http://manifest.us.rtl.nl' + videopath
 
         formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
 
-        video_urlpart = videopath.split('/adaptive/')[1][:-4]
+        video_urlpart = videopath.split('/flash/')[1][:-4]
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
 
         formats.extend([
index 44c52c718e2090cca8f28c6542d67ae4585332e6..5feb4ff83f9c4d2def9196666bde4fcc2a3a9101 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -9,24 +11,23 @@ from ..utils import (
 
 
 class StanfordOpenClassroomIE(InfoExtractor):
-    IE_NAME = u'stanfordoc'
-    IE_DESC = u'Stanford Open ClassRoom'
-    _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+    IE_NAME = 'stanfordoc'
+    IE_DESC = 'Stanford Open ClassRoom'
+    _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
     _TEST = {
-        u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
-        u'file': u'PracticalUnix_intro-environment.mp4',
-        u'md5': u'544a9468546059d4e80d76265b0443b8',
-        u'info_dict': {
-            u"title": u"Intro Environment"
+        'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
+        'md5': '544a9468546059d4e80d76265b0443b8',
+        'info_dict': {
+            'id': 'PracticalUnix_intro-environment',
+            'ext': 'mp4',
+            'title': 'Intro Environment',
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
 
-        if mobj.group('course') and mobj.group('video'): # A specific video
+        if mobj.group('course') and mobj.group('video'):  # A specific video
             course = mobj.group('course')
             video = mobj.group('video')
             info = {
@@ -35,7 +36,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
                 'upload_date': None,
             }
 
-            self.report_extraction(info['id'])
             baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
             xmlUrl = baseUrl + video + '.xml'
             mdoc = self._download_xml(xmlUrl, info['id'])
@@ -43,63 +43,49 @@ class StanfordOpenClassroomIE(InfoExtractor):
                 info['title'] = mdoc.findall('./title')[0].text
                 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
             except IndexError:
-                raise ExtractorError(u'Invalid metadata XML file')
-            info['ext'] = info['url'].rpartition('.')[2]
-            return [info]
-        elif mobj.group('course'): # A course page
+                raise ExtractorError('Invalid metadata XML file')
+            return info
+        elif mobj.group('course'):  # A course page
             course = mobj.group('course')
             info = {
                 'id': course,
-                'type': 'playlist',
+                '_type': 'playlist',
                 'uploader': None,
                 'upload_date': None,
             }
 
-            coursepage = self._download_webpage(url, info['id'],
-                                        note='Downloading course info page',
-                                        errnote='Unable to download course info page')
+            coursepage = self._download_webpage(
+                url, info['id'],
+                note='Downloading course info page',
+                errnote='Unable to download course info page')
 
-            info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
+            info['title'] = self._html_search_regex(
+                r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
 
-            info['description'] = self._html_search_regex('<description>([^<]+)</description>',
-                coursepage, u'description', fatal=False)
+            info['description'] = self._html_search_regex(
+                r'(?s)<description>([^<]+)</description>',
+                coursepage, 'description', fatal=False)
 
             links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
-            info['list'] = [
-                {
-                    'type': 'reference',
-                    'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
-                }
-                    for vpage in links]
-            results = []
-            for entry in info['list']:
-                assert entry['type'] == 'reference'
-                results += self.extract(entry['url'])
-            return results
-        else: # Root page
+            info['entries'] = [self.url_result(
+                'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+            ) for l in links]
+            return info
+        else:  # Root page
             info = {
                 'id': 'Stanford OpenClassroom',
-                'type': 'playlist',
+                '_type': 'playlist',
                 'uploader': None,
                 'upload_date': None,
             }
+            info['title'] = info['id']
 
             rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
             rootpage = self._download_webpage(rootURL, info['id'],
-                errnote=u'Unable to download course info page')
-
-            info['title'] = info['id']
+                errnote='Unable to download course info page')
 
             links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
-            info['list'] = [
-                {
-                    'type': 'reference',
-                    'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
-                }
-                    for cpage in links]
-
-            results = []
-            for entry in info['list']:
-                assert entry['type'] == 'reference'
-                results += self.extract(entry['url'])
-            return results
+            info['entries'] = [self.url_result(
+                'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+            ) for l in links]
+            return info
index c9359fafb5c5989923c6320e3e684673b80057d6..aa5964acb6b3f40b0d663bd2169ac6aec0c210ae 100644 (file)
@@ -1,27 +1,24 @@
 # -*- coding: utf-8 -*-
-
-import re
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import determine_ext
 
 
 class SztvHuIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
+    _VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
     _TEST = {
-        u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
-        u'file': u'20130909.mp4',
-        u'md5': u'a6df607b11fb07d0e9f2ad94613375cb',
-        u'info_dict': {
-            u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
-            u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
+        'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
+        'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
+        'info_dict': {
+            'id': '20130909',
+            'ext': 'mp4',
+            'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
+            'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
         },
-        u'skip': u'Service temporarily disabled as of 2013-11-20'
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         video_file = self._search_regex(
             r'file: "...:(.*?)",', webpage, 'video file')
@@ -39,7 +36,6 @@ class SztvHuIE(InfoExtractor):
             'id': video_id,
             'url': video_url,
             'title': title,
-            'ext': determine_ext(video_url),
             'description': description,
             'thumbnail': thumbnail,
         }
index 2f77e38981c3607b6dceb00cac3bca514bf2fcd8..6be3774b7aa7367e9a067b417d7a749fece55d05 100644 (file)
@@ -121,4 +121,7 @@ class VH1IE(MTVIE):
         idoc = self._download_xml(
             doc_url, video_id,
             'Downloading info', transform_source=fix_xml_ampersands)
-        return [self._get_video_info(item) for item in idoc.findall('.//item')]
+        return self.playlist_result(
+            [self._get_video_info(item) for item in idoc.findall('.//item')],
+            playlist_id=video_id,
+        )
index c40cd376d120f2063bb4cf6958ca4cf701db1f00..b4617fbad0fc40323a129ce1218f9f97590c89bb 100644 (file)
@@ -61,7 +61,7 @@ class JSInterpreter(object):
             pass
 
         m = re.match(
-            r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+            r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
             expr)
         if m:
             variable = m.group('var')
index 94b496dd0a693ae61997881458e1e202a184a330..5be7cf99200c57639af17b1dc89f9a861658f331 100644 (file)
@@ -71,10 +71,10 @@ def preferredencoding():
 
 
 def write_json_file(obj, fn):
-    """ Encode obj as JSON and write it to fn, atomically """
+    """ Encode obj as JSON and write it to fn, atomically if possible """
 
     fn = encodeFilename(fn)
-    if sys.version_info < (3, 0):
+    if sys.version_info < (3, 0) and sys.platform != 'win32':
         encoding = get_filesystem_encoding()
         # os.path.basename returns a bytes object, but NamedTemporaryFile
         # will fail if the filename contains non ascii characters unless we
@@ -108,6 +108,13 @@ def write_json_file(obj, fn):
     try:
         with tf:
             json.dump(obj, tf)
+        if sys.platform == 'win32':
+            # Need to remove existing file on Windows, else os.rename raises
+            # WindowsError or FileExistsError.
+            try:
+                os.unlink(fn)
+            except OSError:
+                pass
         os.rename(tf.name, fn)
     except:
         try:
@@ -413,6 +420,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
             pass  # Python < 3.4
         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 
+
 class ExtractorError(Exception):
     """Error during info extraction."""
     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
@@ -427,7 +435,13 @@ class ExtractorError(Exception):
         if cause:
             msg += ' (caused by %r)' % cause
         if not expected:
-            msg = msg + '; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
+            if ytdl_is_updateable():
+                update_cmd = 'type  youtube-dl -U  to update'
+            else:
+                update_cmd = 'see  https://yt-dl.org/update  on how to update'
+            msg += '; please report this issue on https://yt-dl.org/bug .'
+            msg += ' Make sure you are using the latest version; %s.' % update_cmd
+            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
         super(ExtractorError, self).__init__(msg)
 
         self.traceback = tb
@@ -1412,3 +1426,10 @@ def is_outdated_version(version, limit, assume_new=True):
         return version_tuple(version) < version_tuple(limit)
     except ValueError:
         return not assume_new
+
+
+def ytdl_is_updateable():
+    """ Returns if youtube-dl can be updated with -U """
+    from zipimport import zipimporter
+
+    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
index 34bf665adbcea74760b81f3a6d0ab06b56f68066..a283afbe374d97dfa7775082c73c4b68b9ba372b 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.11.16'
+__version__ = '2014.11.21.1'