youtube_dl/extractor/steam.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     ExtractorError,
   8     unescapeHTML,
   9 )
  10
  11
  12 class SteamIE(InfoExtractor):
  13     _VALID_URL = r"""http://store\.steampowered\.com/
  14                 (agecheck/)?
  15                 (?P<urltype>video|app)/ #If the page is only for videos or for a game
  16                 (?P<gameID>\d+)/?
  17                 (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
  18                 """
  19     _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
  20     _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
  21     _TEST = {
  22         "url": "http://store.steampowered.com/video/105600/",
  23         "playlist": [
  24             {
  25                 "md5": "f870007cee7065d7c76b88f0a45ecc07",
  26                 "info_dict": {
  27                     'id': '81300',
  28                     'ext': 'flv',
  29                     "title": "Terraria 1.1 Trailer",
  30                     'playlist_index': 1,
  31                 }
  32             },
  33             {
  34                 "md5": "61aaf31a5c5c3041afb58fb83cbb5751",
  35                 "info_dict": {
  36                     'id': '80859',
  37                     'ext': 'flv',
  38                     "title": "Terraria Trailer",
  39                     'playlist_index': 2,
  40                 }
  41             }
  42         ]
  43     }
  44
  45
  46     @classmethod
  47     def suitable(cls, url):
  48         """Receives a URL and returns True if suitable for this IE."""
  49         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  50
  51     def _real_extract(self, url):
  52         m = re.match(self._VALID_URL, url, re.VERBOSE)
  53         gameID = m.group('gameID')
  54
  55         videourl = self._VIDEO_PAGE_TEMPLATE % gameID
  56         webpage = self._download_webpage(videourl, gameID)
  57
  58         if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
  59             videourl = self._AGECHECK_TEMPLATE % gameID
  60             self.report_age_confirmation()
  61             webpage = self._download_webpage(videourl, gameID)
  62
  63         self.report_extraction(gameID)
  64         game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
  65                                              webpage, 'game title')
  66
  67         urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
  68         mweb = re.finditer(urlRE, webpage)
  69         namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
  70         titles = re.finditer(namesRE, webpage)
  71         thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
  72         thumbs = re.finditer(thumbsRE, webpage)
  73         videos = []
  74         for vid,vtitle,thumb in zip(mweb,titles,thumbs):
  75             video_id = vid.group('videoID')
  76             title = vtitle.group('videoName')
  77             video_url = vid.group('videoURL')
  78             video_thumb = thumb.group('thumbnail')
  79             if not video_url:
  80                 raise ExtractorError('Cannot find video url for %s' % video_id)
  81             info = {
  82                 'id':video_id,
  83                 'url':video_url,
  84                 'ext': 'flv',
  85                 'title': unescapeHTML(title),
  86                 'thumbnail': video_thumb
  87                   }
  88             videos.append(info)
  89         return self.playlist_result(videos, gameID, game_title)