youtube_dl/extractor/videomore.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..compat import (
   8     compat_parse_qs,
   9     compat_str,
  10     compat_urllib_parse_urlparse,
  11 )
  12 from ..utils import (
  13     ExtractorError,
  14     int_or_none,
  15 )
  16
  17
  18 class VideomoreBaseIE(InfoExtractor):
  19     _API_BASE_URL = 'https://more.tv/api/v3/web/'
  20     _VALID_URL_BASE = r'https?://(?:videomore\.ru|more\.tv)/'
  21
  22     def _download_page_data(self, display_id):
  23         return self._download_json(
  24             self._API_BASE_URL + 'PageData', display_id, query={
  25                 'url': '/' + display_id,
  26             })['attributes']['response']['data']
  27
  28     def _track_url_result(self, track):
  29         track_vod = track['trackVod']
  30         video_url = track_vod.get('playerLink') or track_vod['link']
  31         return self.url_result(
  32             video_url, VideomoreIE.ie_key(), track_vod.get('hubId'))
  33
  34
  35 class VideomoreIE(InfoExtractor):
  36     IE_NAME = 'videomore'
  37     _VALID_URL = r'''(?x)
  38                     videomore:(?P<sid>\d+)$|
  39                     https?://
  40                         (?:
  41                             videomore\.ru/
  42                             (?:
  43                                 embed|
  44                                 [^/]+/[^/]+
  45                             )/|
  46                             (?:
  47                                 (?:player\.)?videomore\.ru|
  48                                 siren\.more\.tv/player
  49                             )/[^/]*\?.*?\btrack_id=|
  50                             odysseus\.more.tv/player/(?P<partner_id>\d+)/
  51                         )
  52                         (?P<id>\d+)
  53                         (?:[/?#&]|\.(?:xml|json)|$)
  54                     '''
  55     _TESTS = [{
  56         'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
  57         'md5': '44455a346edc0d509ac5b5a5b531dc35',
  58         'info_dict': {
  59             'id': '367617',
  60             'ext': 'flv',
  61             'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук',
  62             'series': 'Кино в деталях',
  63             'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
  64             'thumbnail': r're:^https?://.*\.jpg',
  65             'duration': 2910,
  66             'view_count': int,
  67             'comment_count': int,
  68             'age_limit': 16,
  69         },
  70         'skip': 'The video is not available for viewing.',
  71     }, {
  72         'url': 'http://videomore.ru/embed/259974',
  73         'info_dict': {
  74             'id': '259974',
  75             'ext': 'mp4',
  76             'title': 'Молодежка 2 сезон 40 серия',
  77             'series': 'Молодежка',
  78             'season': '2 сезон',
  79             'episode': '40 серия',
  80             'thumbnail': r're:^https?://.*\.jpg',
  81             'duration': 2789,
  82             'view_count': int,
  83             'age_limit': 16,
  84         },
  85         'params': {
  86             'skip_download': True,
  87         },
  88     }, {
  89         'url': 'http://videomore.ru/molodezhka/sezon_promo/341073',
  90         'info_dict': {
  91             'id': '341073',
  92             'ext': 'flv',
  93             'title': 'Промо Команда проиграла из-за Бакина?',
  94             'episode': 'Команда проиграла из-за Бакина?',
  95             'thumbnail': r're:^https?://.*\.jpg',
  96             'duration': 29,
  97             'age_limit': 16,
  98             'view_count': int,
  99         },
 100         'params': {
 101             'skip_download': True,
 102         },
 103         'skip': 'The video is not available for viewing.',
 104     }, {
 105         'url': 'http://videomore.ru/elki_3?track_id=364623',
 106         'only_matching': True,
 107     }, {
 108         'url': 'http://videomore.ru/embed/364623',
 109         'only_matching': True,
 110     }, {
 111         'url': 'http://videomore.ru/video/tracks/364623.xml',
 112         'only_matching': True,
 113     }, {
 114         'url': 'http://videomore.ru/video/tracks/364623.json',
 115         'only_matching': True,
 116     }, {
 117         'url': 'http://videomore.ru/video/tracks/158031/quotes/33248',
 118         'only_matching': True,
 119     }, {
 120         'url': 'videomore:367617',
 121         'only_matching': True,
 122     }, {
 123         'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
 124         'only_matching': True,
 125     }, {
 126         'url': 'https://odysseus.more.tv/player/1788/352317',
 127         'only_matching': True,
 128     }, {
 129         'url': 'https://siren.more.tv/player/config?track_id=352317&partner_id=1788&user_token=',
 130         'only_matching': True,
 131     }]
 132     _GEO_BYPASS = False
 133
 134     @staticmethod
 135     def _extract_url(webpage):
 136         mobj = re.search(
 137             r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
 138             webpage)
 139         if not mobj:
 140             mobj = re.search(
 141                 r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
 142                 webpage)
 143
 144         if mobj:
 145             return mobj.group('url')
 146
 147     def _real_extract(self, url):
 148         mobj = re.match(self._VALID_URL, url)
 149         video_id = mobj.group('sid') or mobj.group('id')
 150         partner_id = mobj.group('partner_id') or compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('partner_id', [None])[0] or '97'
 151
 152         item = self._download_json(
 153             'https://siren.more.tv/player/config', video_id, query={
 154                 'partner_id': partner_id,
 155                 'track_id': video_id,
 156             })['data']['playlist']['items'][0]
 157
 158         title = item.get('title')
 159         series = item.get('project_name')
 160         season = item.get('season_name')
 161         episode = item.get('episode_name')
 162         if not title:
 163             title = []
 164             for v in (series, season, episode):
 165                 if v:
 166                     title.append(v)
 167             title = ' '.join(title)
 168
 169         streams = item.get('streams') or []
 170         for protocol in ('DASH', 'HLS'):
 171             stream_url = item.get(protocol.lower() + '_url')
 172             if stream_url:
 173                 streams.append({'protocol': protocol, 'url': stream_url})
 174
 175         formats = []
 176         for stream in streams:
 177             stream_url = stream.get('url')
 178             if not stream_url:
 179                 continue
 180             protocol = stream.get('protocol')
 181             if protocol == 'DASH':
 182                 formats.extend(self._extract_mpd_formats(
 183                     stream_url, video_id, mpd_id='dash', fatal=False))
 184             elif protocol == 'HLS':
 185                 formats.extend(self._extract_m3u8_formats(
 186                     stream_url, video_id, 'mp4', 'm3u8_native',
 187                     m3u8_id='hls', fatal=False))
 188             elif protocol == 'MSS':
 189                 formats.extend(self._extract_ism_formats(
 190                     stream_url, video_id, ism_id='mss', fatal=False))
 191
 192         if not formats:
 193             error = item.get('error')
 194             if error:
 195                 if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'):
 196                     self.raise_geo_restricted(countries=['RU'])
 197                 raise ExtractorError(error, expected=True)
 198         self._sort_formats(formats)
 199
 200         return {
 201             'id': video_id,
 202             'title': title,
 203             'series': series,
 204             'season': season,
 205             'episode': episode,
 206             'thumbnail': item.get('thumbnail_url'),
 207             'duration': int_or_none(item.get('duration')),
 208             'view_count': int_or_none(item.get('views')),
 209             'age_limit': int_or_none(item.get('min_age')),
 210             'formats': formats,
 211         }
 212
 213
 214 class VideomoreVideoIE(VideomoreBaseIE):
 215     IE_NAME = 'videomore:video'
 216     _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?P<id>(?:(?:[^/]+/){2})?[^/?#&]+)(?:/*|[?#&].*?)$'
 217     _TESTS = [{
 218         # single video with og:video:iframe
 219         'url': 'http://videomore.ru/elki_3',
 220         'info_dict': {
 221             'id': '364623',
 222             'ext': 'flv',
 223             'title': 'Ёлки 3',
 224             'description': '',
 225             'thumbnail': r're:^https?://.*\.jpg',
 226             'duration': 5579,
 227             'age_limit': 6,
 228             'view_count': int,
 229         },
 230         'params': {
 231             'skip_download': True,
 232         },
 233         'skip': 'Requires logging in',
 234     }, {
 235         # season single series with og:video:iframe
 236         'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
 237         'info_dict': {
 238             'id': '352317',
 239             'ext': 'mp4',
 240             'title': 'Последний мент 1 сезон 14 серия',
 241             'series': 'Последний мент',
 242             'season': '1 сезон',
 243             'episode': '14 серия',
 244             'thumbnail': r're:^https?://.*\.jpg',
 245             'duration': 2464,
 246             'age_limit': 16,
 247             'view_count': int,
 248         },
 249         'params': {
 250             'skip_download': True,
 251         },
 252     }, {
 253         'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk',
 254         'only_matching': True,
 255     }, {
 256         # single video without og:video:iframe
 257         'url': 'http://videomore.ru/marin_i_ego_druzya',
 258         'info_dict': {
 259             'id': '359073',
 260             'ext': 'flv',
 261             'title': '1 серия. Здравствуй, Аквавилль!',
 262             'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7',
 263             'thumbnail': r're:^https?://.*\.jpg',
 264             'duration': 754,
 265             'age_limit': 6,
 266             'view_count': int,
 267         },
 268         'params': {
 269             'skip_download': True,
 270         },
 271         'skip': 'redirects to https://more.tv/'
 272     }, {
 273         'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
 274         'only_matching': True,
 275     }, {
 276         'url': 'https://more.tv/poslednii_ment/1_sezon/14_seriya',
 277         'only_matching': True,
 278     }]
 279
 280     @classmethod
 281     def suitable(cls, url):
 282         return False if VideomoreIE.suitable(url) else super(VideomoreVideoIE, cls).suitable(url)
 283
 284     def _real_extract(self, url):
 285         display_id = self._match_id(url)
 286         return self._track_url_result(self._download_page_data(display_id))
 287
 288
 289 class VideomoreSeasonIE(VideomoreBaseIE):
 290     IE_NAME = 'videomore:season'
 291     _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
 292     _TESTS = [{
 293         'url': 'http://videomore.ru/molodezhka/film_o_filme',
 294         'info_dict': {
 295             'id': 'molodezhka/film_o_filme',
 296             'title': 'Фильм о фильме',
 297         },
 298         'playlist_mincount': 3,
 299     }, {
 300         'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
 301         'only_matching': True,
 302     }, {
 303         'url': 'https://more.tv/molodezhka/film_o_filme',
 304         'only_matching': True,
 305     }]
 306
 307     @classmethod
 308     def suitable(cls, url):
 309         return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url))
 310                 else super(VideomoreSeasonIE, cls).suitable(url))
 311
 312     def _real_extract(self, url):
 313         display_id = self._match_id(url)
 314         season = self._download_page_data(display_id)
 315         season_id = compat_str(season['id'])
 316         tracks = self._download_json(
 317             self._API_BASE_URL + 'seasons/%s/tracks' % season_id,
 318             season_id)['data']
 319         entries = []
 320         for track in tracks:
 321             entries.append(self._track_url_result(track))
 322         return self.playlist_result(entries, display_id, season.get('title'))