youtube_dl/extractor/aenetworks.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .theplatform import ThePlatformIE
   7 from ..utils import (
   8     ExtractorError,
   9     GeoRestrictedError,
  10     int_or_none,
  11     update_url_query,
  12     urlencode_postdata,
  13 )
  14
  15
  16 class AENetworksBaseIE(ThePlatformIE):
  17     _BASE_URL_REGEX = r'''(?x)https?://
  18         (?:(?:www|play|watch)\.)?
  19         (?P<domain>
  20             (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
  21             fyi\.tv
  22         )/'''
  23     _THEPLATFORM_KEY = 'crazyjava'
  24     _THEPLATFORM_SECRET = 's3cr3t'
  25     _DOMAIN_MAP = {
  26         'history.com': ('HISTORY', 'history'),
  27         'aetv.com': ('AETV', 'aetv'),
  28         'mylifetime.com': ('LIFETIME', 'lifetime'),
  29         'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
  30         'fyi.tv': ('FYI', 'fyi'),
  31         'historyvault.com': (None, 'historyvault'),
  32         'biography.com': (None, 'biography'),
  33     }
  34
  35     def _extract_aen_smil(self, smil_url, video_id, auth=None):
  36         query = {'mbr': 'true'}
  37         if auth:
  38             query['auth'] = auth
  39         TP_SMIL_QUERY = [{
  40             'assetTypes': 'high_video_ak',
  41             'switch': 'hls_high_ak'
  42         }, {
  43             'assetTypes': 'high_video_s3'
  44         }, {
  45             'assetTypes': 'high_video_s3',
  46             'switch': 'hls_high_fastly',
  47         }]
  48         formats = []
  49         subtitles = {}
  50         last_e = None
  51         for q in TP_SMIL_QUERY:
  52             q.update(query)
  53             m_url = update_url_query(smil_url, q)
  54             m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
  55             try:
  56                 tp_formats, tp_subtitles = self._extract_theplatform_smil(
  57                     m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
  58             except ExtractorError as e:
  59                 if isinstance(e, GeoRestrictedError):
  60                     raise
  61                 last_e = e
  62                 continue
  63             formats.extend(tp_formats)
  64             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
  65         if last_e and not formats:
  66             raise last_e
  67         self._sort_formats(formats)
  68         return {
  69             'id': video_id,
  70             'formats': formats,
  71             'subtitles': subtitles,
  72         }
  73
  74     def _extract_aetn_info(self, domain, filter_key, filter_value, url):
  75         requestor_id, brand = self._DOMAIN_MAP[domain]
  76         result = self._download_json(
  77             'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
  78             filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
  79         title = result['title']
  80         video_id = result['id']
  81         media_url = result['publicUrl']
  82         theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
  83             r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
  84         info = self._parse_theplatform_metadata(theplatform_metadata)
  85         auth = None
  86         if theplatform_metadata.get('AETN$isBehindWall'):
  87             resource = self._get_mvpd_resource(
  88                 requestor_id, theplatform_metadata['title'],
  89                 theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
  90                 theplatform_metadata['ratings'][0]['rating'])
  91             auth = self._extract_mvpd_auth(
  92                 url, video_id, requestor_id, resource)
  93         info.update(self._extract_aen_smil(media_url, video_id, auth))
  94         info.update({
  95             'title': title,
  96             'series': result.get('seriesName'),
  97             'season_number': int_or_none(result.get('tvSeasonNumber')),
  98             'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
  99         })
 100         return info
 101
 102
 103 class AENetworksIE(AENetworksBaseIE):
 104     IE_NAME = 'aenetworks'
 105     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
 106     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
 107         shows/[^/]+/season-\d+/episode-\d+|
 108         (?:
 109             (?:movie|special)s/[^/]+|
 110             (?:shows/[^/]+/)?videos
 111         )/[^/?#&]+
 112     )'''
 113     _TESTS = [{
 114         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
 115         'info_dict': {
 116             'id': '22253814',
 117             'ext': 'mp4',
 118             'title': 'Winter is Coming',
 119             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
 120             'timestamp': 1338306241,
 121             'upload_date': '20120529',
 122             'uploader': 'AENE-NEW',
 123         },
 124         'params': {
 125             # m3u8 download
 126             'skip_download': True,
 127         },
 128         'add_ie': ['ThePlatform'],
 129         'skip': 'This video is only available for users of participating TV providers.',
 130     }, {
 131         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
 132         'info_dict': {
 133             'id': '600587331957',
 134             'ext': 'mp4',
 135             'title': 'Inlawful Entry',
 136             'description': 'md5:57c12115a2b384d883fe64ca50529e08',
 137             'timestamp': 1452634428,
 138             'upload_date': '20160112',
 139             'uploader': 'AENE-NEW',
 140         },
 141         'params': {
 142             # m3u8 download
 143             'skip_download': True,
 144         },
 145         'add_ie': ['ThePlatform'],
 146     }, {
 147         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
 148         'only_matching': True
 149     }, {
 150         'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
 151         'only_matching': True
 152     }, {
 153         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
 154         'only_matching': True
 155     }, {
 156         'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
 157         'only_matching': True
 158     }, {
 159         'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
 160         'only_matching': True
 161     }, {
 162         'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
 163         'only_matching': True
 164     }, {
 165         'url': 'http://www.history.com/videos/history-of-valentines-day',
 166         'only_matching': True
 167     }, {
 168         'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
 169         'only_matching': True
 170     }]
 171
 172     def _real_extract(self, url):
 173         domain, canonical = re.match(self._VALID_URL, url).groups()
 174         return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
 175
 176
 177 class AENetworksListBaseIE(AENetworksBaseIE):
 178     def _call_api(self, resource, slug, brand, fields):
 179         return self._download_json(
 180             'https://yoga.appsvcs.aetnd.com/graphql',
 181             slug, query={'brand': brand}, data=urlencode_postdata({
 182                 'query': '''{
 183   %s(slug: "%s") {
 184     %s
 185   }
 186 }''' % (resource, slug, fields),
 187             }))['data'][resource]
 188
 189     def _real_extract(self, url):
 190         domain, slug = re.match(self._VALID_URL, url).groups()
 191         _, brand = self._DOMAIN_MAP[domain]
 192         playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
 193         base_url = 'http://watch.%s' % domain
 194
 195         entries = []
 196         for item in (playlist.get(self._ITEMS_KEY) or []):
 197             doc = self._get_doc(item)
 198             canonical = doc.get('canonical')
 199             if not canonical:
 200                 continue
 201             entries.append(self.url_result(
 202                 base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
 203
 204         description = None
 205         if self._PLAYLIST_DESCRIPTION_KEY:
 206             description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
 207
 208         return self.playlist_result(
 209             entries, playlist.get('id'),
 210             playlist.get(self._PLAYLIST_TITLE_KEY), description)
 211
 212
 213 class AENetworksCollectionIE(AENetworksListBaseIE):
 214     IE_NAME = 'aenetworks:collection'
 215     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
 216     _TESTS = [{
 217         'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
 218         'info_dict': {
 219             'id': '282',
 220             'title': 'America The Story of Us',
 221         },
 222         'playlist_mincount': 12,
 223     }, {
 224         'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
 225         'only_matching': True
 226     }, {
 227         'url': 'https://www.historyvault.com/collections/mysteryquest',
 228         'only_matching': True
 229     }]
 230     _RESOURCE = 'list'
 231     _ITEMS_KEY = 'items'
 232     _PLAYLIST_TITLE_KEY = 'display_title'
 233     _PLAYLIST_DESCRIPTION_KEY = None
 234     _FIELDS = '''id
 235     display_title
 236     items {
 237       ... on ListVideoItem {
 238         doc {
 239           canonical
 240           id
 241         }
 242       }
 243     }'''
 244
 245     def _get_doc(self, item):
 246         return item.get('doc') or {}
 247
 248
 249 class AENetworksShowIE(AENetworksListBaseIE):
 250     IE_NAME = 'aenetworks:show'
 251     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
 252     _TESTS = [{
 253         'url': 'http://www.history.com/shows/ancient-aliens',
 254         'info_dict': {
 255             'id': 'SERIES1574',
 256             'title': 'Ancient Aliens',
 257             'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
 258         },
 259         'playlist_mincount': 150,
 260     }]
 261     _RESOURCE = 'series'
 262     _ITEMS_KEY = 'episodes'
 263     _PLAYLIST_TITLE_KEY = 'title'
 264     _PLAYLIST_DESCRIPTION_KEY = 'description'
 265     _FIELDS = '''description
 266     id
 267     title
 268     episodes {
 269       canonical
 270       id
 271     }'''
 272
 273     def _get_doc(self, item):
 274         return item
 275
 276
 277 class HistoryTopicIE(AENetworksBaseIE):
 278     IE_NAME = 'history:topic'
 279     IE_DESC = 'History.com Topic'
 280     _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
 281     _TESTS = [{
 282         'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
 283         'info_dict': {
 284             'id': '40700995724',
 285             'ext': 'mp4',
 286             'title': "History of Valentine’s Day",
 287             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
 288             'timestamp': 1375819729,
 289             'upload_date': '20130806',
 290             'uploader': 'AENE-NEW',
 291         },
 292         'params': {
 293             # m3u8 download
 294             'skip_download': True,
 295         },
 296         'add_ie': ['ThePlatform'],
 297     }]
 298
 299     def _real_extract(self, url):
 300         display_id = self._match_id(url)
 301         return self.url_result(
 302             'http://www.history.com/videos/' + display_id,
 303             AENetworksIE.ie_key())
 304
 305
 306 class HistoryPlayerIE(AENetworksBaseIE):
 307     IE_NAME = 'history:player'
 308     _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
 309     _TESTS = []
 310
 311     def _real_extract(self, url):
 312         domain, video_id = re.match(self._VALID_URL, url).groups()
 313         return self._extract_aetn_info(domain, 'id', video_id, url)
 314
 315
 316 class BiographyIE(AENetworksBaseIE):
 317     _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
 318     _TESTS = [{
 319         'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
 320         'info_dict': {
 321             'id': '30322987',
 322             'ext': 'mp4',
 323             'title': 'Vincent Van Gogh - Full Episode',
 324             'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
 325             'timestamp': 1311970571,
 326             'upload_date': '20110729',
 327             'uploader': 'AENE-NEW',
 328         },
 329         'params': {
 330             # m3u8 download
 331             'skip_download': True,
 332         },
 333         'add_ie': ['ThePlatform'],
 334     }]
 335
 336     def _real_extract(self, url):
 337         display_id = self._match_id(url)
 338         webpage = self._download_webpage(url, display_id)
 339         player_url = self._search_regex(
 340             r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
 341             webpage, 'player URL')
 342         return self.url_result(player_url, HistoryPlayerIE.ie_key())