youtube_dl/extractor/trovo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     ExtractorError,
   9     int_or_none,
  10     str_or_none,
  11     try_get,
  12 )
  13
  14
  15 class TrovoBaseIE(InfoExtractor):
  16     _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
  17
  18     def _extract_streamer_info(self, data):
  19         streamer_info = data.get('streamerInfo') or {}
  20         username = streamer_info.get('userName')
  21         return {
  22             'uploader': streamer_info.get('nickName'),
  23             'uploader_id': str_or_none(streamer_info.get('uid')),
  24             'uploader_url': 'https://trovo.live/' + username if username else None,
  25         }
  26
  27
  28 class TrovoIE(TrovoBaseIE):
  29     _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
  30
  31     def _real_extract(self, url):
  32         username = self._match_id(url)
  33         live_info = self._download_json(
  34             'https://gql.trovo.live/', username, query={
  35                 'query': '''{
  36   getLiveInfo(params: {userName: "%s"}) {
  37     isLive
  38     programInfo {
  39       coverUrl
  40       id
  41       streamInfo {
  42         desc
  43         playUrl
  44       }
  45       title
  46     }
  47     streamerInfo {
  48         nickName
  49         uid
  50         userName
  51     }
  52   }
  53 }''' % username,
  54             })['data']['getLiveInfo']
  55         if live_info.get('isLive') == 0:
  56             raise ExtractorError('%s is offline' % username, expected=True)
  57         program_info = live_info['programInfo']
  58         program_id = program_info['id']
  59         title = self._live_title(program_info['title'])
  60
  61         formats = []
  62         for stream_info in (program_info.get('streamInfo') or []):
  63             play_url = stream_info.get('playUrl')
  64             if not play_url:
  65                 continue
  66             format_id = stream_info.get('desc')
  67             formats.append({
  68                 'format_id': format_id,
  69                 'height': int_or_none(format_id[:-1]) if format_id else None,
  70                 'url': play_url,
  71             })
  72         self._sort_formats(formats)
  73
  74         info = {
  75             'id': program_id,
  76             'title': title,
  77             'formats': formats,
  78             'thumbnail': program_info.get('coverUrl'),
  79             'is_live': True,
  80         }
  81         info.update(self._extract_streamer_info(live_info))
  82         return info
  83
  84
  85 class TrovoVodIE(TrovoBaseIE):
  86     _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
  87     _TESTS = [{
  88         'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
  89         'info_dict': {
  90             'id': 'ltv-100095501_100095501_1609596043',
  91             'ext': 'mp4',
  92             'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
  93             'uploader': 'Exsl',
  94             'timestamp': 1609640305,
  95             'upload_date': '20210103',
  96             'uploader_id': '100095501',
  97             'duration': 43977,
  98             'view_count': int,
  99             'like_count': int,
 100             'comment_count': int,
 101             'comments': 'mincount:8',
 102             'categories': ['Grand Theft Auto V'],
 103         },
 104     }, {
 105         'url': 'https://trovo.live/clip/lc-5285890810184026005',
 106         'only_matching': True,
 107     }]
 108
 109     def _real_extract(self, url):
 110         vid = self._match_id(url)
 111         resp = self._download_json(
 112             'https://gql.trovo.live/', vid, data=json.dumps([{
 113                 'query': '''{
 114   batchGetVodDetailInfo(params: {vids: ["%s"]}) {
 115     VodDetailInfos
 116   }
 117 }''' % vid,
 118             }, {
 119                 'query': '''{
 120   getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
 121     commentList {
 122       author {
 123         nickName
 124         uid
 125       }
 126       commentID
 127       content
 128       createdAt
 129       parentID
 130     }
 131   }
 132 }''' % vid,
 133             }]).encode(), headers={
 134                 'Content-Type': 'application/json',
 135             })
 136         vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
 137         vod_info = vod_detail_info['vodInfo']
 138         title = vod_info['title']
 139
 140         language = vod_info.get('languageName')
 141         formats = []
 142         for play_info in (vod_info.get('playInfos') or []):
 143             play_url = play_info.get('playUrl')
 144             if not play_url:
 145                 continue
 146             format_id = play_info.get('desc')
 147             formats.append({
 148                 'ext': 'mp4',
 149                 'filesize': int_or_none(play_info.get('fileSize')),
 150                 'format_id': format_id,
 151                 'height': int_or_none(format_id[:-1]) if format_id else None,
 152                 'language': language,
 153                 'protocol': 'm3u8_native',
 154                 'tbr': int_or_none(play_info.get('bitrate')),
 155                 'url': play_url,
 156             })
 157         self._sort_formats(formats)
 158
 159         category = vod_info.get('categoryName')
 160         get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
 161
 162         comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
 163         comments = []
 164         for comment in comment_list:
 165             content = comment.get('content')
 166             if not content:
 167                 continue
 168             author = comment.get('author') or {}
 169             parent = comment.get('parentID')
 170             comments.append({
 171                 'author': author.get('nickName'),
 172                 'author_id': str_or_none(author.get('uid')),
 173                 'id': str_or_none(comment.get('commentID')),
 174                 'text': content,
 175                 'timestamp': int_or_none(comment.get('createdAt')),
 176                 'parent': 'root' if parent == 0 else str_or_none(parent),
 177             })
 178
 179         info = {
 180             'id': vid,
 181             'title': title,
 182             'formats': formats,
 183             'thumbnail': vod_info.get('coverUrl'),
 184             'timestamp': int_or_none(vod_info.get('publishTs')),
 185             'duration': int_or_none(vod_info.get('duration')),
 186             'view_count': get_count('watch'),
 187             'like_count': get_count('like'),
 188             'comment_count': get_count('comment'),
 189             'comments': comments,
 190             'categories': [category] if category else None,
 191         }
 192         info.update(self._extract_streamer_info(vod_detail_info))
 193         return info