asedeno.scripts.mit.edu Git - youtube-dl.git/blob - youtube_dl/extractor/trovo.py
[trovo] Add new extractor(closes #26125)
[youtube-dl.git] / youtube_dl / extractor / trovo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5
6 from .common import InfoExtractor
7 from ..utils import (
8     ExtractorError,
9     int_or_none,
10     str_or_none,
11     try_get,
12 )
13
14
class TrovoBaseIE(InfoExtractor):
    """Common base for the Trovo extractors.

    Provides the URL prefix the concrete extractors build their
    ``_VALID_URL`` from, plus a helper that turns a ``streamerInfo``
    object into the uploader fields of an info dict.
    """

    _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'

    def _extract_streamer_info(self, data):
        """Return the uploader fields derived from ``data['streamerInfo']``.

        ``data`` is a dict; when ``streamerInfo`` is absent or falsy an
        empty dict is used in its place, so every lookup below yields
        ``None`` before conversion.
        """
        streamer = data.get('streamerInfo') or {}
        user_name = streamer.get('userName')
        # The channel URL can only be built when a user name is present.
        if user_name:
            channel_url = 'https://trovo.live/' + user_name
        else:
            channel_url = None
        return {
            'uploader': streamer.get('nickName'),
            'uploader_id': str_or_none(streamer.get('uid')),
            'uploader_url': channel_url,
        }
26
27
class TrovoIE(TrovoBaseIE):
    """Extractor for Trovo channel URLs.

    Matches any first path segment under the base URL except ``clip/``
    and ``video/`` and treats that segment as the streamer's user name.
    """

    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'

    def _real_extract(self, url):
        """Query the GraphQL endpoint for the channel's live info.

        Returns an info dict flagged ``is_live`` with one format per
        ``streamInfo`` entry that carries a ``playUrl``.

        Raises:
            ExtractorError: (expected) when the API reports ``isLive == 0``.
        """
        username = self._match_id(url)

        # The user name is interpolated straight into the GraphQL document.
        live_query = '''{
  getLiveInfo(params: {userName: "%s"}) {
    isLive
    programInfo {
      coverUrl
      id
      streamInfo {
        desc
        playUrl
      }
      title
    }
    streamerInfo {
        nickName
        uid
        userName
    }
  }
}''' % username
        response = self._download_json(
            'https://gql.trovo.live/', username,
            query={'query': live_query})
        live_info = response['data']['getLiveInfo']

        if live_info.get('isLive') == 0:
            raise ExtractorError('%s is offline' % username, expected=True)

        program_info = live_info['programInfo']
        program_id = program_info['id']
        title = self._live_title(program_info['title'])

        formats = []
        for stream in program_info.get('streamInfo') or []:
            stream_url = stream.get('playUrl')
            if stream_url:
                # 'desc' doubles as the format id; its last character is
                # dropped before the remainder is parsed as the height.
                quality = stream.get('desc')
                if quality:
                    height = int_or_none(quality[:-1])
                else:
                    height = None
                formats.append({
                    'url': stream_url,
                    'format_id': quality,
                    'height': height,
                })
        self._sort_formats(formats)

        info = {
            'id': program_id,
            'title': title,
            'is_live': True,
            'thumbnail': program_info.get('coverUrl'),
            'formats': formats,
        }
        streamer_fields = self._extract_streamer_info(live_info)
        info.update(streamer_fields)
        return info
83
84
class TrovoVodIE(TrovoBaseIE):
    """Extractor for Trovo ``clip/<id>`` and ``video/<id>`` URLs."""

    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
        'info_dict': {
            'id': 'ltv-100095501_100095501_1609596043',
            'ext': 'mp4',
            'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
            'uploader': 'Exsl',
            'timestamp': 1609640305,
            'upload_date': '20210103',
            'uploader_id': '100095501',
            'duration': 43977,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'comments': 'mincount:8',
            'categories': ['Grand Theft Auto V'],
        },
    }, {
        'url': 'https://trovo.live/clip/lc-5285890810184026005',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch VOD details and comments in one batched GraphQL request.

        The request body is a JSON array of two queries; the response is
        indexed positionally: element 0 holds the VOD details (required),
        element 1 the comment list (optional, read via ``try_get``).
        """
        vid = self._match_id(url)

        detail_query = '''{
  batchGetVodDetailInfo(params: {vids: ["%s"]}) {
    VodDetailInfos
  }
}''' % vid
        comments_query = '''{
  getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
    commentList {
      author {
        nickName
        uid
      }
      commentID
      content
      createdAt
      parentID
    }
  }
}''' % vid
        payload = json.dumps([
            {'query': detail_query},
            {'query': comments_query},
        ]).encode()
        resp = self._download_json(
            'https://gql.trovo.live/', vid, data=payload,
            headers={'Content-Type': 'application/json'})

        # VodDetailInfos is keyed by the video id that was requested.
        vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
        vod_info = vod_detail_info['vodInfo']
        title = vod_info['title']

        language = vod_info.get('languageName')
        formats = []
        for entry in vod_info.get('playInfos') or []:
            media_url = entry.get('playUrl')
            if media_url:
                # 'desc' doubles as the format id; its last character is
                # dropped before the remainder is parsed as the height.
                quality = entry.get('desc')
                if quality:
                    height = int_or_none(quality[:-1])
                else:
                    height = None
                formats.append({
                    'url': media_url,
                    'format_id': quality,
                    'height': height,
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                    'language': language,
                    'tbr': int_or_none(entry.get('bitrate')),
                    'filesize': int_or_none(entry.get('fileSize')),
                })
        self._sort_formats(formats)

        category = vod_info.get('categoryName')

        raw_comments = try_get(
            resp, lambda x: x[1]['data']['getCommentList']['commentList'],
            list) or []
        comments = []
        for item in raw_comments:
            text = item.get('content')
            if not text:
                # Comments without content are dropped.
                continue
            author = item.get('author') or {}
            parent_id = item.get('parentID')
            # A parentID equal to 0 is reported as 'root'.
            if parent_id == 0:
                parent = 'root'
            else:
                parent = str_or_none(parent_id)
            comments.append({
                'id': str_or_none(item.get('commentID')),
                'parent': parent,
                'text': text,
                'timestamp': int_or_none(item.get('createdAt')),
                'author': author.get('nickName'),
                'author_id': str_or_none(author.get('uid')),
            })

        info = {
            'id': vid,
            'title': title,
            'formats': formats,
            'thumbnail': vod_info.get('coverUrl'),
            'timestamp': int_or_none(vod_info.get('publishTs')),
            'duration': int_or_none(vod_info.get('duration')),
            'view_count': int_or_none(vod_info.get('watchNum')),
            'like_count': int_or_none(vod_info.get('likeNum')),
            'comment_count': int_or_none(vod_info.get('commentNum')),
            'comments': comments,
            'categories': [category] if category else None,
        }
        streamer_fields = self._extract_streamer_info(vod_detail_info)
        info.update(streamer_fields)
        return info