]> asedeno.scripts.mit.edu Git - youtube-dl.git/blob - youtube_dl/extractor/picarto.py
[twitch:clips] Add access token query to download URLs (closes #29136)
[youtube-dl.git] / youtube_dl / extractor / picarto.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     js_to_json,
8 )
9
10
class PicartoIE(InfoExtractor):
    """Extractor for live channel pages on picarto.tv (``picarto.tv/<channel>``)."""

    # The dot after "www" is escaped so that only a literal "www." prefix is
    # accepted (an unescaped "." would match any character).
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://picarto.tv/Setz',
        'info_dict': {
            'id': 'Setz',
            'ext': 'mp4',
            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'timestamp': int,
            'is_live': True
        },
        'skip': 'Stream is offline',
    }

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        URLs claimed by PicartoVodIE are rejected here so that VOD popout
        pages are never treated as live channels.
        """
        return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build the info dict for the live stream of the channel in ``url``.

        Queries the ptvapi endpoint for channel metadata and the load
        balancer URL, then downloads the per-stream source list from the
        load balancer and turns each source into youtube-dl formats.

        Raises ExtractorError when the channel metadata is missing or the
        channel reports itself as offline.
        """
        channel_id = self._match_id(url)

        data = self._download_json(
            'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
                'query': '''{
  channel(name: "%s") {
    adult
    id
    online
    stream_name
    title
  }
  getLoadBalancerUrl(channel_name: "%s") {
    url
  }
}''' % (channel_id, channel_id),
            })['data']
        metadata = data['channel']

        # A null/non-object ``channel`` would otherwise surface as a bare
        # AttributeError on the ``.get`` call below.
        if not isinstance(metadata, dict):
            raise ExtractorError(
                'Unable to get metadata for channel %s' % channel_id)

        if metadata.get('online') == 0:
            raise ExtractorError('Stream is offline', expected=True)
        title = metadata['title']

        cdn_data = self._download_json(
            data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
            channel_id, 'Downloading load balancing info')

        formats = []
        for source in (cdn_data.get('source') or []):
            source_url = source.get('url')
            if not source_url:
                continue
            source_type = source.get('type')
            if source_type == 'html5/application/vnd.apple.mpegurl':
                # HLS playlist: expand into its variant formats; a broken
                # playlist is skipped instead of aborting the extraction.
                formats.extend(self._extract_m3u8_formats(
                    source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
            elif source_type == 'html5/video/mp4':
                formats.append({
                    'url': source_url,
                })
        self._sort_formats(formats)

        # ``adult`` absent -> age limit unknown; otherwise only an explicit
        # ``True`` marks the channel as 18+.
        mature = metadata.get('adult')
        if mature is None:
            age_limit = None
        else:
            age_limit = 18 if mature is True else 0

        return {
            'id': channel_id,
            'title': self._live_title(title.strip()),
            'is_live': True,
            'channel': channel_id,
            'channel_id': metadata.get('id'),
            'channel_url': 'https://picarto.tv/%s' % channel_id,
            'age_limit': age_limit,
            'formats': formats,
        }
88
89
class PicartoVodIE(InfoExtractor):
    """Extractor for picarto.tv VOD popout pages (``picarto.tv/videopopout/<id>``)."""

    # The dot after "www" is escaped so that only a literal "www." prefix is
    # accepted (an unescaped "." would match any character).
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
        'info_dict': {
            'id': 'ArtofZod_2017.12.12.00.13.23.flv',
            'ext': 'mp4',
            'title': 'ArtofZod_2017.12.12.00.13.23.flv',
            'thumbnail': r're:^https?://.*\.jpg'
        },
    }, {
        'url': 'https://picarto.tv/videopopout/Plague',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the VOD referenced by ``url``.

        Downloads the popout page, pulls the JavaScript object passed
        alongside ``#vod-player`` out of it, and extracts HLS formats from
        that object's ``vod`` entry. The video id doubles as the title.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The third argument of _search_regex is the human-readable name of
        # the field being searched for, so a descriptive label is passed
        # rather than the video id.
        vod_info = self._parse_json(
            self._search_regex(
                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
                'vod player info'),
            video_id, transform_source=js_to_json)

        formats = self._extract_m3u8_formats(
            vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': vod_info.get('vodThumb'),
            'formats': formats,
        }