]> asedeno.scripts.mit.edu Git - youtube-dl.git/blob - youtube_dl/extractor/streamcz.py
97b2eb7f8d89a67590a4f3b5b2f88a1893915bd0
[youtube-dl.git] / youtube_dl / extractor / streamcz.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import re
6
7 from .common import InfoExtractor
8 from ..utils import (
9     float_or_none,
10     int_or_none,
11     merge_dicts,
12     parse_codecs,
13     urljoin,
14 )
15
16
class StreamCZIE(InfoExtractor):
    """Information extractor for stream.cz / televizeseznam.cz episode pages.

    The episode is looked up through the site's GraphQL endpoint; the
    returned ``spl`` URL points to a JSON playlist that lists the available
    streams and subtitles.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:stream|televizeseznam)\.cz/[^?#]+/(?P<display_id>[^?#]+)-(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890',
        'md5': '40c41ade1464a390a0b447e333df4239',
        'info_dict': {
            'id': '57953890',
            'ext': 'mp4',
            'title': 'Bůh',
            'display_id': 'buh',
            'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
            'duration': 1369.6,
            'view_count': int,
        }
    }, {
        'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937',
        'md5': '41fd358000086a1ccdb068c77809b158',
        'info_dict': {
            'id': '64087937',
            'ext': 'mp4',
            'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna',
            'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna',
            'description': 'md5:97a811000a6460266029d6c1c2ebcd59',
            'duration': 50.2,
            'view_count': int,
        }
    }, {
        'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
        'md5': '3ee4d0be040e8f4a543e67e509d55e3f',
        'info_dict': {
            'id': '64147267',
            'ext': 'mp4',
            'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
            'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
            'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf',
            'duration': 442.84,
            'view_count': int,
        }
    }]

    def _extract_formats(self, spl_url, video):
        """Yield one format dict per usable stream listed in the playlist.

        :param spl_url: URL the playlist was fetched from; relative stream
            URLs are resolved against it.
        :param video: the ``data`` object of the playlist JSON.  Streams are
            read from ``http_stream.qualities`` (yielded as ``ts``) and from
            ``mp4`` (yielded as ``mp4``); either section may be absent.
        """
        # `or {}` rather than a .get() default: the default only covers a
        # missing key, not an explicit JSON null, and either stream section
        # may be missing from the playlist altogether.
        ts_streams = (video.get('http_stream') or {}).get('qualities') or {}
        mp4_streams = video.get('mp4') or {}

        # mp4 gets the higher source_preference, so it wins over ts when
        # formats are otherwise equal.
        for ext, pref, streams in (
                ('ts', -1, ts_streams),
                ('mp4', 1, mp4_streams)):
            for format_id, stream in streams.items():
                if not stream.get('url'):
                    continue

                # `resolution` is read as a [width, height] pair; tolerate it
                # being missing, null or too short instead of crashing.
                resolution = stream.get('resolution') or ()
                width = resolution[0] if len(resolution) > 0 else None
                height = resolution[1] if len(resolution) > 1 else None

                # merge_dicts keeps the explicit values above and lets the
                # parsed codec information fill in the remaining fields.
                yield merge_dicts({
                    'format_id': '-'.join((format_id, ext)),
                    'ext': ext,
                    'source_preference': pref,
                    'url': urljoin(spl_url, stream['url']),
                    'tbr': float_or_none(stream.get('bandwidth'), scale=1000),
                    'duration': float_or_none(stream.get('duration'), scale=1000),
                    'width': width or None,
                    # Fall back to the quality label (e.g. "720p") when the
                    # playlist carries no usable height.
                    'height': height or int_or_none(format_id.replace('p', '')),
                }, parse_codecs(stream.get('codec')))

    def _real_extract(self, url):
        """Return the info dict (metadata, formats, subtitles) for ``url``."""
        display_id, video_id = re.match(self._VALID_URL, url).groups()

        # Episode metadata, including the base URL of the stream playlist
        # (`spl`), comes from the GraphQL API.
        data = self._download_json(
            'https://www.televizeseznam.cz/api/graphql', video_id, 'Downloading GraphQL result',
            data=json.dumps({
                'variables': {'urlName': video_id},
                'query': '''
                    query LoadEpisode($urlName : String){ episode(urlName: $urlName){ ...VideoDetailFragmentOnEpisode } }
                    fragment VideoDetailFragmentOnEpisode on Episode {
                        id
                        spl
                        urlName
                        name
                        perex
                        duration
                        views
                    }'''
            }).encode('utf-8'),
            headers={'Content-Type': 'application/json;charset=UTF-8'}
        )['data']['episode']

        spl_url = data['spl'] + 'spl2,3'
        metadata = self._download_json(spl_url, video_id, 'Downloading playlist')
        # The playlist endpoint may answer with a JSON-level redirect
        # ({"Location": ...}) instead of the playlist itself; follow it once.
        if 'Location' in metadata and 'data' not in metadata:
            spl_url = metadata['Location']
            metadata = self._download_json(spl_url, video_id, 'Downloading redirected playlist')
        video = metadata['data']

        subtitles = {}
        for subs in (video.get('subtitles') or {}).values():
            if not subs.get('language'):
                continue
            # An entry without `urls` simply contributes no subtitle tracks.
            for ext, sub_url in (subs.get('urls') or {}).items():
                subtitles.setdefault(subs['language'], []).append({
                    'ext': ext,
                    'url': urljoin(spl_url, sub_url)
                })

        formats = list(self._extract_formats(spl_url, video))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': data.get('name'),
            'description': data.get('perex'),
            'duration': float_or_none(data.get('duration')),
            'view_count': int_or_none(data.get('views')),
            'formats': formats,
            'subtitles': subtitles,
        }