]> asedeno.scripts.mit.edu Git - youtube-dl.git/blob - youtube_dl/extractor/youtube.py
052926ee53bde137eabfb4b91c9fbcab9fc0b468
[youtube-dl.git] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18     compat_chr,
19     compat_parse_qs,
20     compat_urllib_parse_unquote,
21     compat_urllib_parse_unquote_plus,
22     compat_urllib_parse_urlencode,
23     compat_urllib_parse_urlparse,
24     compat_urlparse,
25     compat_str,
26 )
27 from ..utils import (
28     bool_or_none,
29     clean_html,
30     error_to_compat_str,
31     ExtractorError,
32     float_or_none,
33     get_element_by_id,
34     int_or_none,
35     mimetype2ext,
36     parse_codecs,
37     parse_duration,
38     remove_quotes,
39     remove_start,
40     smuggle_url,
41     str_or_none,
42     str_to_int,
43     try_get,
44     unescapeHTML,
45     unified_strdate,
46     unsmuggle_url,
47     update_url_query,
48     uppercase_escape,
49     url_or_none,
50     urlencode_postdata,
51     urljoin,
52 )
53
54
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'

    def _set_language(self):
        """Set the PREF cookie so YouTube serves pages with English UI text."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Map a list of video IDs to url_result dicts handled by the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Fix: return False (not None) to honor the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one Google identity RPC; responses carry an anti-XSSI
            # preamble before the JSON array which transform_source strips.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Positional request payload expected by the /sl/lookup endpoint;
        # the structure is opaque and must be preserved exactly.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Fix: return False (not None) to honor the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Fix: parenthesize the conditional — '%' binds tighter than
            # 'if/else', so the original dropped the 'Unable to login: '
            # prefix whenever the server returned any other message.
            warn('Unable to login: %s' % (
                'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED'
                else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    # Fix: a separating space was missing between the two
                    # concatenated string literals ('<code>(Note ...')
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Fix: same precedence bug as the login warning above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED'
                        else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _real_initialize(self):
        # Runs once before extraction: pick English UI, then best-effort login.
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Minimal client context sent with every youtubei (InnerTube) API call
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

    def _call_api(self, ep, query, video_id):
        """POST `query` (merged over _DEFAULT_API_DATA) to the youtubei
        endpoint `ep` and return the parsed JSON response."""
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            # API key embedded in the public YouTube web client
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate and parse the ytInitialData JSON blob embedded in `webpage`,
        preferring a match that ends the line (less prone to over-matching)."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)
302
303
304 class YoutubeIE(YoutubeBaseInfoExtractor):
305     IE_DESC = 'YouTube.com'
306     _VALID_URL = r"""(?x)^
307                      (
308                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
309                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
310                             (?:www\.)?deturl\.com/www\.youtube\.com/|
311                             (?:www\.)?pwnyoutube\.com/|
312                             (?:www\.)?hooktube\.com/|
313                             (?:www\.)?yourepeat\.com/|
314                             tube\.majestyc\.net/|
315                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
316                             (?:(?:www|dev)\.)?invidio\.us/|
317                             (?:(?:www|no)\.)?invidiou\.sh/|
318                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
319                             (?:www\.)?invidious\.kabi\.tk/|
320                             (?:www\.)?invidious\.13ad\.de/|
321                             (?:www\.)?invidious\.mastodon\.host/|
322                             (?:www\.)?invidious\.nixnet\.xyz/|
323                             (?:www\.)?invidious\.drycat\.fr/|
324                             (?:www\.)?tube\.poal\.co/|
325                             (?:www\.)?vid\.wxzm\.sx/|
326                             (?:www\.)?yewtu\.be/|
327                             (?:www\.)?yt\.elukerio\.org/|
328                             (?:www\.)?yt\.lelux\.fi/|
329                             (?:www\.)?invidious\.ggc-project\.de/|
330                             (?:www\.)?yt\.maisputain\.ovh/|
331                             (?:www\.)?invidious\.13ad\.de/|
332                             (?:www\.)?invidious\.toot\.koeln/|
333                             (?:www\.)?invidious\.fdn\.fr/|
334                             (?:www\.)?watch\.nettohikari\.com/|
335                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
336                             (?:www\.)?qklhadlycap4cnod\.onion/|
337                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
338                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
339                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
340                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
341                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
342                             (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
343                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
344                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
345                          (?:                                                  # the various things that can precede the ID:
346                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
347                              |(?:                                             # or the v= param in all its forms
348                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
349                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
350                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
351                                  v=
352                              )
353                          ))
354                          |(?:
355                             youtu\.be|                                        # just youtu.be/xxxx
356                             vid\.plus|                                        # or vid.plus/xxxx
357                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
358                          )/
359                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
360                          )
361                      )?                                                       # all until now is optional -> you can pass the naked ID
362                      (?P<id>[0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
363                      (?!.*?\blist=
364                         (?:
365                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
366                             WL                                                # WL are handled by the watch later IE
367                         )
368                      )
369                      (?(1).+)?                                                # if we found the ID, everything can follow
370                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
371     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Regexes matched against YouTube player asset URLs to capture a player
    # `id` and file `ext` (named groups); tried in order, first match wins —
    # presumably consumed by a player-info helper outside this view; confirm.
    _PLAYER_INFO_RE = (
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
    )
376     _formats = {
377         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
378         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
379         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
380         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
381         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
382         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
383         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
384         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
385         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
386         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
387         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
388         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
389         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
390         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
391         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
392         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
393         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
394         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
395
396
397         # 3D videos
398         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
399         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
400         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
401         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
402         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
403         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
404         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
405
406         # Apple HTTP Live Streaming
407         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
408         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
409         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
410         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
411         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
412         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
413         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
414         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
415
416         # DASH mp4 video
417         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
418         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
419         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
420         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
421         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
422         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
423         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
424         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
425         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
426         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
427         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
428         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
429
430         # Dash mp4 audio
431         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
432         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
433         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
434         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
435         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
436         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
437         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
438
439         # Dash webm
440         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
441         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
442         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
443         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
444         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
445         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
446         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
447         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
448         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
449         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
450         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
451         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
452         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
453         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
454         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
455         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
456         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
457         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
458         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
459         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
460         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
461         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
462
463         # Dash webm audio
464         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
465         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
466
467         # Dash webm audio with opus inside
468         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
469         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
470         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
471
472         # RTMP (unnamed)
473         '_rtmp': {'protocol': 'rtmp'},
474
475         # av01 video only formats sometimes served with "unknown" codecs
476         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
477         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
478         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
479         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
480     }
481     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
482
483     _GEO_BYPASS = False
484
485     IE_NAME = 'youtube'
486     _TESTS = [
487         {
488             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
489             'info_dict': {
490                 'id': 'BaW_jenozKc',
491                 'ext': 'mp4',
492                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
493                 'uploader': 'Philipp Hagemeister',
494                 'uploader_id': 'phihag',
495                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
496                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
497                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
498                 'upload_date': '20121002',
499                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
500                 'categories': ['Science & Technology'],
501                 'tags': ['youtube-dl'],
502                 'duration': 10,
503                 'view_count': int,
504                 'like_count': int,
505                 'dislike_count': int,
506                 'start_time': 1,
507                 'end_time': 9,
508             }
509         },
510         {
511             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
512             'note': 'Embed-only video (#1746)',
513             'info_dict': {
514                 'id': 'yZIXLfi8CZQ',
515                 'ext': 'mp4',
516                 'upload_date': '20120608',
517                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
518                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
519                 'uploader': 'SET India',
520                 'uploader_id': 'setindia',
521                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
522                 'age_limit': 18,
523             }
524         },
525         {
526             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
527             'note': 'Use the first video ID in the URL',
528             'info_dict': {
529                 'id': 'BaW_jenozKc',
530                 'ext': 'mp4',
531                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
532                 'uploader': 'Philipp Hagemeister',
533                 'uploader_id': 'phihag',
534                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
535                 'upload_date': '20121002',
536                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
537                 'categories': ['Science & Technology'],
538                 'tags': ['youtube-dl'],
539                 'duration': 10,
540                 'view_count': int,
541                 'like_count': int,
542                 'dislike_count': int,
543             },
544             'params': {
545                 'skip_download': True,
546             },
547         },
548         {
549             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
550             'note': '256k DASH audio (format 141) via DASH manifest',
551             'info_dict': {
552                 'id': 'a9LDPn-MO4I',
553                 'ext': 'm4a',
554                 'upload_date': '20121002',
555                 'uploader_id': '8KVIDEO',
556                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
557                 'description': '',
558                 'uploader': '8KVIDEO',
559                 'title': 'UHDTV TEST 8K VIDEO.mp4'
560             },
561             'params': {
562                 'youtube_include_dash_manifest': True,
563                 'format': '141',
564             },
565             'skip': 'format 141 not served anymore',
566         },
567         # DASH manifest with encrypted signature
568         {
569             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
570             'info_dict': {
571                 'id': 'IB3lcPjvWLA',
572                 'ext': 'm4a',
573                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
574                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
575                 'duration': 244,
576                 'uploader': 'AfrojackVEVO',
577                 'uploader_id': 'AfrojackVEVO',
578                 'upload_date': '20131011',
579             },
580             'params': {
581                 'youtube_include_dash_manifest': True,
582                 'format': '141/bestaudio[ext=m4a]',
583             },
584         },
585         # Controversy video
586         {
587             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
588             'info_dict': {
589                 'id': 'T4XJQO3qol8',
590                 'ext': 'mp4',
591                 'duration': 219,
592                 'upload_date': '20100909',
593                 'uploader': 'Amazing Atheist',
594                 'uploader_id': 'TheAmazingAtheist',
595                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
596                 'title': 'Burning Everyone\'s Koran',
597                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
598             }
599         },
600         # Normal age-gate video (No vevo, embed allowed), available via embed page
601         {
602             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
603             'info_dict': {
604                 'id': 'HtVdAasjOgU',
605                 'ext': 'mp4',
606                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
607                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
608                 'duration': 142,
609                 'uploader': 'The Witcher',
610                 'uploader_id': 'WitcherGame',
611                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
612                 'upload_date': '20140605',
613                 'age_limit': 18,
614             },
615         },
616         {
617             # Age-gated video only available with authentication (unavailable
618             # via embed page workaround)
619             'url': 'XgnwCQzjau8',
620             'only_matching': True,
621         },
622         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
623         # YouTube Red ad is not captured for creator
624         {
625             'url': '__2ABJjxzNo',
626             'info_dict': {
627                 'id': '__2ABJjxzNo',
628                 'ext': 'mp4',
629                 'duration': 266,
630                 'upload_date': '20100430',
631                 'uploader_id': 'deadmau5',
632                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
633                 'creator': 'Dada Life, deadmau5',
634                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
635                 'uploader': 'deadmau5',
636                 'title': 'Deadmau5 - Some Chords (HD)',
637                 'alt_title': 'This Machine Kills Some Chords',
638             },
639             'expected_warnings': [
640                 'DASH manifest missing',
641             ]
642         },
643         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
644         {
645             'url': 'lqQg6PlCWgI',
646             'info_dict': {
647                 'id': 'lqQg6PlCWgI',
648                 'ext': 'mp4',
649                 'duration': 6085,
650                 'upload_date': '20150827',
651                 'uploader_id': 'olympic',
652                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
653                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
654                 'uploader': 'Olympic',
655                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
656             },
657             'params': {
658                 'skip_download': 'requires avconv',
659             }
660         },
661         # Non-square pixels
662         {
663             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
664             'info_dict': {
665                 'id': '_b-2C3KPAM0',
666                 'ext': 'mp4',
667                 'stretched_ratio': 16 / 9.,
668                 'duration': 85,
669                 'upload_date': '20110310',
670                 'uploader_id': 'AllenMeow',
671                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
672                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
673                 'uploader': '孫ᄋᄅ',
674                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
675             },
676         },
677         # url_encoded_fmt_stream_map is empty string
678         {
679             'url': 'qEJwOuvDf7I',
680             'info_dict': {
681                 'id': 'qEJwOuvDf7I',
682                 'ext': 'webm',
683                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
684                 'description': '',
685                 'upload_date': '20150404',
686                 'uploader_id': 'spbelect',
687                 'uploader': 'Наблюдатели Петербурга',
688             },
689             'params': {
690                 'skip_download': 'requires avconv',
691             },
692             'skip': 'This live event has ended.',
693         },
694         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
695         {
696             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
697             'info_dict': {
698                 'id': 'FIl7x6_3R5Y',
699                 'ext': 'webm',
700                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
701                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
702                 'duration': 220,
703                 'upload_date': '20150625',
704                 'uploader_id': 'dorappi2000',
705                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
706                 'uploader': 'dorappi2000',
707                 'formats': 'mincount:31',
708             },
709             'skip': 'not actual anymore',
710         },
711         # DASH manifest with segment_list
712         {
713             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
714             'md5': '8ce563a1d667b599d21064e982ab9e31',
715             'info_dict': {
716                 'id': 'CsmdDsKjzN8',
717                 'ext': 'mp4',
718                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
719                 'uploader': 'Airtek',
720                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
721                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
722                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
723             },
724             'params': {
725                 'youtube_include_dash_manifest': True,
726                 'format': '135',  # bestvideo
727             },
728             'skip': 'This live event has ended.',
729         },
730         {
731             # Multifeed videos (multiple cameras), URL is for Main Camera
732             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
733             'info_dict': {
734                 'id': 'jqWvoWXjCVs',
735                 'title': 'teamPGP: Rocket League Noob Stream',
736                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
737             },
738             'playlist': [{
739                 'info_dict': {
740                     'id': 'jqWvoWXjCVs',
741                     'ext': 'mp4',
742                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
743                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
744                     'duration': 7335,
745                     'upload_date': '20150721',
746                     'uploader': 'Beer Games Beer',
747                     'uploader_id': 'beergamesbeer',
748                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
749                     'license': 'Standard YouTube License',
750                 },
751             }, {
752                 'info_dict': {
753                     'id': '6h8e8xoXJzg',
754                     'ext': 'mp4',
755                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
756                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
757                     'duration': 7337,
758                     'upload_date': '20150721',
759                     'uploader': 'Beer Games Beer',
760                     'uploader_id': 'beergamesbeer',
761                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
762                     'license': 'Standard YouTube License',
763                 },
764             }, {
765                 'info_dict': {
766                     'id': 'PUOgX5z9xZw',
767                     'ext': 'mp4',
768                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
769                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
770                     'duration': 7337,
771                     'upload_date': '20150721',
772                     'uploader': 'Beer Games Beer',
773                     'uploader_id': 'beergamesbeer',
774                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
775                     'license': 'Standard YouTube License',
776                 },
777             }, {
778                 'info_dict': {
779                     'id': 'teuwxikvS5k',
780                     'ext': 'mp4',
781                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
782                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
783                     'duration': 7334,
784                     'upload_date': '20150721',
785                     'uploader': 'Beer Games Beer',
786                     'uploader_id': 'beergamesbeer',
787                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
788                     'license': 'Standard YouTube License',
789                 },
790             }],
791             'params': {
792                 'skip_download': True,
793             },
794             'skip': 'This video is not available.',
795         },
796         {
797             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
798             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
799             'info_dict': {
800                 'id': 'gVfLd0zydlo',
801                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
802             },
803             'playlist_count': 2,
804             'skip': 'Not multifeed anymore',
805         },
806         {
807             'url': 'https://vid.plus/FlRa-iH7PGw',
808             'only_matching': True,
809         },
810         {
811             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
812             'only_matching': True,
813         },
814         {
815             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
816             # Also tests cut-off URL expansion in video description (see
817             # https://github.com/ytdl-org/youtube-dl/issues/1892,
818             # https://github.com/ytdl-org/youtube-dl/issues/8164)
819             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
820             'info_dict': {
821                 'id': 'lsguqyKfVQg',
822                 'ext': 'mp4',
823                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
824                 'alt_title': 'Dark Walk - Position Music',
825                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
826                 'duration': 133,
827                 'upload_date': '20151119',
828                 'uploader_id': 'IronSoulElf',
829                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
830                 'uploader': 'IronSoulElf',
831                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
832                 'track': 'Dark Walk - Position Music',
833                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
834                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
835             },
836             'params': {
837                 'skip_download': True,
838             },
839         },
840         {
841             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
842             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
843             'only_matching': True,
844         },
845         {
846             # Video with yt:stretch=17:0
847             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
848             'info_dict': {
849                 'id': 'Q39EVAstoRM',
850                 'ext': 'mp4',
851                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
852                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
853                 'upload_date': '20151107',
854                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
855                 'uploader': 'CH GAMER DROID',
856             },
857             'params': {
858                 'skip_download': True,
859             },
860             'skip': 'This video does not exist.',
861         },
862         {
863             # Video licensed under Creative Commons
864             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
865             'info_dict': {
866                 'id': 'M4gD1WSo5mA',
867                 'ext': 'mp4',
868                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
869                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
870                 'duration': 721,
871                 'upload_date': '20150127',
872                 'uploader_id': 'BerkmanCenter',
873                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
874                 'uploader': 'The Berkman Klein Center for Internet & Society',
875                 'license': 'Creative Commons Attribution license (reuse allowed)',
876             },
877             'params': {
878                 'skip_download': True,
879             },
880         },
881         {
882             # Channel-like uploader_url
883             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
884             'info_dict': {
885                 'id': 'eQcmzGIKrzg',
886                 'ext': 'mp4',
887                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
888                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
889                 'duration': 4060,
890                 'upload_date': '20151119',
891                 'uploader': 'Bernie Sanders',
892                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
893                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
894                 'license': 'Creative Commons Attribution license (reuse allowed)',
895             },
896             'params': {
897                 'skip_download': True,
898             },
899         },
900         {
901             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
902             'only_matching': True,
903         },
904         {
905             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
906             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
907             'only_matching': True,
908         },
909         {
910             # Rental video preview
911             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
912             'info_dict': {
913                 'id': 'uGpuVWrhIzE',
914                 'ext': 'mp4',
915                 'title': 'Piku - Trailer',
916                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
917                 'upload_date': '20150811',
918                 'uploader': 'FlixMatrix',
919                 'uploader_id': 'FlixMatrixKaravan',
920                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
921                 'license': 'Standard YouTube License',
922             },
923             'params': {
924                 'skip_download': True,
925             },
926             'skip': 'This video is not available.',
927         },
928         {
929             # YouTube Red video with episode data
930             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
931             'info_dict': {
932                 'id': 'iqKdEhx-dD4',
933                 'ext': 'mp4',
934                 'title': 'Isolation - Mind Field (Ep 1)',
935                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
936                 'duration': 2085,
937                 'upload_date': '20170118',
938                 'uploader': 'Vsauce',
939                 'uploader_id': 'Vsauce',
940                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
941                 'series': 'Mind Field',
942                 'season_number': 1,
943                 'episode_number': 1,
944             },
945             'params': {
946                 'skip_download': True,
947             },
948             'expected_warnings': [
949                 'Skipping DASH manifest',
950             ],
951         },
952         {
953             # The following content has been identified by the YouTube community
954             # as inappropriate or offensive to some audiences.
955             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
956             'info_dict': {
957                 'id': '6SJNVb0GnPI',
958                 'ext': 'mp4',
959                 'title': 'Race Differences in Intelligence',
960                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
961                 'duration': 965,
962                 'upload_date': '20140124',
963                 'uploader': 'New Century Foundation',
964                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
965                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
966             },
967             'params': {
968                 'skip_download': True,
969             },
970         },
971         {
972             # itag 212
973             'url': '1t24XAntNCY',
974             'only_matching': True,
975         },
976         {
977             # geo restricted to JP
978             'url': 'sJL6WA-aGkQ',
979             'only_matching': True,
980         },
981         {
982             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
983             'only_matching': True,
984         },
985         {
986             # DRM protected
987             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
988             'only_matching': True,
989         },
990         {
991             # Video with unsupported adaptive stream type formats
992             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
993             'info_dict': {
994                 'id': 'Z4Vy8R84T1U',
995                 'ext': 'mp4',
996                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
997                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
998                 'duration': 433,
999                 'upload_date': '20130923',
1000                 'uploader': 'Amelia Putri Harwita',
1001                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1002                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1003                 'formats': 'maxcount:10',
1004             },
1005             'params': {
1006                 'skip_download': True,
1007                 'youtube_include_dash_manifest': False,
1008             },
1009             'skip': 'not actual anymore',
1010         },
1011         {
1012             # Youtube Music Auto-generated description
1013             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1014             'info_dict': {
1015                 'id': 'MgNrAu2pzNs',
1016                 'ext': 'mp4',
1017                 'title': 'Voyeur Girl',
1018                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1019                 'upload_date': '20190312',
1020                 'uploader': 'Stephen - Topic',
1021                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1022                 'artist': 'Stephen',
1023                 'track': 'Voyeur Girl',
1024                 'album': 'it\'s too much love to know my dear',
1025                 'release_date': '20190313',
1026                 'release_year': 2019,
1027             },
1028             'params': {
1029                 'skip_download': True,
1030             },
1031         },
1032         {
1033             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1034             'only_matching': True,
1035         },
1036         {
1037             # invalid -> valid video id redirection
1038             'url': 'DJztXj2GPfl',
1039             'info_dict': {
1040                 'id': 'DJztXj2GPfk',
1041                 'ext': 'mp4',
1042                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1043                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1044                 'upload_date': '20090125',
1045                 'uploader': 'Prochorowka',
1046                 'uploader_id': 'Prochorowka',
1047                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1048                 'artist': 'Panjabi MC',
1049                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1050                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1051             },
1052             'params': {
1053                 'skip_download': True,
1054             },
1055         },
1056         {
1057             # empty description results in an empty string
1058             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1059             'info_dict': {
1060                 'id': 'x41yOUIvK2k',
1061                 'ext': 'mp4',
1062                 'title': 'IMG 3456',
1063                 'description': '',
1064                 'upload_date': '20170613',
1065                 'uploader_id': 'ElevageOrVert',
1066                 'uploader': 'ElevageOrVert',
1067             },
1068             'params': {
1069                 'skip_download': True,
1070             },
1071         },
1072         {
1073             # with '};' inside yt initial data (see [1])
1074             # see [2] for an example with '};' inside ytInitialPlayerResponse
1075             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1076             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1077             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1078             'info_dict': {
1079                 'id': 'CHqg6qOn4no',
1080                 'ext': 'mp4',
1081                 'title': 'Part 77   Sort a list of simple types in c#',
1082                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1083                 'upload_date': '20130831',
1084                 'uploader_id': 'kudvenkat',
1085                 'uploader': 'kudvenkat',
1086             },
1087             'params': {
1088                 'skip_download': True,
1089             },
1090         },
1091     ]
1092
    def __init__(self, *args, **kwargs):
        # Forward all arguments to the InfoExtractor initializer unchanged.
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache mapping (player_url, signature cache id) to the
        # extracted signature-deciphering function (see _decrypt_signature),
        # so each player is downloaded and parsed at most once per run.
        self._player_cache = {}
1096
1097     def report_video_info_webpage_download(self, video_id):
1098         """Report attempt to download video info webpage."""
1099         self.to_screen('%s: Downloading video info webpage' % video_id)
1100
1101     def report_information_extraction(self, video_id):
1102         """Report attempt to extract video information."""
1103         self.to_screen('%s: Extracting video information' % video_id)
1104
1105     def report_unavailable_format(self, video_id, format):
1106         """Report extracted video URL."""
1107         self.to_screen('%s: Format %s not available' % (video_id, format))
1108
1109     def report_rtmp_download(self):
1110         """Indicate the download will use the RTMP protocol."""
1111         self.to_screen('RTMP download detected')
1112
1113     def _signature_cache_id(self, example_sig):
1114         """ Return a string representation of a signature """
1115         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1116
1117     @classmethod
1118     def _extract_player_info(cls, player_url):
1119         for player_re in cls._PLAYER_INFO_RE:
1120             id_m = re.search(player_re, player_url)
1121             if id_m:
1122                 break
1123         else:
1124             raise ExtractorError('Cannot identify player %r' % player_url)
1125         return id_m.group('ext'), id_m.group('id')
1126
1127     def _extract_signature_function(self, video_id, player_url, example_sig):
1128         player_type, player_id = self._extract_player_info(player_url)
1129
1130         # Read from filesystem cache
1131         func_id = '%s_%s_%s' % (
1132             player_type, player_id, self._signature_cache_id(example_sig))
1133         assert os.path.basename(func_id) == func_id
1134
1135         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1136         if cache_spec is not None:
1137             return lambda s: ''.join(s[i] for i in cache_spec)
1138
1139         download_note = (
1140             'Downloading player %s' % player_url
1141             if self._downloader.params.get('verbose') else
1142             'Downloading %s player %s' % (player_type, player_id)
1143         )
1144         if player_type == 'js':
1145             code = self._download_webpage(
1146                 player_url, video_id,
1147                 note=download_note,
1148                 errnote='Download of %s failed' % player_url)
1149             res = self._parse_sig_js(code)
1150         elif player_type == 'swf':
1151             urlh = self._request_webpage(
1152                 player_url, video_id,
1153                 note=download_note,
1154                 errnote='Download of %s failed' % player_url)
1155             code = urlh.read()
1156             res = self._parse_sig_swf(code)
1157         else:
1158             assert False, 'Invalid player type %r' % player_type
1159
1160         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1161         cache_res = res(test_string)
1162         cache_spec = [ord(c) for c in cache_res]
1163
1164         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1165         return res
1166
    def _print_sig_code(self, func, example_sig):
        """Print Python source code equivalent to the deciphering *func*.

        Probes *func* to recover the index permutation it performs and
        renders it as a compact slicing expression (used with the
        youtube_print_sig_code option, see _decrypt_signature).
        """
        def gen_sig_code(idxs):
            # Yield index/slice expressions over 's' that reproduce the
            # permutation in idxs, collapsing consecutive runs with step
            # +1/-1 into slice syntax.
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting parts that match
                # Python's slicing defaults.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it while the step matches,
                    # otherwise flush the finished slice.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Adjacent indices start a new ascending/descending run.
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index: emit it on its own.
                    yield 's[%d]' % prev
            # Emit the trailing element or run. NOTE(review): relies on the
            # loop above having run at least once ('i' would be unbound for
            # idxs shorter than 2); signatures are long enough in practice.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string whose characters encode their own positions, so the
        # output of func reveals the permutation as character codes.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1205
1206     def _parse_sig_js(self, jscode):
1207         funcname = self._search_regex(
1208             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1209              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1210              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1211              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1212              # Obsolete patterns
1213              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1214              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1215              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1216              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1217              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1218              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1219              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1220              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1221             jscode, 'Initial JS player signature function name', group='sig')
1222
1223         jsi = JSInterpreter(jscode)
1224         initial_function = jsi.extract_function(funcname)
1225         return lambda s: initial_function([s])
1226
1227     def _parse_sig_swf(self, file_contents):
1228         swfi = SWFInterpreter(file_contents)
1229         TARGET_CLASSNAME = 'SignatureDecipher'
1230         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1231         initial_function = swfi.extract_function(searched_class, 'decipher')
1232         return lambda s: initial_function([s])
1233
1234     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1235         """Turn the encrypted s field into a working signature"""
1236
1237         if player_url is None:
1238             raise ExtractorError('Cannot decrypt signature without player_url')
1239
1240         if player_url.startswith('//'):
1241             player_url = 'https:' + player_url
1242         elif not re.match(r'https?://', player_url):
1243             player_url = compat_urlparse.urljoin(
1244                 'https://www.youtube.com', player_url)
1245         try:
1246             player_id = (player_url, self._signature_cache_id(s))
1247             if player_id not in self._player_cache:
1248                 func = self._extract_signature_function(
1249                     video_id, player_url, s
1250                 )
1251                 self._player_cache[player_id] = func
1252             func = self._player_cache[player_id]
1253             if self._downloader.params.get('youtube_print_sig_code'):
1254                 self._print_sig_code(func, s)
1255             return func(s)
1256         except Exception as e:
1257             tb = traceback.format_exc()
1258             raise ExtractorError(
1259                 'Signature extraction failed: ' + tb, cause=e)
1260
1261     def _get_subtitles(self, video_id, webpage):
1262         try:
1263             subs_doc = self._download_xml(
1264                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1265                 video_id, note=False)
1266         except ExtractorError as err:
1267             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1268             return {}
1269
1270         sub_lang_list = {}
1271         for track in subs_doc.findall('track'):
1272             lang = track.attrib['lang_code']
1273             if lang in sub_lang_list:
1274                 continue
1275             sub_formats = []
1276             for ext in self._SUBTITLE_FORMATS:
1277                 params = compat_urllib_parse_urlencode({
1278                     'lang': lang,
1279                     'v': video_id,
1280                     'fmt': ext,
1281                     'name': track.attrib['name'].encode('utf-8'),
1282                 })
1283                 sub_formats.append({
1284                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1285                     'ext': ext,
1286                 })
1287             sub_lang_list[lang] = sub_formats
1288         if not sub_lang_list:
1289             self._downloader.report_warning('video doesn\'t have subtitles')
1290             return {}
1291         return sub_lang_list
1292
1293     def _get_ytplayer_config(self, video_id, webpage):
1294         patterns = (
1295             # User data may contain arbitrary character sequences that may affect
1296             # JSON extraction with regex, e.g. when '};' is contained the second
1297             # regex won't capture the whole JSON. Yet working around by trying more
1298             # concrete regex first keeping in mind proper quoted string handling
1299             # to be implemented in future that will replace this workaround (see
1300             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1301             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1302             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1303             r';ytplayer\.config\s*=\s*({.+?});',
1304         )
1305         config = self._search_regex(
1306             patterns, webpage, 'ytplayer.config', default=None)
1307         if config:
1308             return self._parse_json(
1309                 uppercase_escape(config), video_id, fatal=False)
1310
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process.

           Returns a dict mapping language code -> list of {'url', 'ext'}
           format dicts, or {} (with a warning) when no automatic captions
           can be found.  Three historical caption formats are tried in
           turn: ttsurl, player_response captionTracks, and the legacy
           caption_tracks fields."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        # NOTE: the whole lookup runs inside one try block on purpose — a
        # KeyError/IndexError from any of the three format probes below is
        # the normal "this format is absent" signal, not a bug.
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            # Oldest flow: an explicit timedtext service URL plus timestamp
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                # The single <track> element is the ASR source language
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # Each <target> is a language the captions can be
                # machine-translated into
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            # Shared by the two remaining flows: rewrite a base caption URL
            # once per (language, extension) combination
            def make_captions(sub_url, sub_langs):
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        # Overwrite only the translation/format params,
                        # keeping the rest of the original query string
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    # Missing keys here raise KeyError/IndexError and fall
                    # through to the warning in the except clause below
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1412
1413     def _mark_watched(self, video_id, video_info, player_response):
1414         playback_url = url_or_none(try_get(
1415             player_response,
1416             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1417             video_info, lambda x: x['videostats_playback_base_url'][0]))
1418         if not playback_url:
1419             return
1420         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1421         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1422
1423         # cpn generation algorithm is reverse engineered from base.js.
1424         # In fact it works even with dummy cpn.
1425         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1426         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1427
1428         qs.update({
1429             'ver': ['2'],
1430             'cpn': [cpn],
1431         })
1432         playback_url = compat_urlparse.urlunparse(
1433             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1434
1435         self._download_webpage(
1436             playback_url, video_id, 'Marking watched',
1437             'Unable to mark watched', fatal=False)
1438
1439     @staticmethod
1440     def _extract_urls(webpage):
1441         # Embedded YouTube player
1442         entries = [
1443             unescapeHTML(mobj.group('url'))
1444             for mobj in re.finditer(r'''(?x)
1445             (?:
1446                 <iframe[^>]+?src=|
1447                 data-video-url=|
1448                 <embed[^>]+?src=|
1449                 embedSWF\(?:\s*|
1450                 <object[^>]+data=|
1451                 new\s+SWFObject\(
1452             )
1453             (["\'])
1454                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1455                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1456             \1''', webpage)]
1457
1458         # lazyYT YouTube embed
1459         entries.extend(list(map(
1460             unescapeHTML,
1461             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1462
1463         # Wordpress "YouTube Video Importer" plugin
1464         matches = re.findall(r'''(?x)<div[^>]+
1465             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1466             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1467         entries.extend(m[-1] for m in matches)
1468
1469         return entries
1470
1471     @staticmethod
1472     def _extract_url(webpage):
1473         urls = YoutubeIE._extract_urls(webpage)
1474         return urls[0] if urls else None
1475
1476     @classmethod
1477     def extract_id(cls, url):
1478         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1479         if mobj is None:
1480             raise ExtractorError('Invalid URL: %s' % url)
1481         video_id = mobj.group(2)
1482         return video_id
1483
1484     def _extract_chapters_from_json(self, webpage, video_id, duration):
1485         if not webpage:
1486             return
1487         data = self._extract_yt_initial_data(video_id, webpage)
1488         if not data or not isinstance(data, dict):
1489             return
1490         chapters_list = try_get(
1491             data,
1492             lambda x: x['playerOverlays']
1493                        ['playerOverlayRenderer']
1494                        ['decoratedPlayerBarRenderer']
1495                        ['decoratedPlayerBarRenderer']
1496                        ['playerBar']
1497                        ['chapteredPlayerBarRenderer']
1498                        ['chapters'],
1499             list)
1500         if not chapters_list:
1501             return
1502
1503         def chapter_time(chapter):
1504             return float_or_none(
1505                 try_get(
1506                     chapter,
1507                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1508                     int),
1509                 scale=1000)
1510         chapters = []
1511         for next_num, chapter in enumerate(chapters_list, start=1):
1512             start_time = chapter_time(chapter)
1513             if start_time is None:
1514                 continue
1515             end_time = (chapter_time(chapters_list[next_num])
1516                         if next_num < len(chapters_list) else duration)
1517             if end_time is None:
1518                 continue
1519             title = try_get(
1520                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1521                 compat_str)
1522             chapters.append({
1523                 'start_time': start_time,
1524                 'end_time': end_time,
1525                 'title': title,
1526             })
1527         return chapters
1528
1529     @staticmethod
1530     def _extract_chapters_from_description(description, duration):
1531         if not description:
1532             return None
1533         chapter_lines = re.findall(
1534             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1535             description)
1536         if not chapter_lines:
1537             return None
1538         chapters = []
1539         for next_num, (chapter_line, time_point) in enumerate(
1540                 chapter_lines, start=1):
1541             start_time = parse_duration(time_point)
1542             if start_time is None:
1543                 continue
1544             if start_time > duration:
1545                 break
1546             end_time = (duration if next_num == len(chapter_lines)
1547                         else parse_duration(chapter_lines[next_num][1]))
1548             if end_time is None:
1549                 continue
1550             if end_time > duration:
1551                 end_time = duration
1552             if start_time > end_time:
1553                 break
1554             chapter_title = re.sub(
1555                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1556             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1557             chapters.append({
1558                 'start_time': start_time,
1559                 'end_time': end_time,
1560                 'title': chapter_title,
1561             })
1562         return chapters
1563
1564     def _extract_chapters(self, webpage, description, video_id, duration):
1565         return (self._extract_chapters_from_json(webpage, video_id, duration)
1566                 or self._extract_chapters_from_description(description, duration))
1567
1568     def _real_extract(self, url):
1569         url, smuggled_data = unsmuggle_url(url, {})
1570
1571         proto = (
1572             'http' if self._downloader.params.get('prefer_insecure', False)
1573             else 'https')
1574
1575         start_time = None
1576         end_time = None
1577         parsed_url = compat_urllib_parse_urlparse(url)
1578         for component in [parsed_url.fragment, parsed_url.query]:
1579             query = compat_parse_qs(component)
1580             if start_time is None and 't' in query:
1581                 start_time = parse_duration(query['t'][0])
1582             if start_time is None and 'start' in query:
1583                 start_time = parse_duration(query['start'][0])
1584             if end_time is None and 'end' in query:
1585                 end_time = parse_duration(query['end'][0])
1586
1587         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1588         mobj = re.search(self._NEXT_URL_RE, url)
1589         if mobj:
1590             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1591         video_id = self.extract_id(url)
1592
1593         # Get video webpage
1594         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1595         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1596
1597         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1598         video_id = qs.get('v', [None])[0] or video_id
1599
1600         # Attempt to extract SWF player URL
1601         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1602         if mobj is not None:
1603             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1604         else:
1605             player_url = None
1606
1607         dash_mpds = []
1608
1609         def add_dash_mpd(video_info):
1610             dash_mpd = video_info.get('dashmpd')
1611             if dash_mpd and dash_mpd[0] not in dash_mpds:
1612                 dash_mpds.append(dash_mpd[0])
1613
1614         def add_dash_mpd_pr(pl_response):
1615             dash_mpd = url_or_none(try_get(
1616                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1617                 compat_str))
1618             if dash_mpd and dash_mpd not in dash_mpds:
1619                 dash_mpds.append(dash_mpd)
1620
1621         is_live = None
1622         view_count = None
1623
1624         def extract_view_count(v_info):
1625             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1626
1627         def extract_player_response(player_response, video_id):
1628             pl_response = str_or_none(player_response)
1629             if not pl_response:
1630                 return
1631             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1632             if isinstance(pl_response, dict):
1633                 add_dash_mpd_pr(pl_response)
1634                 return pl_response
1635
1636         player_response = {}
1637
1638         # Get video info
1639         video_info = {}
1640         embed_webpage = None
1641
1642         if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
1643             age_gate = True
1644             # We simulate the access to the video from www.youtube.com/v/{video_id}
1645             # this can be viewed without login into Youtube
1646             url = proto + '://www.youtube.com/embed/%s' % video_id
1647             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1648             data = compat_urllib_parse_urlencode({
1649                 'video_id': video_id,
1650                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1651                 'sts': self._search_regex(
1652                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1653             })
1654             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1655             try:
1656                 video_info_webpage = self._download_webpage(
1657                     video_info_url, video_id,
1658                     note='Refetching age-gated info webpage',
1659                     errnote='unable to download video info webpage')
1660             except ExtractorError:
1661                 video_info_webpage = None
1662             if video_info_webpage:
1663                 video_info = compat_parse_qs(video_info_webpage)
1664                 pl_response = video_info.get('player_response', [None])[0]
1665                 player_response = extract_player_response(pl_response, video_id)
1666                 add_dash_mpd(video_info)
1667                 view_count = extract_view_count(video_info)
1668         else:
1669             age_gate = False
1670             # Try looking directly into the video webpage
1671             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1672             if ytplayer_config:
1673                 args = ytplayer_config['args']
1674                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1675                     # Convert to the same format returned by compat_parse_qs
1676                     video_info = dict((k, [v]) for k, v in args.items())
1677                     add_dash_mpd(video_info)
1678                 # Rental video is not rented but preview is available (e.g.
1679                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1680                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1681                 if not video_info and args.get('ypc_vid'):
1682                     return self.url_result(
1683                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1684                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1685                     is_live = True
1686                 if not player_response:
1687                     player_response = extract_player_response(args.get('player_response'), video_id)
1688             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1689                 add_dash_mpd_pr(player_response)
1690
1691         if not video_info and not player_response:
1692             player_response = extract_player_response(
1693                 self._search_regex(
1694                     (r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
1695                      self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1696                     'initial player response', default='{}'),
1697                 video_id)
1698
1699         def extract_unavailable_message():
1700             messages = []
1701             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1702                 msg = self._html_search_regex(
1703                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1704                     video_webpage, 'unavailable %s' % kind, default=None)
1705                 if msg:
1706                     messages.append(msg)
1707             if messages:
1708                 return '\n'.join(messages)
1709
1710         if not video_info and not player_response:
1711             unavailable_message = extract_unavailable_message()
1712             if not unavailable_message:
1713                 unavailable_message = 'Unable to extract video data'
1714             raise ExtractorError(
1715                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1716
1717         if not isinstance(video_info, dict):
1718             video_info = {}
1719
1720         video_details = try_get(
1721             player_response, lambda x: x['videoDetails'], dict) or {}
1722
1723         microformat = try_get(
1724             player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1725
1726         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1727         if not video_title:
1728             self._downloader.report_warning('Unable to extract video title')
1729             video_title = '_'
1730
1731         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1732         if video_description:
1733
1734             def replace_url(m):
1735                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1736                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1737                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1738                     qs = compat_parse_qs(parsed_redir_url.query)
1739                     q = qs.get('q')
1740                     if q and q[0]:
1741                         return q[0]
1742                 return redir_url
1743
1744             description_original = video_description = re.sub(r'''(?x)
1745                 <a\s+
1746                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1747                     (?:title|href)="([^"]+)"\s+
1748                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1749                     class="[^"]*"[^>]*>
1750                 [^<]+\.{3}\s*
1751                 </a>
1752             ''', replace_url, video_description)
1753             video_description = clean_html(video_description)
1754         else:
1755             video_description = video_details.get('shortDescription')
1756             if video_description is None:
1757                 video_description = self._html_search_meta('description', video_webpage)
1758
1759         if not smuggled_data.get('force_singlefeed', False):
1760             if not self._downloader.params.get('noplaylist'):
1761                 multifeed_metadata_list = try_get(
1762                     player_response,
1763                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1764                     compat_str) or try_get(
1765                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1766                 if multifeed_metadata_list:
1767                     entries = []
1768                     feed_ids = []
1769                     for feed in multifeed_metadata_list.split(','):
1770                         # Unquote should take place before split on comma (,) since textual
1771                         # fields may contain comma as well (see
1772                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1773                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1774
1775                         def feed_entry(name):
1776                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1777
1778                         feed_id = feed_entry('id')
1779                         if not feed_id:
1780                             continue
1781                         feed_title = feed_entry('title')
1782                         title = video_title
1783                         if feed_title:
1784                             title += ' (%s)' % feed_title
1785                         entries.append({
1786                             '_type': 'url_transparent',
1787                             'ie_key': 'Youtube',
1788                             'url': smuggle_url(
1789                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1790                                 {'force_singlefeed': True}),
1791                             'title': title,
1792                         })
1793                         feed_ids.append(feed_id)
1794                     self.to_screen(
1795                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1796                         % (', '.join(feed_ids), video_id))
1797                     return self.playlist_result(entries, video_id, video_title, video_description)
1798             else:
1799                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1800
1801         if view_count is None:
1802             view_count = extract_view_count(video_info)
1803         if view_count is None and video_details:
1804             view_count = int_or_none(video_details.get('viewCount'))
1805         if view_count is None and microformat:
1806             view_count = int_or_none(microformat.get('viewCount'))
1807
1808         if is_live is None:
1809             is_live = bool_or_none(video_details.get('isLive'))
1810
1811         # Check for "rental" videos
1812         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1813             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1814
1815         def _extract_filesize(media_url):
1816             return int_or_none(self._search_regex(
1817                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1818
1819         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1820         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1821
1822         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1823             self.report_rtmp_download()
1824             formats = [{
1825                 'format_id': '_rtmp',
1826                 'protocol': 'rtmp',
1827                 'url': video_info['conn'][0],
1828                 'player_url': player_url,
1829             }]
1830         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1831             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1832             if 'rtmpe%3Dyes' in encoded_url_map:
1833                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1834             formats = []
1835             formats_spec = {}
1836             fmt_list = video_info.get('fmt_list', [''])[0]
1837             if fmt_list:
1838                 for fmt in fmt_list.split(','):
1839                     spec = fmt.split('/')
1840                     if len(spec) > 1:
1841                         width_height = spec[1].split('x')
1842                         if len(width_height) == 2:
1843                             formats_spec[spec[0]] = {
1844                                 'resolution': spec[1],
1845                                 'width': int_or_none(width_height[0]),
1846                                 'height': int_or_none(width_height[1]),
1847                             }
1848             for fmt in streaming_formats:
1849                 itag = str_or_none(fmt.get('itag'))
1850                 if not itag:
1851                     continue
1852                 quality = fmt.get('quality')
1853                 quality_label = fmt.get('qualityLabel') or quality
1854                 formats_spec[itag] = {
1855                     'asr': int_or_none(fmt.get('audioSampleRate')),
1856                     'filesize': int_or_none(fmt.get('contentLength')),
1857                     'format_note': quality_label,
1858                     'fps': int_or_none(fmt.get('fps')),
1859                     'height': int_or_none(fmt.get('height')),
1860                     # bitrate for itag 43 is always 2147483647
1861                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1862                     'width': int_or_none(fmt.get('width')),
1863                 }
1864
1865             for fmt in streaming_formats:
1866                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1867                     continue
1868                 url = url_or_none(fmt.get('url'))
1869
1870                 if not url:
1871                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1872                     if not cipher:
1873                         continue
1874                     url_data = compat_parse_qs(cipher)
1875                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1876                     if not url:
1877                         continue
1878                 else:
1879                     cipher = None
1880                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1881
1882                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1883                 # Unsupported FORMAT_STREAM_TYPE_OTF
1884                 if stream_type == 3:
1885                     continue
1886
1887                 format_id = fmt.get('itag') or url_data['itag'][0]
1888                 if not format_id:
1889                     continue
1890                 format_id = compat_str(format_id)
1891
1892                 if cipher:
1893                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1894                         ASSETS_RE = (
1895                             r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
1896                             r'"jsUrl"\s*:\s*("[^"]+")',
1897                             r'"assets":.+?"js":\s*("[^"]+")')
1898                         jsplayer_url_json = self._search_regex(
1899                             ASSETS_RE,
1900                             embed_webpage if age_gate else video_webpage,
1901                             'JS player URL (1)', default=None)
1902                         if not jsplayer_url_json and not age_gate:
1903                             # We need the embed website after all
1904                             if embed_webpage is None:
1905                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1906                                 embed_webpage = self._download_webpage(
1907                                     embed_url, video_id, 'Downloading embed webpage')
1908                             jsplayer_url_json = self._search_regex(
1909                                 ASSETS_RE, embed_webpage, 'JS player URL')
1910
1911                         player_url = json.loads(jsplayer_url_json)
1912                         if player_url is None:
1913                             player_url_json = self._search_regex(
1914                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1915                                 video_webpage, 'age gate player URL')
1916                             player_url = json.loads(player_url_json)
1917
1918                     if 'sig' in url_data:
1919                         url += '&signature=' + url_data['sig'][0]
1920                     elif 's' in url_data:
1921                         encrypted_sig = url_data['s'][0]
1922
1923                         if self._downloader.params.get('verbose'):
1924                             if player_url is None:
1925                                 player_desc = 'unknown'
1926                             else:
1927                                 player_type, player_version = self._extract_player_info(player_url)
1928                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
1929                             parts_sizes = self._signature_cache_id(encrypted_sig)
1930                             self.to_screen('{%s} signature length %s, %s' %
1931                                            (format_id, parts_sizes, player_desc))
1932
1933                         signature = self._decrypt_signature(
1934                             encrypted_sig, video_id, player_url, age_gate)
1935                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
1936                         url += '&%s=%s' % (sp, signature)
1937                 if 'ratebypass' not in url:
1938                     url += '&ratebypass=yes'
1939
1940                 dct = {
1941                     'format_id': format_id,
1942                     'url': url,
1943                     'player_url': player_url,
1944                 }
1945                 if format_id in self._formats:
1946                     dct.update(self._formats[format_id])
1947                 if format_id in formats_spec:
1948                     dct.update(formats_spec[format_id])
1949
1950                 # Some itags are not included in DASH manifest thus corresponding formats will
1951                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
1952                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1953                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1954                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1955
1956                 if width is None:
1957                     width = int_or_none(fmt.get('width'))
1958                 if height is None:
1959                     height = int_or_none(fmt.get('height'))
1960
1961                 filesize = int_or_none(url_data.get(
1962                     'clen', [None])[0]) or _extract_filesize(url)
1963
1964                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
1965                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
1966
1967                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
1968                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
1969                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
1970
1971                 more_fields = {
1972                     'filesize': filesize,
1973                     'tbr': tbr,
1974                     'width': width,
1975                     'height': height,
1976                     'fps': fps,
1977                     'format_note': quality_label or quality,
1978                 }
1979                 for key, value in more_fields.items():
1980                     if value:
1981                         dct[key] = value
1982                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
1983                 if type_:
1984                     type_split = type_.split(';')
1985                     kind_ext = type_split[0].split('/')
1986                     if len(kind_ext) == 2:
1987                         kind, _ = kind_ext
1988                         dct['ext'] = mimetype2ext(type_split[0])
1989                         if kind in ('audio', 'video'):
1990                             codecs = None
1991                             for mobj in re.finditer(
1992                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1993                                 if mobj.group('key') == 'codecs':
1994                                     codecs = mobj.group('val')
1995                                     break
1996                             if codecs:
1997                                 dct.update(parse_codecs(codecs))
1998                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1999                     dct['downloader_options'] = {
2000                         # Youtube throttles chunks >~10M
2001                         'http_chunk_size': 10485760,
2002                     }
2003                 formats.append(dct)
2004         else:
2005             manifest_url = (
2006                 url_or_none(try_get(
2007                     player_response,
2008                     lambda x: x['streamingData']['hlsManifestUrl'],
2009                     compat_str))
2010                 or url_or_none(try_get(
2011                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2012             if manifest_url:
2013                 formats = []
2014                 m3u8_formats = self._extract_m3u8_formats(
2015                     manifest_url, video_id, 'mp4', fatal=False)
2016                 for a_format in m3u8_formats:
2017                     itag = self._search_regex(
2018                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2019                     if itag:
2020                         a_format['format_id'] = itag
2021                         if itag in self._formats:
2022                             dct = self._formats[itag].copy()
2023                             dct.update(a_format)
2024                             a_format = dct
2025                     a_format['player_url'] = player_url
2026                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2027                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2028                     formats.append(a_format)
2029             else:
2030                 error_message = extract_unavailable_message()
2031                 if not error_message:
2032                     reason_list = try_get(
2033                         player_response,
2034                         lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2035                         list) or []
2036                     for reason in reason_list:
2037                         if not isinstance(reason, dict):
2038                             continue
2039                         reason_text = try_get(reason, lambda x: x['text'], compat_str)
2040                         if reason_text:
2041                             if not error_message:
2042                                 error_message = ''
2043                             error_message += reason_text
2044                     if error_message:
2045                         error_message = clean_html(error_message)
2046                 if not error_message:
2047                     error_message = clean_html(try_get(
2048                         player_response, lambda x: x['playabilityStatus']['reason'],
2049                         compat_str))
2050                 if not error_message:
2051                     error_message = clean_html(
2052                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2053                 if error_message:
2054                     raise ExtractorError(error_message, expected=True)
2055                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2056
2057         # uploader
2058         video_uploader = try_get(
2059             video_info, lambda x: x['author'][0],
2060             compat_str) or str_or_none(video_details.get('author'))
2061         if video_uploader:
2062             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2063         else:
2064             self._downloader.report_warning('unable to extract uploader name')
2065
2066         # uploader_id
2067         video_uploader_id = None
2068         video_uploader_url = None
2069         mobj = re.search(
2070             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2071             video_webpage)
2072         if mobj is not None:
2073             video_uploader_id = mobj.group('uploader_id')
2074             video_uploader_url = mobj.group('uploader_url')
2075         else:
2076             owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2077             if owner_profile_url:
2078                 video_uploader_id = self._search_regex(
2079                     r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2080                     default=None)
2081                 video_uploader_url = owner_profile_url
2082
2083         channel_id = (
2084             str_or_none(video_details.get('channelId'))
2085             or self._html_search_meta(
2086                 'channelId', video_webpage, 'channel id', default=None)
2087             or self._search_regex(
2088                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2089                 video_webpage, 'channel id', default=None, group='id'))
2090         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2091
2092         thumbnails = []
2093         thumbnails_list = try_get(
2094             video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2095         for t in thumbnails_list:
2096             if not isinstance(t, dict):
2097                 continue
2098             thumbnail_url = url_or_none(t.get('url'))
2099             if not thumbnail_url:
2100                 continue
2101             thumbnails.append({
2102                 'url': thumbnail_url,
2103                 'width': int_or_none(t.get('width')),
2104                 'height': int_or_none(t.get('height')),
2105             })
2106
2107         if not thumbnails:
2108             video_thumbnail = None
2109             # We try first to get a high quality image:
2110             m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2111                                 video_webpage, re.DOTALL)
2112             if m_thumb is not None:
2113                 video_thumbnail = m_thumb.group(1)
2114             thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2115             if thumbnail_url:
2116                 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2117             if video_thumbnail:
2118                 thumbnails.append({'url': video_thumbnail})
2119
2120         # upload date
2121         upload_date = self._html_search_meta(
2122             'datePublished', video_webpage, 'upload date', default=None)
2123         if not upload_date:
2124             upload_date = self._search_regex(
2125                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2126                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2127                 video_webpage, 'upload date', default=None)
2128         if not upload_date:
2129             upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2130         upload_date = unified_strdate(upload_date)
2131
2132         video_license = self._html_search_regex(
2133             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2134             video_webpage, 'license', default=None)
2135
2136         m_music = re.search(
2137             r'''(?x)
2138                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2139                 <ul[^>]*>\s*
2140                 <li>(?P<title>.+?)
2141                 by (?P<creator>.+?)
2142                 (?:
2143                     \(.+?\)|
2144                     <a[^>]*
2145                         (?:
2146                             \bhref=["\']/red[^>]*>|             # drop possible
2147                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2148                         )
2149                     .*?
2150                 )?</li
2151             ''',
2152             video_webpage)
2153         if m_music:
2154             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2155             video_creator = clean_html(m_music.group('creator'))
2156         else:
2157             video_alt_title = video_creator = None
2158
2159         def extract_meta(field):
2160             return self._html_search_regex(
2161                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2162                 video_webpage, field, default=None)
2163
2164         track = extract_meta('Song')
2165         artist = extract_meta('Artist')
2166         album = extract_meta('Album')
2167
2168         # Youtube Music Auto-generated description
2169         release_date = release_year = None
2170         if video_description:
2171             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2172             if mobj:
2173                 if not track:
2174                     track = mobj.group('track').strip()
2175                 if not artist:
2176                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2177                 if not album:
2178                     album = mobj.group('album'.strip())
2179                 release_year = mobj.group('release_year')
2180                 release_date = mobj.group('release_date')
2181                 if release_date:
2182                     release_date = release_date.replace('-', '')
2183                     if not release_year:
2184                         release_year = int(release_date[:4])
2185                 if release_year:
2186                     release_year = int(release_year)
2187
2188         yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2189         contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2190         for content in contents:
2191             rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2192             multiple_songs = False
2193             for row in rows:
2194                 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2195                     multiple_songs = True
2196                     break
2197             for row in rows:
2198                 mrr = row.get('metadataRowRenderer') or {}
2199                 mrr_title = try_get(
2200                     mrr, lambda x: x['title']['simpleText'], compat_str)
2201                 mrr_contents = try_get(
2202                     mrr, lambda x: x['contents'][0], dict) or {}
2203                 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2204                 if not (mrr_title and mrr_contents_text):
2205                     continue
2206                 if mrr_title == 'License':
2207                     video_license = mrr_contents_text
2208                 elif not multiple_songs:
2209                     if mrr_title == 'Album':
2210                         album = mrr_contents_text
2211                     elif mrr_title == 'Artist':
2212                         artist = mrr_contents_text
2213                     elif mrr_title == 'Song':
2214                         track = mrr_contents_text
2215
2216         m_episode = re.search(
2217             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2218             video_webpage)
2219         if m_episode:
2220             series = unescapeHTML(m_episode.group('series'))
2221             season_number = int(m_episode.group('season'))
2222             episode_number = int(m_episode.group('episode'))
2223         else:
2224             series = season_number = episode_number = None
2225
2226         m_cat_container = self._search_regex(
2227             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2228             video_webpage, 'categories', default=None)
2229         category = None
2230         if m_cat_container:
2231             category = self._html_search_regex(
2232                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2233                 default=None)
2234         if not category:
2235             category = try_get(
2236                 microformat, lambda x: x['category'], compat_str)
2237         video_categories = None if category is None else [category]
2238
2239         video_tags = [
2240             unescapeHTML(m.group('content'))
2241             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2242         if not video_tags:
2243             video_tags = try_get(video_details, lambda x: x['keywords'], list)
2244
2245         def _extract_count(count_name):
2246             return str_to_int(self._search_regex(
2247                 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2248                  r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2249                 video_webpage, count_name, default=None))
2250
2251         like_count = _extract_count('like')
2252         dislike_count = _extract_count('dislike')
2253
2254         if view_count is None:
2255             view_count = str_to_int(self._search_regex(
2256                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2257                 'view count', default=None))
2258
2259         average_rating = (
2260             float_or_none(video_details.get('averageRating'))
2261             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2262
2263         # subtitles
2264         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2265         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2266
2267         video_duration = try_get(
2268             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2269         if not video_duration:
2270             video_duration = int_or_none(video_details.get('lengthSeconds'))
2271         if not video_duration:
2272             video_duration = parse_duration(self._html_search_meta(
2273                 'duration', video_webpage, 'video duration'))
2274
2275         # annotations
2276         video_annotations = None
2277         if self._downloader.params.get('writeannotations', False):
2278             xsrf_token = self._search_regex(
2279                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2280                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2281             invideo_url = try_get(
2282                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2283             if xsrf_token and invideo_url:
2284                 xsrf_field_name = self._search_regex(
2285                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2286                     video_webpage, 'xsrf field name',
2287                     group='xsrf_field_name', default='session_token')
2288                 video_annotations = self._download_webpage(
2289                     self._proto_relative_url(invideo_url),
2290                     video_id, note='Downloading annotations',
2291                     errnote='Unable to download video annotations', fatal=False,
2292                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2293
2294         chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2295
2296         # Look for the DASH manifest
2297         if self._downloader.params.get('youtube_include_dash_manifest', True):
2298             dash_mpd_fatal = True
2299             for mpd_url in dash_mpds:
2300                 dash_formats = {}
2301                 try:
2302                     def decrypt_sig(mobj):
2303                         s = mobj.group(1)
2304                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2305                         return '/signature/%s' % dec_s
2306
2307                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2308
2309                     for df in self._extract_mpd_formats(
2310                             mpd_url, video_id, fatal=dash_mpd_fatal,
2311                             formats_dict=self._formats):
2312                         if not df.get('filesize'):
2313                             df['filesize'] = _extract_filesize(df['url'])
2314                         # Do not overwrite DASH format found in some previous DASH manifest
2315                         if df['format_id'] not in dash_formats:
2316                             dash_formats[df['format_id']] = df
2317                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2318                         # allow them to fail without bug report message if we already have
2319                         # some DASH manifest succeeded. This is temporary workaround to reduce
2320                         # burst of bug reports until we figure out the reason and whether it
2321                         # can be fixed at all.
2322                         dash_mpd_fatal = False
2323                 except (ExtractorError, KeyError) as e:
2324                     self.report_warning(
2325                         'Skipping DASH manifest: %r' % e, video_id)
2326                 if dash_formats:
2327                     # Remove the formats we found through non-DASH, they
2328                     # contain less info and it can be wrong, because we use
2329                     # fixed values (for example the resolution). See
2330                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2331                     # example.
2332                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2333                     formats.extend(dash_formats.values())
2334
2335         # Check for malformed aspect ratio
2336         stretched_m = re.search(
2337             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2338             video_webpage)
2339         if stretched_m:
2340             w = float(stretched_m.group('w'))
2341             h = float(stretched_m.group('h'))
2342             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2343             # We will only process correct ratios.
2344             if w > 0 and h > 0:
2345                 ratio = w / h
2346                 for f in formats:
2347                     if f.get('vcodec') != 'none':
2348                         f['stretched_ratio'] = ratio
2349
2350         if not formats:
2351             if 'reason' in video_info:
2352                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2353                     regions_allowed = self._html_search_meta(
2354                         'regionsAllowed', video_webpage, default=None)
2355                     countries = regions_allowed.split(',') if regions_allowed else None
2356                     self.raise_geo_restricted(
2357                         msg=video_info['reason'][0], countries=countries)
2358                 reason = video_info['reason'][0]
2359                 if 'Invalid parameters' in reason:
2360                     unavailable_message = extract_unavailable_message()
2361                     if unavailable_message:
2362                         reason = unavailable_message
2363                 raise ExtractorError(
2364                     'YouTube said: %s' % reason,
2365                     expected=True, video_id=video_id)
2366             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2367                 raise ExtractorError('This video is DRM protected.', expected=True)
2368
2369         self._sort_formats(formats)
2370
2371         self.mark_watched(video_id, video_info, player_response)
2372
2373         return {
2374             'id': video_id,
2375             'uploader': video_uploader,
2376             'uploader_id': video_uploader_id,
2377             'uploader_url': video_uploader_url,
2378             'channel_id': channel_id,
2379             'channel_url': channel_url,
2380             'upload_date': upload_date,
2381             'license': video_license,
2382             'creator': video_creator or artist,
2383             'title': video_title,
2384             'alt_title': video_alt_title or track,
2385             'thumbnails': thumbnails,
2386             'description': video_description,
2387             'categories': video_categories,
2388             'tags': video_tags,
2389             'subtitles': video_subtitles,
2390             'automatic_captions': automatic_captions,
2391             'duration': video_duration,
2392             'age_limit': 18 if age_gate else 0,
2393             'annotations': video_annotations,
2394             'chapters': chapters,
2395             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2396             'view_count': view_count,
2397             'like_count': like_count,
2398             'dislike_count': dislike_count,
2399             'average_rating': average_rating,
2400             'formats': formats,
2401             'is_live': is_live,
2402             'start_time': start_time,
2403             'end_time': end_time,
2404             'series': series,
2405             'season_number': season_number,
2406             'episode_number': episode_number,
2407             'track': track,
2408             'artist': artist,
2409             'album': album,
2410             'release_date': release_date,
2411             'release_year': release_year,
2412         }
2413
2414
2415 class YoutubeTabIE(YoutubeBaseInfoExtractor):
2416     IE_DESC = 'YouTube.com tab'
2417     _VALID_URL = r'''(?x)
2418                     https?://
2419                         (?:\w+\.)?
2420                         (?:
2421                             youtube(?:kids)?\.com|
2422                             invidio\.us
2423                         )/
2424                         (?:
2425                             (?:channel|c|user|feed)/|
2426                             (?:playlist|watch)\?.*?\blist=
2427                         )
2428                         (?P<id>[^/?\#&]+)
2429                     '''
2430     IE_NAME = 'youtube:tab'
2431
2432     _TESTS = [{
2433         # playlists, multipage
2434         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2435         'playlist_mincount': 94,
2436         'info_dict': {
2437             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2438             'title': 'Игорь Клейнер - Playlists',
2439             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2440         },
2441     }, {
2442         # playlists, multipage, different order
2443         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2444         'playlist_mincount': 94,
2445         'info_dict': {
2446             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2447             'title': 'Игорь Клейнер - Playlists',
2448             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2449         },
2450     }, {
2451         # playlists, singlepage
2452         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2453         'playlist_mincount': 4,
2454         'info_dict': {
2455             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2456             'title': 'ThirstForScience - Playlists',
2457             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2458         }
2459     }, {
2460         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2461         'only_matching': True,
2462     }, {
2463         # basic, single video playlist
2464         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2465         'info_dict': {
2466             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2467             'uploader': 'Sergey M.',
2468             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2469             'title': 'youtube-dl public playlist',
2470         },
2471         'playlist_count': 1,
2472     }, {
2473         # empty playlist
2474         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2475         'info_dict': {
2476             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2477             'uploader': 'Sergey M.',
2478             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2479             'title': 'youtube-dl empty playlist',
2480         },
2481         'playlist_count': 0,
2482     }, {
2483         # Home tab
2484         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2485         'info_dict': {
2486             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2487             'title': 'lex will - Home',
2488             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2489         },
2490         'playlist_mincount': 2,
2491     }, {
2492         # Videos tab
2493         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2494         'info_dict': {
2495             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2496             'title': 'lex will - Videos',
2497             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2498         },
2499         'playlist_mincount': 975,
2500     }, {
2501         # Videos tab, sorted by popular
2502         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2503         'info_dict': {
2504             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2505             'title': 'lex will - Videos',
2506             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2507         },
2508         'playlist_mincount': 199,
2509     }, {
2510         # Playlists tab
2511         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2512         'info_dict': {
2513             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2514             'title': 'lex will - Playlists',
2515             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2516         },
2517         'playlist_mincount': 17,
2518     }, {
2519         # Community tab
2520         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2521         'info_dict': {
2522             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2523             'title': 'lex will - Community',
2524             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2525         },
2526         'playlist_mincount': 18,
2527     }, {
2528         # Channels tab
2529         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2530         'info_dict': {
2531             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2532             'title': 'lex will - Channels',
2533             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2534         },
2535         'playlist_mincount': 138,
2536     }, {
2537         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2538         'only_matching': True,
2539     }, {
2540         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2541         'only_matching': True,
2542     }, {
2543         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2544         'only_matching': True,
2545     }, {
2546         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2547         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2548         'info_dict': {
2549             'title': '29C3: Not my department',
2550             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2551             'uploader': 'Christiaan008',
2552             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2553         },
2554         'playlist_count': 96,
2555     }, {
2556         'note': 'Large playlist',
2557         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2558         'info_dict': {
2559             'title': 'Uploads from Cauchemar',
2560             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2561             'uploader': 'Cauchemar',
2562             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2563         },
2564         'playlist_mincount': 1123,
2565     }, {
2566         # even larger playlist, 8832 videos
2567         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2568         'only_matching': True,
2569     }, {
2570         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2571         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2572         'info_dict': {
2573             'title': 'Uploads from Interstellar Movie',
2574             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2575             'uploader': 'Interstellar Movie',
2576             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2577         },
2578         'playlist_mincount': 21,
2579     }, {
2580         # https://github.com/ytdl-org/youtube-dl/issues/21844
2581         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2582         'info_dict': {
2583             'title': 'Data Analysis with Dr Mike Pound',
2584             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2585             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2586             'uploader': 'Computerphile',
2587         },
2588         'playlist_mincount': 11,
2589     }, {
2590         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2591         'only_matching': True,
2592     }, {
2593         # Playlist URL that does not actually serve a playlist
2594         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2595         'info_dict': {
2596             'id': 'FqZTN594JQw',
2597             'ext': 'webm',
2598             'title': "Smiley's People 01 detective, Adventure Series, Action",
2599             'uploader': 'STREEM',
2600             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2601             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2602             'upload_date': '20150526',
2603             'license': 'Standard YouTube License',
2604             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2605             'categories': ['People & Blogs'],
2606             'tags': list,
2607             'view_count': int,
2608             'like_count': int,
2609             'dislike_count': int,
2610         },
2611         'params': {
2612             'skip_download': True,
2613         },
2614         'skip': 'This video is not available.',
2615         'add_ie': [YoutubeIE.ie_key()],
2616     }, {
2617         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2618         'only_matching': True,
2619     }, {
2620         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2621         'only_matching': True,
2622     }, {
2623         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2624         'info_dict': {
2625             'id': '9Auq9mYxFEE',
2626             'ext': 'mp4',
2627             'title': 'Watch Sky News live',
2628             'uploader': 'Sky News',
2629             'uploader_id': 'skynews',
2630             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2631             'upload_date': '20191102',
2632             'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2633             'categories': ['News & Politics'],
2634             'tags': list,
2635             'like_count': int,
2636             'dislike_count': int,
2637         },
2638         'params': {
2639             'skip_download': True,
2640         },
2641     }, {
2642         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2643         'info_dict': {
2644             'id': 'a48o2S1cPoo',
2645             'ext': 'mp4',
2646             'title': 'The Young Turks - Live Main Show',
2647             'uploader': 'The Young Turks',
2648             'uploader_id': 'TheYoungTurks',
2649             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2650             'upload_date': '20150715',
2651             'license': 'Standard YouTube License',
2652             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2653             'categories': ['News & Politics'],
2654             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2655             'like_count': int,
2656             'dislike_count': int,
2657         },
2658         'params': {
2659             'skip_download': True,
2660         },
2661         'only_matching': True,
2662     }, {
2663         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2664         'only_matching': True,
2665     }, {
2666         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2667         'only_matching': True,
2668     }, {
2669         'url': 'https://www.youtube.com/feed/trending',
2670         'only_matching': True,
2671     }, {
2672         # needs auth
2673         'url': 'https://www.youtube.com/feed/library',
2674         'only_matching': True,
2675     }, {
2676         # needs auth
2677         'url': 'https://www.youtube.com/feed/history',
2678         'only_matching': True,
2679     }, {
2680         # needs auth
2681         'url': 'https://www.youtube.com/feed/subscriptions',
2682         'only_matching': True,
2683     }, {
2684         # needs auth
2685         'url': 'https://www.youtube.com/feed/watch_later',
2686         'only_matching': True,
2687     }, {
2688         # no longer available?
2689         'url': 'https://www.youtube.com/feed/recommended',
2690         'only_matching': True,
2691     }, {
2692         # inline playlist with not always working continuations
2693         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2694         'only_matching': True,
2695     }
2696         # TODO
2697         # {
2698         #     'url': 'https://www.youtube.com/TheYoungTurks/live',
2699         #     'only_matching': True,
2700         # }
2701     ]
2702
2703     def _extract_channel_id(self, webpage):
2704         channel_id = self._html_search_meta(
2705             'channelId', webpage, 'channel id', default=None)
2706         if channel_id:
2707             return channel_id
2708         channel_url = self._html_search_meta(
2709             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2710              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2711              'twitter:app:url:googleplay'), webpage, 'channel url')
2712         return self._search_regex(
2713             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2714             channel_url, 'channel id')
2715
2716     @staticmethod
2717     def _extract_grid_item_renderer(item):
2718         for item_kind in ('Playlist', 'Video', 'Channel'):
2719             renderer = item.get('grid%sRenderer' % item_kind)
2720             if renderer:
2721                 return renderer
2722
2723     def _extract_video(self, renderer):
2724         video_id = renderer.get('videoId')
2725         title = try_get(
2726             renderer,
2727             (lambda x: x['title']['runs'][0]['text'],
2728              lambda x: x['title']['simpleText']), compat_str)
2729         description = try_get(
2730             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2731             compat_str)
2732         duration = parse_duration(try_get(
2733             renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2734         view_count_text = try_get(
2735             renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2736         view_count = str_to_int(self._search_regex(
2737             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2738             'view count', default=None))
2739         uploader = try_get(
2740             renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2741         return {
2742             '_type': 'url_transparent',
2743             'ie_key': YoutubeIE.ie_key(),
2744             'id': video_id,
2745             'url': video_id,
2746             'title': title,
2747             'description': description,
2748             'duration': duration,
2749             'view_count': view_count,
2750             'uploader': uploader,
2751         }
2752
2753     def _grid_entries(self, grid_renderer):
2754         for item in grid_renderer['items']:
2755             if not isinstance(item, dict):
2756                 continue
2757             renderer = self._extract_grid_item_renderer(item)
2758             if not isinstance(renderer, dict):
2759                 continue
2760             title = try_get(
2761                 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2762             # playlist
2763             playlist_id = renderer.get('playlistId')
2764             if playlist_id:
2765                 yield self.url_result(
2766                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
2767                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2768                     video_title=title)
2769             # video
2770             video_id = renderer.get('videoId')
2771             if video_id:
2772                 yield self._extract_video(renderer)
2773             # channel
2774             channel_id = renderer.get('channelId')
2775             if channel_id:
2776                 title = try_get(
2777                     renderer, lambda x: x['title']['simpleText'], compat_str)
2778                 yield self.url_result(
2779                     'https://www.youtube.com/channel/%s' % channel_id,
2780                     ie=YoutubeTabIE.ie_key(), video_title=title)
2781
2782     def _shelf_entries_from_content(self, shelf_renderer):
2783         content = shelf_renderer.get('content')
2784         if not isinstance(content, dict):
2785             return
2786         renderer = content.get('gridRenderer')
2787         if renderer:
2788             # TODO: add support for nested playlists so each shelf is processed
2789             # as separate playlist
2790             # TODO: this includes only first N items
2791             for entry in self._grid_entries(renderer):
2792                 yield entry
2793         renderer = content.get('horizontalListRenderer')
2794         if renderer:
2795             # TODO
2796             pass
2797
2798     def _shelf_entries(self, shelf_renderer, skip_channels=False):
2799         ep = try_get(
2800             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2801             compat_str)
2802         shelf_url = urljoin('https://www.youtube.com', ep)
2803         if shelf_url:
2804             # Skipping links to another channels, note that checking for
2805             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
2806             # will not work
2807             if skip_channels and '/channels?' in shelf_url:
2808                 return
2809             title = try_get(
2810                 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2811             yield self.url_result(shelf_url, video_title=title)
2812         # Shelf may not contain shelf URL, fallback to extraction from content
2813         for entry in self._shelf_entries_from_content(shelf_renderer):
2814             yield entry
2815
2816     def _playlist_entries(self, video_list_renderer):
2817         for content in video_list_renderer['contents']:
2818             if not isinstance(content, dict):
2819                 continue
2820             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2821             if not isinstance(renderer, dict):
2822                 continue
2823             video_id = renderer.get('videoId')
2824             if not video_id:
2825                 continue
2826             yield self._extract_video(renderer)
2827
2828     def _video_entry(self, video_renderer):
2829         video_id = video_renderer.get('videoId')
2830         if video_id:
2831             return self._extract_video(video_renderer)
2832
2833     def _post_thread_entries(self, post_thread_renderer):
2834         post_renderer = try_get(
2835             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2836         if not post_renderer:
2837             return
2838         # video attachment
2839         video_renderer = try_get(
2840             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2841         video_id = None
2842         if video_renderer:
2843             entry = self._video_entry(video_renderer)
2844             if entry:
2845                 yield entry
2846         # inline video links
2847         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2848         for run in runs:
2849             if not isinstance(run, dict):
2850                 continue
2851             ep_url = try_get(
2852                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2853             if not ep_url:
2854                 continue
2855             if not YoutubeIE.suitable(ep_url):
2856                 continue
2857             ep_video_id = YoutubeIE._match_id(ep_url)
2858             if video_id == ep_video_id:
2859                 continue
2860             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2861
2862     def _post_thread_continuation_entries(self, post_thread_continuation):
2863         contents = post_thread_continuation.get('contents')
2864         if not isinstance(contents, list):
2865             return
2866         for content in contents:
2867             renderer = content.get('backstagePostThreadRenderer')
2868             if not isinstance(renderer, dict):
2869                 continue
2870             for entry in self._post_thread_entries(renderer):
2871                 yield entry
2872
2873     @staticmethod
2874     def _build_continuation_query(continuation, ctp=None):
2875         query = {
2876             'ctoken': continuation,
2877             'continuation': continuation,
2878         }
2879         if ctp:
2880             query['itct'] = ctp
2881         return query
2882
2883     @staticmethod
2884     def _extract_next_continuation_data(renderer):
2885         next_continuation = try_get(
2886             renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2887         if not next_continuation:
2888             return
2889         continuation = next_continuation.get('continuation')
2890         if not continuation:
2891             return
2892         ctp = next_continuation.get('clickTrackingParams')
2893         return YoutubeTabIE._build_continuation_query(continuation, ctp)
2894
2895     @classmethod
2896     def _extract_continuation(cls, renderer):
2897         next_continuation = cls._extract_next_continuation_data(renderer)
2898         if next_continuation:
2899             return next_continuation
2900         contents = renderer.get('contents')
2901         if not isinstance(contents, list):
2902             return
2903         for content in contents:
2904             if not isinstance(content, dict):
2905                 continue
2906             continuation_ep = try_get(
2907                 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
2908                 dict)
2909             if not continuation_ep:
2910                 continue
2911             continuation = try_get(
2912                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
2913             if not continuation:
2914                 continue
2915             ctp = continuation_ep.get('clickTrackingParams')
2916             return YoutubeTabIE._build_continuation_query(continuation, ctp)
2917
    def _entries(self, tab, identity_token):
        """Yield all entries of a tab, following continuations.

        First walks the initial sectionListRenderer contents, dispatching on
        the renderer type found in each item section, then keeps requesting
        /browse_ajax continuation pages until no further continuation token
        can be extracted.
        """
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        if not slr_renderer:
            return
        # On the Channels tab the shelves themselves point to channels and
        # must not be skipped (see _shelf_entries skip_channels)
        is_channels_tab = tab.get('title') == 'Channels'
        continuation = None
        slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
        for slr_content in slr_contents:
            if not isinstance(slr_content, dict):
                continue
            is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                # Dispatch on renderer type; the first match wins and also
                # supplies the continuation for subsequent pages
                renderer = isr_content.get('playlistVideoListRenderer')
                if renderer:
                    for entry in self._playlist_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('gridRenderer')
                if renderer:
                    for entry in self._grid_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('shelfRenderer')
                if renderer:
                    for entry in self._shelf_entries(renderer, not is_channels_tab):
                        yield entry
                    continue
                renderer = isr_content.get('backstagePostThreadRenderer')
                if renderer:
                    for entry in self._post_thread_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('videoRenderer')
                if renderer:
                    entry = self._video_entry(renderer)
                    if entry:
                        yield entry

            # Fall back to section-level, then list-level continuations
            if not continuation:
                continuation = self._extract_continuation(is_renderer)

        if not continuation:
            continuation = self._extract_continuation(slr_renderer)

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        # Page through continuations until one yields no further token
        for page_num in itertools.count(1):
            if not continuation:
                break
            browse = self._download_json(
                'https://www.youtube.com/browse_ajax', None,
                'Downloading page %d' % page_num,
                headers=headers, query=continuation, fatal=False)
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Legacy continuation format
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue

            # Newer appendContinuationItemsAction format
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                # NOTE(review): this `continue` re-enters the loop without
                # updating `continuation`, so a persistently non-dict first
                # item would re-request the same page forever — confirm
                if not isinstance(continuation_item, dict):
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    # Wrap the flat item list so _playlist_entries can walk it
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue

            break
3030
3031     @staticmethod
3032     def _extract_selected_tab(tabs):
3033         for tab in tabs:
3034             if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3035                 return tab['tabRenderer']
3036         else:
3037             raise ExtractorError('Unable to find selected tab')
3038
3039     @staticmethod
3040     def _extract_uploader(data):
3041         uploader = {}
3042         sidebar_renderer = try_get(
3043             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3044         if sidebar_renderer:
3045             for item in sidebar_renderer:
3046                 if not isinstance(item, dict):
3047                     continue
3048                 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3049                 if not isinstance(renderer, dict):
3050                     continue
3051                 owner = try_get(
3052                     renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3053                 if owner:
3054                     uploader['uploader'] = owner.get('text')
3055                     uploader['uploader_id'] = try_get(
3056                         owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3057                     uploader['uploader_url'] = urljoin(
3058                         'https://www.youtube.com/',
3059                         try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3060         return uploader
3061
3062     @staticmethod
3063     def _extract_alert(data):
3064         alerts = []
3065         for alert in try_get(data, lambda x: x['alerts'], list) or []:
3066             if not isinstance(alert, dict):
3067                 continue
3068             alert_text = try_get(
3069                 alert, lambda x: x['alertRenderer']['text'], dict)
3070             if not alert_text:
3071                 continue
3072             text = try_get(
3073                 alert_text,
3074                 (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
3075                 compat_str)
3076             if text:
3077                 alerts.append(text)
3078         return '\n'.join(alerts)
3079
3080     def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
3081         selected_tab = self._extract_selected_tab(tabs)
3082         renderer = try_get(
3083             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3084         playlist_id = title = description = None
3085         if renderer:
3086             channel_title = renderer.get('title') or item_id
3087             tab_title = selected_tab.get('title')
3088             title = channel_title or item_id
3089             if tab_title:
3090                 title += ' - %s' % tab_title
3091             description = renderer.get('description')
3092             playlist_id = renderer.get('externalId')
3093         renderer = try_get(
3094             data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3095         if renderer:
3096             title = renderer.get('title')
3097             description = None
3098             playlist_id = item_id
3099         playlist = self.playlist_result(
3100             self._entries(selected_tab, identity_token),
3101             playlist_id=playlist_id, playlist_title=title,
3102             playlist_description=description)
3103         playlist.update(self._extract_uploader(data))
3104         return playlist
3105
3106     def _extract_from_playlist(self, item_id, url, data, playlist):
3107         title = playlist.get('title') or try_get(
3108             data, lambda x: x['titleText']['simpleText'], compat_str)
3109         playlist_id = playlist.get('playlistId') or item_id
3110         # Inline playlist rendition continuation does not always work
3111         # at Youtube side, so delegating regular tab-based playlist URL
3112         # processing whenever possible.
3113         playlist_url = urljoin(url, try_get(
3114             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3115             compat_str))
3116         if playlist_url and playlist_url != url:
3117             return self.url_result(
3118                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3119                 video_title=title)
3120         return self.playlist_result(
3121             self._playlist_entries(playlist), playlist_id=playlist_id,
3122             playlist_title=title)
3123
    def _real_extract(self, url):
        """Extract a tab/channel/playlist page, falling back to a single video.

        Probes the initial page data in order: tab-based rendering, inline
        watch-page playlist, current/queried video id, then alerts (e.g. for
        private or removed playlists) before giving up.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
        # Forwarded as the x-youtube-identity-token header on continuation
        # requests (see _entries); only present for logged-in sessions
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Capture and output alerts
        alert = self._extract_alert(data)
        if alert:
            raise ExtractorError(alert, expected=True)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3163
3164
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist URLs and bare playlist IDs; delegates to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE whenever it claims the URL.
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to YoutubeTabIE via a canonical /playlist URL."""
        playlist_id = self._match_id(url)
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if not query:
            # Bare playlist IDs (e.g. 'PL...') carry no query string.
            query = {'list': playlist_id}
        canonical_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3239
3240
class YoutubeYtBeIE(InfoExtractor):
    """youtu.be short links carrying a list= parameter."""
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a full watch URL and hand it to YoutubeTabIE."""
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3278
3279
class YoutubeYtUserIE(InfoExtractor):
    """Handles the 'ytuser:NAME' shorthand by redirecting to the user page."""
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3292
3293
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ':ytfav' shorthand onto the liked-videos (LL) playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        favourites_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(favourites_url, ie=YoutubeTabIE.ie_key())
3311
3312
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional opaque filter blob for the API request (see YoutubeSearchDateIE);
    # None means no extra filtering.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n search results for query.

        Pages through the youtubei JSON search API, yielding one
        url_transparent entry per videoRenderer until n entries have been
        produced, a page fails to download/parse, or no continuation token
        remains.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page nests results under sectionListRenderer;
            # continuation pages deliver them via onResponseReceivedCommands.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                # Skip non-video renderers (channels, playlists, shelves, ...).
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Strip locale-specific whitespace/thousand separators before
                # taking the leading digits.
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                # '>=' rather than '==' so a non-positive n can never cause
                # the generator to keep paging through the entire result set.
                if total >= n:
                    return
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3401
3402
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that returns the newest videos first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # URL-encoded opaque filter blob ('CAI='); presumably selects date
    # ordering on the search API — confirm against the endpoint behavior.
    _SEARCH_PARAMS = 'CAI%3D'
3408
3409
# NOTE: dead code — the class below is wrapped in a module-level raw-string
# literal, so it is never defined or registered; kept for reference only.
r"""
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
"""
3432
3433
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """Common base for the authenticated feed extractors.

    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so log in before extraction starts.
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3452
3453
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps ':ytwatchlater' onto the watch-later (WL) playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3466
3467
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Recommended-videos feed (':ytrec' / ':ytrecommended')."""
    _FEED_NAME = 'recommended'
    _VALID_URL = r':ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }]
3479
3480
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed (':ytsubs' / ':ytsubscriptions')."""
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3492
3493
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed (':ythistory')."""
    _FEED_NAME = 'history'
    _VALID_URL = r':ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3502
3503
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs whose v= parameter was lost (usually an unquoted
    shell URL) and reports a helpful error instead of a cryptic failure."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(message, expected=True)
3551
3552
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video ID is shorter than the required
    11 characters and reports a clear error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)