[youtube:tab] Improve identity token extraction (closes #27197)

author Sergey M․ <dstftw@gmail.com>

Tue, 8 Dec 2020 21:17:47 +0000 (04:17 +0700)

committer Sergey M․ <dstftw@gmail.com>

Tue, 8 Dec 2020 21:17:47 +0000 (04:17 +0700)
author Sergey M․ <dstftw@gmail.com>
Tue, 8 Dec 2020 21:17:47 +0000 (04:17 +0700)
committer Sergey M․ <dstftw@gmail.com>
Tue, 8 Dec 2020 21:17:47 +0000 (04:17 +0700)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 052926ee53bde137eabfb4b91c9fbcab9fc0b468..1e7af3fdd8f1f3f95b2dc024c879f9d9bf5b9ecf 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -3121,6 +3121,19 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
              self._playlist_entries(playlist), playlist_id=playlist_id,
              playlist_title=title)
  
+    def _extract_identity_token(self, webpage, item_id):  # yields the page's ID_TOKEN value, or the fallback regex result (default=None)
+        ytcfg = self._parse_json(  # first attempt: the JSON object passed to ytcfg.set(...)
+            self._search_regex(
+                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
+                default='{}'), item_id, fatal=False)  # default='{}' is used when no ytcfg.set(...) matches; fatal=False presumably makes a bad parse non-fatal -- confirm in _parse_json
+        if ytcfg:  # a falsy ytcfg (e.g. parsed from the '{}' default) skips straight to the fallback
+            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)  # presumably accepts the value only if it is a compat_str -- confirm in try_get
+            if token:
+                return token
+        return self._search_regex(  # fallback: search the raw webpage text for a quoted ID_TOKEN value
+            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
+            'identity token', default=None)
+
      def _real_extract(self, url):
          item_id = self._match_id(url)
          url = compat_urlparse.urlunparse(
@@ -3135,9 +3148,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                  return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
              self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
          webpage = self._download_webpage(url, item_id)
-        identity_token = self._search_regex(
-            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
-            'identity token', default=None)
+        identity_token = self._extract_identity_token(webpage, item_id)
          data = self._extract_yt_initial_data(item_id, webpage)
          tabs = try_get(
              data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
author	Sergey M․ <dstftw@gmail.com>
	Tue, 8 Dec 2020 21:17:47 +0000 (04:17 +0700)
committer	Sergey M․ <dstftw@gmail.com>
	Tue, 8 Dec 2020 21:17:47 +0000 (04:17 +0700)