youtube_dl/extractor/tiktok.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     compat_str,
   7     ExtractorError,
   8     float_or_none,
   9     int_or_none,
  10     str_or_none,
  11     try_get,
  12     url_or_none,
  13 )
  14
  15
  16 class TikTokBaseIE(InfoExtractor):
  17     def _extract_video(self, data, video_id=None):
  18         video = data['video']
  19         description = str_or_none(try_get(data, lambda x: x['desc']))
  20         width = int_or_none(try_get(data, lambda x: video['width']))
  21         height = int_or_none(try_get(data, lambda x: video['height']))
  22
  23         format_urls = set()
  24         formats = []
  25         for format_id in ('download', 'play'):
  26             format_url = url_or_none(video.get('%sAddr' % format_id))
  27             if not format_url:
  28                 continue
  29             if format_url in format_urls:
  30                 continue
  31             format_urls.add(format_url)
  32             formats.append({
  33                 'url': format_url,
  34                 'ext': 'mp4',
  35                 'height': height,
  36                 'width': width,
  37                 'http_headers': {
  38                     'Referer': 'https://www.tiktok.com/',
  39                 }
  40             })
  41         self._sort_formats(formats)
  42
  43         thumbnail = url_or_none(video.get('cover'))
  44         duration = float_or_none(video.get('duration'))
  45
  46         uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
  47         uploader_id = try_get(data, lambda x: x['author']['id'], compat_str)
  48
  49         timestamp = int_or_none(data.get('createTime'))
  50
  51         def stats(key):
  52             return int_or_none(try_get(
  53                 data, lambda x: x['stats']['%sCount' % key]))
  54
  55         view_count = stats('play')
  56         like_count = stats('digg')
  57         comment_count = stats('comment')
  58         repost_count = stats('share')
  59
  60         aweme_id = data.get('id') or video_id
  61
  62         return {
  63             'id': aweme_id,
  64             'title': uploader or aweme_id,
  65             'description': description,
  66             'thumbnail': thumbnail,
  67             'duration': duration,
  68             'uploader': uploader,
  69             'uploader_id': uploader_id,
  70             'timestamp': timestamp,
  71             'view_count': view_count,
  72             'like_count': like_count,
  73             'comment_count': comment_count,
  74             'repost_count': repost_count,
  75             'formats': formats,
  76         }
  77
  78
  79 class TikTokIE(TikTokBaseIE):
  80     _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@[^/]+/video/(?P<id>\d+)'
  81     _TESTS = [{
  82         'url': 'https://www.tiktok.com/@zureeal/video/6606727368545406213',
  83         'md5': '163ceff303bb52de60e6887fe399e6cd',
  84         'info_dict': {
  85             'id': '6606727368545406213',
  86             'ext': 'mp4',
  87             'title': 'Zureeal',
  88             'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
  89             'thumbnail': r're:^https?://.*',
  90             'duration': 15,
  91             'uploader': 'Zureeal',
  92             'uploader_id': '188294915489964032',
  93             'timestamp': 1538248586,
  94             'upload_date': '20180929',
  95             'view_count': int,
  96             'like_count': int,
  97             'comment_count': int,
  98             'repost_count': int,
  99         }
 100     }]
 101
 102     def _real_initialize(self):
 103         # Setup session (will set necessary cookies)
 104         self._request_webpage(
 105             'https://www.tiktok.com/', None, note='Setting up session')
 106
 107     def _real_extract(self, url):
 108         video_id = self._match_id(url)
 109         webpage = self._download_webpage(url, video_id)
 110         page_props = self._parse_json(self._search_regex(
 111             r'<script[^>]+\bid=["\']__NEXT_DATA__[^>]+>\s*({.+?})\s*</script',
 112             webpage, 'data'), video_id)['props']['pageProps']
 113         data = try_get(page_props, lambda x: x['itemInfo']['itemStruct'], dict)
 114         if not data and page_props.get('statusCode') == 10216:
 115             raise ExtractorError('This video is private', expected=True)
 116         return self._extract_video(data, video_id)
 117
 118
 119 class TikTokUserIE(TikTokBaseIE):
 120     _VALID_URL = r'https://(?:www\.)?tiktok\.com/@(?P<id>[^/?#&]+)'
 121     _TESTS = [{
 122         'url': 'https://www.tiktok.com/@zureeal',
 123         'info_dict': {
 124             'id': '188294915489964032',
 125         },
 126         'playlist_mincount': 24,
 127     }]
 128     _WORKING = False
 129
 130     @classmethod
 131     def suitable(cls, url):
 132         return False if TikTokIE.suitable(url) else super(TikTokUserIE, cls).suitable(url)
 133
 134     def _real_extract(self, url):
 135         user_id = self._match_id(url)
 136         data = self._download_json(
 137             'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
 138             query={'_signature': '_'})
 139         entries = []
 140         for aweme in data['aweme_list']:
 141             try:
 142                 entry = self._extract_video(aweme)
 143             except ExtractorError:
 144                 continue
 145             entry['extractor_key'] = TikTokIE.ie_key()
 146             entries.append(entry)
 147         return self.playlist_result(entries, user_id)