]> asedeno.scripts.mit.edu Git - youtube-dl.git/blob - youtube_dl/extractor/cbssports.py
[twitch:clips] Add access token query to download URLs (closes #29136)
[youtube-dl.git] / youtube_dl / extractor / cbssports.py
1 from __future__ import unicode_literals
2
3 import re
4
5 # from .cbs import CBSBaseIE
6 from .common import InfoExtractor
7 from ..utils import (
8     int_or_none,
9     try_get,
10 )
11
12
13 # class CBSSportsEmbedIE(CBSBaseIE):
class CBSSportsEmbedIE(InfoExtractor):
    """Extractor for the CBS Sports / 247Sports embeddable video player.

    The player URL carries its arguments percent-encoded; the video is
    identified either by a UUID (``ids%3D...``) or by a numeric id
    (``pcid%3D...``).  Metadata is fetched from the cbssports.com content API
    and the stream is taken from the first entry of ``metaData.files``.
    """
    IE_NAME = 'cbssports:embed'
    _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
        (?:
            ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
            pcid%3D(?P<pcid>\d+)
        )'''
    _TESTS = [{
        'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
        'only_matching': True,
    }, {
        'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
        'only_matching': True,
    }]

    # def _extract_video_info(self, filter_query, video_id):
    #     return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)

    def _real_extract(self, url):
        """Return the info dict for the video referenced by the embed *url*.

        ``id`` and ``title`` are read with plain indexing, so their absence
        from the API response raises.  All other fields are optional.
        """
        # _VALID_URL has exactly two capturing groups (id, pcid); only the
        # alternative that matched is set, the other one is None.
        uuid, pcid = re.match(self._VALID_URL, url).groups()
        query = {'id': uuid} if uuid else {'pcid': pcid}
        # The response is indexed with [0]: only its first element is used.
        video = self._download_json(
            'https://www.cbssports.com/api/content/video/',
            uuid or pcid, query=query)[0]
        video_id = video['id']
        title = video['title']
        metadata = video.get('metaData') or {}
        # return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
        # return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)

        # `metadata` may be the empty fallback above, and `files` may be
        # missing or empty, so read the URL defensively instead of letting a
        # bare KeyError/IndexError escape.
        m3u8_url = try_get(metadata, lambda x: x['files'][0]['url'])
        formats = []
        if m3u8_url:
            formats = self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)
        # With no stream URL the list stays empty and _sort_formats is left to
        # report the missing formats.
        self._sort_formats(formats)

        image = video.get('image')
        thumbnails = None
        if image:
            image_path = image.get('path')
            if image_path:
                thumbnails = [{
                    'url': image_path,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                    'filesize': int_or_none(image.get('size')),
                }]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video.get('description'),
            'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
            'duration': int_or_none(metadata.get('duration')),
        }
70
71
class CBSSportsBaseIE(InfoExtractor):
    """Shared logic for article pages that embed the CBS Sports player.

    Subclasses only supply ``_VALID_URL`` (with an ``id`` group) and tests;
    the page is downloaded, the player iframe located, and extraction is
    delegated to ``CBSSportsEmbedIE``.
    """

    def _real_extract(self, url):
        """Find the player iframe on the page at *url* and hand it off."""
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)
        # The iframe address may sit in either `src` or `data-src`.
        iframe_pattern = r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"'
        embed_url = self._search_regex(iframe_pattern, html, 'embed url')
        embed_ie_key = CBSSportsEmbedIE.ie_key()
        return self.url_result(embed_url, embed_ie_key)
80
81
class CBSSportsIE(CBSSportsBaseIE):
    """Extractor for cbssports.com video pages (``/<section>/video/<slug>``).

    Defines only the URL pattern and a test case; ``_real_extract`` is
    inherited from ``CBSSportsBaseIE``.
    """
    IE_NAME = 'cbssports'
    # The `id` group captures the slug that follows `/video/`.
    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
            'info_dict': {
                'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
                'ext': 'mp4',
                'title': 'Cover 3: Stanford Spring Gleaning',
                'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
                'timestamp': 1617218398,
                'upload_date': '20210331',
                'duration': 502,
            },
        },
    ]
97
98
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
    """Extractor for 247sports.com video pages (``/Video/<slug>-<number>``).

    Defines only the URL pattern and a test case; ``_real_extract`` is
    inherited from ``CBSSportsBaseIE``.
    """
    IE_NAME = '247sports'
    # The `id` group is the run of digits after the optional `<slug>-` prefix.
    _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
            'info_dict': {
                'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
                'ext': 'mp4',
                'title': '2021 QB Jake Garcia senior highlights through five games',
                'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
                'timestamp': 1607114223,
                'upload_date': '20201204',
                'duration': 208,
            },
        },
    ]