asedeno.scripts.mit.edu Git - youtube-dl.git/blobdiff - youtube_dl/extractor/nrk.py
[twitch:clips] Add access token query to download URLs (closes #29136)
[youtube-dl.git] / youtube_dl / extractor / nrk.py
index 4fb7df9595ef4f7064b5dc205b544af2868976b1..40dee2162f5867ca3bc94ca56508329c7905d0b1 100644 (file)
@@ -220,8 +220,15 @@ class NRKIE(NRKBaseIE):
                 'url': sub_url,
             })
 
-        age_limit = int_or_none(try_get(
-            data, lambda x: x['legalAge']['body']['rating']['code']))
+        legal_age = try_get(
+            data, lambda x: x['legalAge']['body']['rating']['code'], compat_str)
+        # https://en.wikipedia.org/wiki/Norwegian_Media_Authority
+        age_limit = None
+        if legal_age:
+            if legal_age == 'A':
+                age_limit = 0
+            elif legal_age.isdigit():
+                age_limit = int_or_none(legal_age)
 
         is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series'
 
@@ -238,19 +245,37 @@ class NRKIE(NRKBaseIE):
         }
 
         if is_series:
-            series = title
+            series = season_id = season_number = episode = episode_number = None
+            programs = self._call_api(
+                'programs/%s' % video_id, video_id, 'programs', fatal=False)
+            if programs and isinstance(programs, dict):
+                series = str_or_none(programs.get('seriesTitle'))
+                season_id = str_or_none(programs.get('seasonId'))
+                season_number = int_or_none(programs.get('seasonNumber'))
+                episode = str_or_none(programs.get('episodeTitle'))
+                episode_number = int_or_none(programs.get('episodeNumber'))
+            if not series:
+                series = title
             if alt_title:
                 title += ' - %s' % alt_title
-            season_number = int_or_none(self._search_regex(
-                r'Sesong\s+(\d+)', description or '', 'season number',
-                default=None))
-            episode = alt_title if is_series else None
-            episode_number = int_or_none(self._search_regex(
-                r'(\d+)\.\s+episode', episode or '', 'episode number',
-                default=None))
+            if not season_number:
+                season_number = int_or_none(self._search_regex(
+                    r'Sesong\s+(\d+)', description or '', 'season number',
+                    default=None))
+            if not episode:
+                episode = alt_title if is_series else None
+            if not episode_number:
+                episode_number = int_or_none(self._search_regex(
+                    r'^(\d+)\.', episode or '', 'episode number',
+                    default=None))
+            if not episode_number:
+                episode_number = int_or_none(self._search_regex(
+                    r'\((\d+)\s*:\s*\d+\)', description or '',
+                    'episode number', default=None))
             info.update({
                 'title': title,
                 'series': series,
+                'season_id': season_id,
                 'season_number': season_number,
                 'episode': episode,
                 'episode_number': episode_number,
@@ -273,6 +298,14 @@ class NRKTVIE(InfoExtractor):
             'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
             'duration': 2223.44,
             'age_limit': 6,
+            'subtitles': {
+                'nb-nor': [{
+                    'ext': 'vtt',
+                }],
+                'nb-ttv': [{
+                    'ext': 'vtt',
+                }]
+            },
         },
     }, {
         'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
@@ -286,6 +319,7 @@ class NRKTVIE(InfoExtractor):
             'duration': 1741,
             'series': '20 spørsmål',
             'episode': '23. mai 2014',
+            'age_limit': 0,
         },
     }, {
         'url': 'https://tv.nrk.no/program/mdfp15000514',
@@ -297,6 +331,7 @@ class NRKTVIE(InfoExtractor):
             'duration': 4605.08,
             'series': 'Kunnskapskanalen',
             'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
+            'age_limit': 0,
         },
         'params': {
             'skip_download': True,
@@ -309,6 +344,7 @@ class NRKTVIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
             'description': 'md5:c03aba1e917561eface5214020551b7a',
+            'age_limit': 0,
         },
         'params': {
             'skip_download': True,
@@ -322,6 +358,7 @@ class NRKTVIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
             'description': 'md5:c03aba1e917561eface5214020551b7a',
+            'age_limit': 0,
         },
         'expected_warnings': ['Failed to download m3u8 information'],
         'skip': 'Ikke tilgjengelig utenfor Norge',
@@ -337,6 +374,7 @@ class NRKTVIE(InfoExtractor):
             'episode': '13. episode',
             'season_number': 3,
             'episode_number': 13,
+            'age_limit': 0,
         },
         'params': {
             'skip_download': True,
@@ -351,6 +389,7 @@ class NRKTVIE(InfoExtractor):
             'duration': 1796,
             'series': 'Nytt på nytt',
             'episode': '27.01.2017',
+            'age_limit': 0,
         },
         'params': {
             'skip_download': True,
@@ -374,19 +413,19 @@ class NRKTVIE(InfoExtractor):
 
 
 class NRKTVEpisodeIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
+    _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))'
     _TESTS = [{
         'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
         'info_dict': {
-            'id': 'MUHH36005220BA',
+            'id': 'MUHH36005220',
             'ext': 'mp4',
-            'title': 'Kro, krig og kjærlighet 2:6',
-            'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
-            'duration': 1563,
+            'title': 'Hellums kro - 2. Kro, krig og kjærlighet',
+            'description': 'md5:ad92ddffc04cea8ce14b415deef81787',
+            'duration': 1563.92,
             'series': 'Hellums kro',
             'season_number': 1,
             'episode_number': 2,
-            'episode': '2:6',
+            'episode': '2. Kro, krig og kjærlighet',
             'age_limit': 6,
         },
         'params': {
@@ -395,15 +434,16 @@ class NRKTVEpisodeIE(InfoExtractor):
     }, {
         'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
         'info_dict': {
-            'id': 'MSUI14000816AA',
+            'id': 'MSUI14000816',
             'ext': 'mp4',
-            'title': 'Backstage 8:30',
+            'title': 'Backstage - 8. episode',
             'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
             'duration': 1320,
             'series': 'Backstage',
             'season_number': 1,
             'episode_number': 8,
-            'episode': '8:30',
+            'episode': '8. episode',
+            'age_limit': 0,
         },
         'params': {
             'skip_download': True,
@@ -412,7 +452,7 @@ class NRKTVEpisodeIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        display_id, season_number, episode_number = re.match(self._VALID_URL, url).groups()
 
         webpage = self._download_webpage(url, display_id)
 
@@ -424,10 +464,12 @@ class NRKTVEpisodeIE(InfoExtractor):
         assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
 
         info.update({
-            '_type': 'url_transparent',
+            '_type': 'url',
             'id': nrk_id,
             'url': 'nrk:%s' % nrk_id,
             'ie_key': NRKIE.ie_key(),
+            'season_number': int(season_number),
+            'episode_number': int(episode_number),
         })
         return info