Return the item itself if playlist has one entry

[youtube-dl.git] / youtube_dl / extractor / instagram.py
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py

index 3f5648c0c593eedb72168cc265338a90044a83eb..12e10143cdc100877f3f0233243f80d224599863 100644 (file)
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -12,6 +12,7 @@ from ..compat import (
  )
  from ..utils import (
      ExtractorError,
+    float_or_none,
      get_element_by_attribute,
      int_or_none,
      lowercase_escape,
@@ -22,7 +23,7 @@ from ..utils import (
  
  
  class InstagramIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
      _TESTS = [{
          'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
          'md5': '0d2da106a9d2631273e192b372806516',
@@ -32,10 +33,11 @@ class InstagramIE(InfoExtractor):
              'title': 'Video by naomipq',
              'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
              'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
              'timestamp': 1371748545,
              'upload_date': '20130620',
              'uploader_id': 'naomipq',
-            'uploader': 'Naomi Leonor Phan-Quang',
+            'uploader': 'B E A U T Y  F O R  A S H E S',
              'like_count': int,
              'comment_count': int,
              'comments': list,
@@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
              'ext': 'mp4',
              'title': 'Video by britneyspears',
              'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
              'timestamp': 1453760977,
              'upload_date': '20160125',
              'uploader_id': 'britneyspears',
@@ -86,6 +89,24 @@ class InstagramIE(InfoExtractor):
              'title': 'Post by instagram',
              'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
          },
+    }, {
+        # IGTV
+        'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
+        'info_dict': {
+            'id': 'BkfuX9UB-eK',
+            'ext': 'mp4',
+            'title': 'Fingerboarding Tricks with @cass.fb',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 53.83,
+            'timestamp': 1530032919,
+            'upload_date': '20180626',
+            'uploader_id': 'instagram',
+            'uploader': 'Instagram',
+            'like_count': int,
+            'comment_count': int,
+            'comments': list,
+            'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
+        }
      }, {
          'url': 'https://instagram.com/p/-Cmh1cukG2/',
          'only_matching': True,
@@ -95,6 +116,9 @@ class InstagramIE(InfoExtractor):
      }, {
          'url': 'https://www.instagram.com/tv/aye83DjauH/',
          'only_matching': True,
+    }, {
+        'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
+        'only_matching': True,
      }]
  
      @staticmethod
@@ -156,17 +180,25 @@ class InstagramIE(InfoExtractor):
              description = try_get(
                  media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
                  compat_str) or media.get('caption')
-            thumbnail = media.get('display_src')
+            title = media.get('title')
+            thumbnail = media.get('display_src') or media.get('display_url')
+            duration = float_or_none(media.get('video_duration'))
              timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
              uploader = media.get('owner', {}).get('full_name')
              uploader_id = media.get('owner', {}).get('username')
  
-            def get_count(key, kind):
-                return int_or_none(try_get(
-                    media, (lambda x: x['edge_media_%s' % key]['count'],
-                            lambda x: x['%ss' % kind]['count'])))
+            def get_count(keys, kind):
+                if not isinstance(keys, (list, tuple)):
+                    keys = [keys]
+                for key in keys:
+                    count = int_or_none(try_get(
+                        media, (lambda x: x['edge_media_%s' % key]['count'],
+                                lambda x: x['%ss' % kind]['count'])))
+                    if count is not None:
+                        return count
              like_count = get_count('preview_like', 'like')
-            comment_count = get_count('to_comment', 'comment')
+            comment_count = get_count(
+                ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
  
              comments = [{
                  'author': comment.get('user', {}).get('username'),
@@ -191,9 +223,10 @@ class InstagramIE(InfoExtractor):
                              continue
                          entries.append({
                              'id': node.get('shortcode') or node['id'],
-                            'title': 'Video %d' % edge_num,
+                            'title': node.get('title') or 'Video %d' % edge_num,
                              'url': node_video_url,
                              'thumbnail': node.get('display_url'),
+                            'duration': float_or_none(node.get('video_duration')),
                              'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
                              'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
                              'view_count': int_or_none(node.get('video_view_count')),
@@ -230,8 +263,9 @@ class InstagramIE(InfoExtractor):
              'id': video_id,
              'formats': formats,
              'ext': 'mp4',
-            'title': 'Video by %s' % uploader_id,
+            'title': title or 'Video by %s' % uploader_id,
              'description': description,
+            'duration': duration,
              'thumbnail': thumbnail,
              'timestamp': timestamp,
              'uploader_id': uploader_id,