2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
49 UnavailableVideoError,
56 from .extractor import get_info_extractor, gen_extractors
57 from .downloader import get_suitable_downloader
58 from .postprocessor import FFmpegMergerPP
59 from .version import __version__
62 class YoutubeDL(object):
# NOTE(review): this capture is missing many original source lines — the inline
# line-number gutter jumps (62 -> 65, 85 -> 89, 140 -> 142, ...). The triple
# quotes opening and closing the class docstring below are among the dropped
# lines. Recover the complete file before executing any of this.
65     YoutubeDL objects are the ones responsible for downloading the
66     actual video file and writing it to disk if the user has requested
67     it, among some other tasks. In most cases there should be one per
68     program. As, given a video URL, the downloader doesn't know how to
69     extract all the needed information, task that InfoExtractors do, it
70     has to pass the URL to one of them.
72     For this, YoutubeDL objects have a method that allows
73     InfoExtractors to be registered in a given order. When it is passed
74     a URL, the YoutubeDL object hands it to the first InfoExtractor it
75     finds that reports being able to handle it. The InfoExtractor extracts
76     all the information about the video or videos the URL refers to, and
77     YoutubeDL processes the extracted information, possibly using a File
78     Downloader to download the video.
80     YoutubeDL objects accept a lot of parameters. In order not to saturate
81     the object constructor with arguments, it receives a dictionary of
82     options instead. These options are available through the params
83     attribute for the InfoExtractors to use. The YoutubeDL also
84     registers itself as the downloader in charge for the InfoExtractors
85     that are added to it, so this is a "mutual registration".
89     username:          Username for authentication purposes.
90     password:          Password for authentication purposes.
91     videopassword:     Password for accessing a video.
92     usenetrc:          Use netrc for authentication instead.
93     verbose:           Print additional info to stdout.
94     quiet:             Do not print messages to stdout.
95     forceurl:          Force printing final URL.
96     forcetitle:        Force printing title.
97     forceid:           Force printing ID.
98     forcethumbnail:    Force printing thumbnail URL.
99     forcedescription:  Force printing description.
100     forcefilename:     Force printing final filename.
101     forceduration:     Force printing duration.
102     forcejson:         Force printing info_dict as JSON.
103     simulate:          Do not download the video files.
104     format:            Video format code.
105     format_limit:      Highest quality format to try.
106     outtmpl:           Template for output names.
107     restrictfilenames: Do not allow "&" and spaces in file names
108     ignoreerrors:      Do not stop on download errors.
109     nooverwrites:      Prevent overwriting files.
110     playliststart:     Playlist item to start at.
111     playlistend:       Playlist item to end at.
112     matchtitle:        Download only matching titles.
113     rejecttitle:       Reject downloads for matching titles.
114     logger:            Log messages to a logging.Logger instance.
115     logtostderr:       Log messages to stderr instead of stdout.
116     writedescription:  Write the video description to a .description file
117     writeinfojson:     Write the video description to a .info.json file
118     writeannotations:  Write the video annotations to a .annotations.xml file
119     writethumbnail:    Write the thumbnail image to a file
120     writesubtitles:    Write the video subtitles to a file
121     writeautomaticsub: Write the automatic subtitles to a file
122     allsubtitles:      Downloads all the subtitles of the video
123                        (requires writesubtitles or writeautomaticsub)
124     listsubtitles:     Lists all available subtitles for the video
125     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
126     subtitleslangs:    List of languages of the subtitles to download
127     keepvideo:         Keep the video file after post-processing
128     daterange:         A DateRange object, download only if the upload_date is in the range.
129     skip_download:     Skip the actual download of the video file
130     cachedir:          Location of the cache files in the filesystem.
131                        None to disable filesystem cache.
132     noplaylist:        Download single video instead of a playlist if in doubt.
133     age_limit:         An integer representing the user's age in years.
134                        Unsuitable videos for the given age are skipped.
135     min_views:         An integer representing the minimum view count the video
136                        must have in order to not be skipped.
137                        Videos without view count information are always
138                        downloaded. None for no limit.
139     max_views:         An integer representing the maximum view count.
140                        Videos that are more popular than that are not
142                        Videos without view count information are always
143                        downloaded. None for no limit.
144     download_archive:  File name of a file where all downloads are recorded.
145                        Videos already present in the file are not downloaded
147     cookiefile:        File name where cookies should be read from and dumped to.
148     nocheckcertificate:Do not verify SSL certificates
149     proxy:             URL of the proxy server to use
150     socket_timeout:    Time to wait for unresponsive hosts, in seconds
151     bidi_workaround:   Work around buggy terminals without bidirectional text
152                        support, using fribidi
153     debug_printtraffic:Print out sent and received HTTP traffic
154     include_ads:       Download ads as well
155     default_search:    Prepend this string if an input url is not valid.
156                        'auto' for elaborate guessing
158     The following parameters are not used by YoutubeDL itself, they are used by
160     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
161     noresizebuffer, retries, continuedl, noprogress, consoletitle
163     The following options are used by the post processors:
164     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
165                        otherwise prefer avconv.
# Class-level defaults; the real per-instance values are assigned in __init__
# (see lines 183-184 below).
171     _download_retcode = None
172     _num_downloads = None
175     def __init__(self, params=None):
176         """Create a FileDownloader object with the given options."""
# NOTE(review): gutter gaps (177-179, 181, 187-215, 220, 225, ...) mean this
# constructor is only partially visible; the lines that normalize `params`,
# build `sp_kwargs`, and assign `self.params` are among the missing ones.
# NOTE(review): `params` defaults to None but is dereferenced below via
# params.get(...) — presumably a missing line normalizes None to {}; verify.
180         self._ies_instances = {}
182         self._progress_hooks = []
183         self._download_retcode = 0
184         self._num_downloads = 0
185         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
186         self._err_file = sys.stderr
# Optional fribidi/bidiv pipe for terminals lacking bidirectional-text support.
189         if params.get('bidi_workaround', False):
192                 master, slave = pty.openpty()
193                 width = get_term_width()
197                     width_args = ['-w', str(width)]
199                     stdin=subprocess.PIPE,
201                     stderr=self._err_file)
203                     self._output_process = subprocess.Popen(
204                         ['bidiv'] + width_args, **sp_kwargs
# Fall back to fribidi when bidiv is unavailable (the except line is missing
# from this capture).
207                     self._output_process = subprocess.Popen(
208                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
209                 self._output_channel = os.fdopen(master, 'rb')
210             except OSError as ose:
212                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# NOTE(review): params['restrictfilenames'] raises KeyError when the option is
# absent — every other option here is read via params.get(); prefer
# params.get('restrictfilenames') (confirm against callers).
216         if (sys.version_info >= (3,) and sys.platform != 'win32' and
217                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
218                 and not params['restrictfilenames']):
219             # On Python 3, the Unicode filesystem API will throw errors (#1474)
# NOTE(review): 'charactes' below is a typo in a user-facing runtime string;
# fix it in a separate behavioral change, not here.
221                 'Assuming --restrict-filenames since file system encoding '
222                 'cannot encode all charactes. '
223                 'Set the LC_ALL environment variable to fix this.')
224             self.params['restrictfilenames'] = True
226         if '%(stitle)s' in self.params.get('outtmpl', ''):
227             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
231     def add_info_extractor(self, ie):
232         """Add an InfoExtractor object to the end of the list."""
# NOTE(review): original line 233 is missing from this capture — per the
# docstring it presumably appends `ie` to the ordered extractor list that
# get_info_extractor's doc refers to as `_ies`; recover before use.
234         self._ies_instances[ie.ie_key()] = ie
# Mutual registration: the extractor gets a back-reference to this downloader.
235         ie.set_downloader(self)
237     def get_info_extractor(self, ie_key):
# NOTE(review): the docstring's opening/closing quotes (lines 238, 242), the
# `if ie is None:` guard (line 244) and the trailing `return ie` (247-248)
# are missing from this capture.
239         Get an instance of an IE with name ie_key, it will try to get one from
240         the _ies list, if there's no instance it will create a new one and add
241         it to the extractor list.
# Cached lookup first; only instantiate (and register) on a miss.
243         ie = self._ies_instances.get(ie_key)
245             ie = get_info_extractor(ie_key)()
246             self.add_info_extractor(ie)
249     def add_default_info_extractors(self):
# NOTE(review): docstring quote lines (250, 252) are missing from this capture.
251         Add the InfoExtractors returned by gen_extractors to the end of the list
253         for ie in gen_extractors():
254             self.add_info_extractor(ie)
256     def add_post_processor(self, pp):
257         """Add a PostProcessor object to the end of the chain."""
# NOTE(review): original line 258 is missing — presumably it appends `pp` to
# the processor chain (`self._pps`, iterated later by post_process); recover.
259         pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register *ph* so it gets attached to the file downloader's progress hooks."""
    # Hooks collected here are wired onto the FileDownloader in process_info.
    self._progress_hooks += [ph]
265     def _bidi_workaround(self, message):
# Pipe `message` through the bidiv/fribidi subprocess started in __init__ so
# bidirectional text renders correctly on buggy terminals.
266         if not hasattr(self, '_output_channel'):
# NOTE(review): lines 267-268 are missing — presumably the guard returns
# `message` unchanged when the workaround is disabled; verify.
269         assert hasattr(self, '_output_process')
270         assert type(message) == type('')
271         line_count = message.count('\n') + 1
272         self._output_process.stdin.write((message + '\n').encode('utf-8'))
273         self._output_process.stdin.flush()
# Read back exactly as many lines as were sent, then drop the added newline.
274         res = ''.join(self._output_channel.readline().decode('utf-8')
275                       for _ in range(line_count))
276         return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Show *message* on the screen unless the 'quiet' option suppresses it."""
    # Route through to_stdout so quiet-mode filtering lives in one place.
    return self.to_stdout(message, skip_eol, check_quiet=True)
282     def to_stdout(self, message, skip_eol=False, check_quiet=False):
283         """Print message to stdout if not in quiet mode."""
# A configured logger takes precedence over writing to the screen file.
284         if self.params.get('logger'):
285             self.params['logger'].debug(message)
286         elif not check_quiet or not self.params.get('quiet', False):
287             message = self._bidi_workaround(message)
# Indexing by the bool: False -> '\n', True -> '' (no end-of-line).
288             terminator = ['\n', ''][skip_eol]
289             output = message + terminator
# NOTE(review): original line 290 is missing from this capture (likely blank).
291             write_string(output, self._screen_file)
293     def to_stderr(self, message):
294         """Print message to stderr."""
295         assert type(message) == type('')
296         if self.params.get('logger'):
297             self.params['logger'].error(message)
# NOTE(review): original line 298 is missing — almost certainly the `else:`
# for the logger branch (to_stdout above has the same structure); verify.
299             message = self._bidi_workaround(message)
300             output = message + '\n'
301             write_string(output, self._err_file)
303     def to_console_title(self, message):
# Set the terminal/console window title, when the 'consoletitle' option is on.
304         if not self.params.get('consoletitle', False):
# NOTE(review): original line 305 (the guard's body, presumably `return`) is
# missing from this capture.
306         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
307             # c_wchar_p() might not be necessary if `message` is
308             # already of type unicode()
309             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
310         elif 'TERM' in os.environ:
# xterm escape sequence: OSC 0 sets both icon name and window title.
311             write_string('\033]0;%s\007' % message, self._screen_file)
313     def save_console_title(self):
314         if not self.params.get('consoletitle', False):
# NOTE(review): original line 315 (presumably `return`) is missing.
316         if 'TERM' in os.environ:
317             # Save the title on stack
318             write_string('\033[22;0t', self._screen_file)
320     def restore_console_title(self):
321         if not self.params.get('consoletitle', False):
# NOTE(review): original line 322 (presumably `return`) is missing.
323         if 'TERM' in os.environ:
324             # Restore the title from stack
325             write_string('\033[23;0t', self._screen_file)
# NOTE(review): the enclosing `def __enter__(self):` header (line 327) and the
# rest of its body (329-330, presumably `return self`) are missing from this
# capture — only this statement survived. Recover before use.
328         self.save_console_title()
331     def __exit__(self, *args):
# Context-manager teardown: undo the title change and persist cookies.
332         self.restore_console_title()
# NOTE(review): original line 333 is missing (likely blank).
334         if self.params.get('cookiefile') is not None:
335             self.cookiejar.save()
337     def trouble(self, message=None, tb=None):
338         """Determine action to take when a download problem appears.
340         Depending on if the downloader has been configured to ignore
341         download errors or not, this method may throw an exception or
342         not when errors are found, after printing the message.
344         tb, if given, is additional traceback information.
# NOTE(review): lines 339/343/345 (blank lines + docstring close), 349
# (`if tb is None:`), 351, 355 (`else:`), 358 and 362 (`else:`) are missing
# from this capture; the control flow below is incomplete as shown.
346         if message is not None:
347             self.to_stderr(message)
348         if self.params.get('verbose'):
# Build a traceback string: prefer the nested exc_info carried by
# DownloadError-style exceptions, then the current exception context.
350             if sys.exc_info()[0]:  # if .trouble has been called from an except block
352                 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
353                     tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
354                 tb += compat_str(traceback.format_exc())
356                 tb_data = traceback.format_list(traceback.extract_stack())
357                 tb = ''.join(tb_data)
# With ignoreerrors off, escalate to DownloadError; otherwise record failure
# in the process return code and continue.
359         if not self.params.get('ignoreerrors', False):
360             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
361                 exc_info = sys.exc_info()[1].exc_info
363                 exc_info = sys.exc_info()
364             raise DownloadError(message, exc_info)
365         self._download_retcode = 1
367     def report_warning(self, message):
# NOTE(review): docstring quote lines (368, 371) and the `else:` (374) are
# missing from this capture.
369         Print the message to stderr, it will be prefixed with 'WARNING:'
370         If stderr is a tty file the 'WARNING:' will be colored
# ANSI yellow only on a tty and not on Windows consoles.
372         if self._err_file.isatty() and os.name != 'nt':
373             _msg_header = '\033[0;33mWARNING:\033[0m'
375             _msg_header = 'WARNING:'
376         warning_message = '%s %s' % (_msg_header, message)
377         self.to_stderr(warning_message)
379     def report_error(self, message, tb=None):
# NOTE(review): docstring quote lines (380, 383) and the `else:` (386) are
# missing from this capture.
381         Do the same as trouble, but prefixes the message with 'ERROR:', colored
382         in red if stderr is a tty file.
# ANSI red only on a tty and not on Windows consoles.
384         if self._err_file.isatty() and os.name != 'nt':
385             _msg_header = '\033[0;31mERROR:\033[0m'
387             _msg_header = 'ERROR:'
388         error_message = '%s %s' % (_msg_header, message)
# Delegate to trouble(), which decides between raising and recording.
389         self.trouble(error_message, tb)
391     def report_file_already_downloaded(self, file_name):
392         """Report file has already been fully downloaded."""
# NOTE(review): original line 393 (`try:`) is missing from this capture — the
# except below pairs with it. Fall back to a filename-free message when the
# name cannot be encoded for the output stream.
394             self.to_screen('[download] %s has already been downloaded' % file_name)
395         except UnicodeEncodeError:
396             self.to_screen('[download] The file has already been downloaded')
def increment_downloads(self):
    """Bump the per-run ordinal used to number downloaded files."""
    self._num_downloads = self._num_downloads + 1
402     def prepare_filename(self, info_dict):
403         """Generate the output filename."""
# NOTE(review): lines 404 (`try:`), 406, 410 (autonumber_size default), 415,
# 417, 419, 422, 424, 427 and 430-431 (the return path) are missing from this
# capture; the try/except below is incomplete as shown.
405             template_dict = dict(info_dict)
407             template_dict['epoch'] = int(time.time())
408             autonumber_size = self.params.get('autonumber_size')
409             if autonumber_size is None:
# Build a zero-padded %0Nd template for the running download counter.
411             autonumber_templ = '%0' + str(autonumber_size) + 'd'
412             template_dict['autonumber'] = autonumber_templ % self._num_downloads
413             if template_dict.get('playlist_index') is not None:
414                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
# Sanitize every template value for filesystem use; missing keys render 'NA'.
416             sanitize = lambda k, v: sanitize_filename(
418                 restricted=self.params.get('restrictfilenames'),
420             template_dict = dict((k, sanitize(k, v))
421                                  for k, v in template_dict.items()
423             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
425             tmpl = os.path.expanduser(self.params['outtmpl'])
426             filename = tmpl % template_dict
428         except ValueError as err:
429             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
432     def _match_entry(self, info_dict):
433         """ Returns None iff the file should be downloaded """
# NOTE(review): lines 434, 440, 444, 448 and 466-467 (including the final
# `return None`) are missing from this capture.
435         video_title = info_dict.get('title', info_dict.get('id', 'video'))
436         if 'title' in info_dict:
437             # This can happen when we're just evaluating the playlist
438             title = info_dict['title']
439             matchtitle = self.params.get('matchtitle', False)
441                 if not re.search(matchtitle, title, re.IGNORECASE):
442                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
443             rejecttitle = self.params.get('rejecttitle', False)
445                 if re.search(rejecttitle, title, re.IGNORECASE):
446                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
447         date = info_dict.get('upload_date', None)
449             dateRange = self.params.get('daterange', DateRange())
450             if date not in dateRange:
451                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
452         view_count = info_dict.get('view_count', None)
453         if view_count is not None:
454             min_views = self.params.get('min_views')
455             if min_views is not None and view_count < min_views:
456                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
457             max_views = self.params.get('max_views')
458             if max_views is not None and view_count > max_views:
459                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
460         age_limit = self.params.get('age_limit')
461         if age_limit is not None:
462             if age_limit < info_dict.get('age_limit', 0):
# NOTE(review): `title` is only bound inside the `if 'title' in info_dict:`
# branch above, so this line raises NameError for title-less entries —
# `video_title` (always bound) looks like the intended variable; confirm.
463                 return 'Skipping "' + title + '" because it is age restricted'
464         if self.in_download_archive(info_dict):
465             return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    """Merge *extra_info* into *info_dict* without overwriting keys it already has."""
    # setdefault keeps any value the extractor itself produced.
    for key in extra_info:
        info_dict.setdefault(key, extra_info[key])
474     def extract_info(self, url, download=True, ie_key=None, extra_info={},
# NOTE(review): the signature continuation (475, presumably `process=True`),
# docstring quotes, the `else: ies = self._ies` branch (484-485), the
# extractor loop header (487), `continue`/`break` lines and the MaxDownloads
# re-raise (514-515, 518, 522-525) are all missing from this capture.
# NOTE(review): `extra_info={}` is a mutable default argument — safe only
# while nothing mutates it; consider the `extra_info=None` sentinel idiom.
477         Returns a list with a dictionary for each video we find.
478         If 'download', also downloads the videos.
479         extra_info is a dict containing the extra values to add to each result
# When an explicit extractor key is given, only that extractor is tried.
483             ies = [self.get_info_extractor(ie_key)]
488             if not ie.suitable(url):
492                 self.report_warning('The program functionality for this site has been marked as broken, '
493                                     'and will probably not work.')
496                 ie_result = ie.extract(url)
497                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
499                 if isinstance(ie_result, list):
500                     # Backwards compatibility: old IE result format
502                         '_type': 'compat_list',
503                         'entries': ie_result,
# Stamp provenance info on the result before further processing.
505                 self.add_extra_info(ie_result,
507                         'extractor': ie.IE_NAME,
509                         'webpage_url_basename': url_basename(url),
510                         'extractor_key': ie.ie_key(),
513                     return self.process_ie_result(ie_result, download, extra_info)
516             except ExtractorError as de:  # An error we somewhat expected
517                 self.report_error(compat_str(de), de.format_traceback())
519             except Exception as e:
520                 if self.params.get('ignoreerrors', False):
521                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
526             self.report_error('no suitable InfoExtractor: %s' % url)
528     def process_ie_result(self, ie_result, download=True, extra_info={}):
# NOTE(review): this capture drops many lines (docstring quotes 529/536,
# 532, 535, 545, 553, 560-561, 564, 566, 571, 578-580, 586-587, 590-591,
# 594, 597, 604-605, 609-613, 616, 618, 620, 625-626, 630-632); the visible
# control flow below is incomplete as shown.
# NOTE(review): `extra_info={}` mutable default — same caveat as extract_info.
530         Take the result of the ie(may be modified) and resolve all unresolved
531         references (URLs, playlist items).
533         It will also download the videos if 'download'.
534         Returns the resolved ie_result.
537         result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
538         if result_type == 'video':
539             self.add_extra_info(ie_result, extra_info)
540             return self.process_video_result(ie_result, download=download)
541         elif result_type == 'url':
542             # We have to add extra_info to the results because it may be
543             # contained in a playlist
544             return self.extract_info(ie_result['url'],
546                                      ie_key=ie_result.get('ie_key'),
547                                      extra_info=extra_info)
548         elif result_type == 'url_transparent':
549             # Use the information from the embedding page
550             info = self.extract_info(
551                 ie_result['url'], ie_key=ie_result.get('ie_key'),
552                 extra_info=extra_info, download=False, process=False)
# Overlay selected fields from the embedded page onto the original result.
554             def make_result(embedded_info):
555                 new_result = ie_result.copy()
556                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
557                           'entries', 'ie_key', 'duration',
558                           'subtitles', 'annotations', 'format',
559                           'thumbnail', 'thumbnails'):
562                     if f in embedded_info:
563                         new_result[f] = embedded_info[f]
565             new_result = make_result(info)
# A url_transparent result must not resolve to another url_transparent one,
# otherwise this recursion would never terminate.
567             assert new_result.get('_type') != 'url_transparent'
568             if new_result.get('_type') == 'compat_list':
569                 new_result['entries'] = [
570                     make_result(e) for e in new_result['entries']]
572             return self.process_ie_result(
573                 new_result, download=download, extra_info=extra_info)
574         elif result_type == 'playlist':
575             # We process each entry in the playlist
576             playlist = ie_result.get('title', None) or ie_result.get('id', None)
577             self.to_screen('[download] Downloading playlist: %s' % playlist)
579             playlist_results = []
# Slice the entries per --playlist-start / --playlist-end (1-based options).
581             n_all_entries = len(ie_result['entries'])
582             playliststart = self.params.get('playliststart', 1) - 1
583             playlistend = self.params.get('playlistend', None)
584             # For backwards compatibility, interpret -1 as whole list
585             if playlistend == -1:
588             entries = ie_result['entries'][playliststart:playlistend]
589             n_entries = len(entries)
592                 "[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
593                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
595             for i, entry in enumerate(entries, 1):
596                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
# Per-entry extra info: playlist context plus provenance of the parent result.
598                     'playlist': playlist,
599                     'playlist_index': i + playliststart,
600                     'extractor': ie_result['extractor'],
601                     'webpage_url': ie_result['webpage_url'],
602                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
603                     'extractor_key': ie_result['extractor_key'],
606                 reason = self._match_entry(entry)
607                 if reason is not None:
608                     self.to_screen('[download] ' + reason)
611                 entry_result = self.process_ie_result(entry,
614                 playlist_results.append(entry_result)
615             ie_result['entries'] = playlist_results
617         elif result_type == 'compat_list':
# Legacy path: fix up each raw entry with provenance info, then recurse.
619                 self.add_extra_info(r,
621                         'extractor': ie_result['extractor'],
622                         'webpage_url': ie_result['webpage_url'],
623                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
624                         'extractor_key': ie_result['extractor_key'],
627             ie_result['entries'] = [
628                 self.process_ie_result(_fixup(r), download, extra_info)
629                 for r in ie_result['entries']
633             raise Exception('Invalid result type: %s' % result_type)
635     def select_format(self, format_spec, available_formats):
# Pick one format dict out of `available_formats` (assumed worst-to-best
# ordered, given the best/worst indexing below) per the user's format_spec.
# NOTE(review): lines 641/647 (`audio_formats = [` headers), 644/650
# (`if audio_formats:`), 652 (`return None`), 656 (`else:`) and 659-662
# (the final `return matches[-1] if matches else None` region) are missing
# from this capture.
636         if format_spec == 'best' or format_spec is None:
637             return available_formats[-1]
638         elif format_spec == 'worst':
639             return available_formats[0]
640         elif format_spec == 'bestaudio':
642                 f for f in available_formats
643                 if f.get('vcodec') == 'none']
645                 return audio_formats[-1]
646         elif format_spec == 'worstaudio':
648                 f for f in available_formats
649                 if f.get('vcodec') == 'none']
651                 return audio_formats[0]
# A bare extension selects by 'ext'; anything else matches 'format_id'.
653             extensions = ['mp4', 'flv', 'webm', '3gp']
654             if format_spec in extensions:
655                 filter_f = lambda f: f['ext'] == format_spec
657                 filter_f = lambda f: f['format_id'] == format_spec
658             matches = list(filter(filter_f, available_formats))
663     def process_video_result(self, info_dict, download=True):
# Normalize a single-video result, resolve the requested format(s) and hand
# each selected format to process_info.
# NOTE(review): lines 665, 670, 673 (the early `return`), 675-676, 681
# (`else:`), 683, 693, 697, 699, 702-703, 705, 714-715 (the listformats
# early return), 718, 723 (`else:`), 734, 736, 738-739, 741, 745, 748-750
# and 759 are missing from this capture.
664         assert info_dict.get('_type', 'video') == 'video'
666         if 'playlist' not in info_dict:
667             # It isn't part of a playlist
668             info_dict['playlist'] = None
669             info_dict['playlist_index'] = None
671         # These extractors handle format selection themselves
672         if info_dict['extractor'] in ['Youku']:
674             self.process_info(info_dict)
677         # We now pick which formats have to be downloaded
678         if info_dict.get('formats') is None:
679             # There's only one format available
680             formats = [info_dict]
682             formats = info_dict['formats']
684         # We check that all the formats have the format and format_id fields
685         for (i, format) in enumerate(formats):
686             if format.get('format_id') is None:
687                 format['format_id'] = compat_str(i)
688             if format.get('format') is None:
689                 format['format'] = '{id} - {res}{note}'.format(
690                     id=format['format_id'],
691                     res=self.format_resolution(format),
692                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
694             # Automatically determine file extension if missing
695             if 'ext' not in format:
696                 format['ext'] = determine_ext(format['url'])
# --format-limit: truncate the list after (and including) the limit format.
698         format_limit = self.params.get('format_limit', None)
700             formats = list(takewhile_inclusive(
701                 lambda f: f['format_id'] != format_limit, formats
704         # TODO Central sorting goes here
706         if formats[0] is not info_dict:
707             # only set the 'formats' fields if the original info_dict lists them
708             # otherwise we end up with a circular reference, the first (and unique)
709             # element in the 'formats' field in info_dict is info_dict itself,
710             # which can't be exported to json
711             info_dict['formats'] = formats
712         if self.params.get('listformats', None):
713             self.list_formats(info_dict)
716         req_format = self.params.get('format')
717         if req_format is None:
719         formats_to_download = []
720         # The -1 is for supporting YoutubeIE
721         if req_format in ('-1', 'all'):
722             formats_to_download = formats
724             # We can accept formats requested in the format: 34/5/best, we pick
725             # the first that is available, starting from left
726             req_formats = req_format.split('/')
727             for rf in req_formats:
728                 if re.match(r'.+?\+.+?', rf) is not None:
729                     # Two formats have been requested like '137+139'
730                     format_1, format_2 = rf.split('+')
731                     formats_info = (self.select_format(format_1, formats),
732                                     self.select_format(format_2, formats))
# Only merge when both halves of the '+' spec resolved to a format.
733                     if all(formats_info):
735                             'requested_formats': formats_info,
737                             'ext': formats_info[0]['ext'],
740                         selected_format = None
742                     selected_format = self.select_format(rf, formats)
743                 if selected_format is not None:
744                     formats_to_download = [selected_format]
746         if not formats_to_download:
747             raise ExtractorError('requested format not available',
751         if len(formats_to_download) > 1:
752             self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
753         for format in formats_to_download:
754             new_info = dict(info_dict)
755             new_info.update(format)
756             self.process_info(new_info)
757         # We update the info dict with the best quality format (backwards compatibility)
758         info_dict.update(formats_to_download[-1])
761     def process_info(self, info_dict):
762         """Process a single resolved IE result."""
# NOTE(review): this capture drops many lines (763, 767, 771, 774, 777,
# 781-782, 787, 789-790, 811, 814-819, 822, 825-826, 831-832, 840-841,
# 846-847, 855-856, 859, 867-869, 873, 876, 879-880, 885, 887, 891-892,
# 900, 903, 912, 915-918, 924-925, 928, 932, 936, 944-945, 949, 954-957,
# 961-962). Several `try:`/`else:` lines and the `dl` helper's `def` are
# among them; the structure below is incomplete as shown.
764         assert info_dict.get('_type', 'video') == 'video'
765         # We increment the download count here to match the previous behaviour.
766         self.increment_downloads()
768         info_dict['fulltitle'] = info_dict['title']
769         if len(info_dict['title']) > 200:
770             info_dict['title'] = info_dict['title'][:197] + '...'
772         # Keep for backwards compatibility
773         info_dict['stitle'] = info_dict['title']
775         if not 'format' in info_dict:
776             info_dict['format'] = info_dict['ext']
777         reason = self._match_entry(info_dict)
779         if reason is not None:
780             self.to_screen('[download] ' + reason)
783         max_downloads = self.params.get('max_downloads')
784         if max_downloads is not None:
785             if self._num_downloads > int(max_downloads):
786                 raise MaxDownloadsReached()
788         filename = self.prepare_filename(info_dict)
# --force-* printing options: emit the requested fields to stdout.
791         if self.params.get('forcetitle', False):
792             self.to_stdout(info_dict['fulltitle'])
793         if self.params.get('forceid', False):
794             self.to_stdout(info_dict['id'])
795         if self.params.get('forceurl', False):
796             # For RTMP URLs, also include the playpath
797             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
798         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
799             self.to_stdout(info_dict['thumbnail'])
800         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
801             self.to_stdout(info_dict['description'])
802         if self.params.get('forcefilename', False) and filename is not None:
803             self.to_stdout(filename)
804         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
805             self.to_stdout(formatSeconds(info_dict['duration']))
806         if self.params.get('forceformat', False):
807             self.to_stdout(info_dict['format'])
808         if self.params.get('forcejson', False):
809             info_dict['_filename'] = filename
810             self.to_stdout(json.dumps(info_dict))
812         # Do nothing else if in simulate mode
813         if self.params.get('simulate', False):
# Ensure the target directory exists before writing any sidecar files.
820         dn = os.path.dirname(encodeFilename(filename))
821         if dn != '' and not os.path.exists(dn):
823         except (OSError, IOError) as err:
824             self.report_error('unable to create directory ' + compat_str(err))
827         if self.params.get('writedescription', False):
828             descfn = filename + '.description'
829             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
830                 self.to_screen('[info] Video description is already present')
833                     self.to_screen('[info] Writing video description to: ' + descfn)
834                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
835                         descfile.write(info_dict['description'])
836                 except (KeyError, TypeError):
837                     self.report_warning('There\'s no description to write.')
838                 except (OSError, IOError):
839                     self.report_error('Cannot write description file ' + descfn)
842         if self.params.get('writeannotations', False):
843             annofn = filename + '.annotations.xml'
844             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
845                 self.to_screen('[info] Video annotations are already present')
848                     self.to_screen('[info] Writing video annotations to: ' + annofn)
849                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
850                         annofile.write(info_dict['annotations'])
851                 except (KeyError, TypeError):
852                     self.report_warning('There are no annotations to write.')
853                 except (OSError, IOError):
854                     self.report_error('Cannot write annotations file: ' + annofn)
857         subtitles_are_requested = any([self.params.get('writesubtitles', False),
858                                        self.params.get('writeautomaticsub')])
860         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
861             # subtitles download errors are already managed as troubles in relevant IE
862             # that way it will silently go on when used with unsupporting IE
863             subtitles = info_dict['subtitles']
864             sub_format = self.params.get('subtitlesformat', 'srt')
865             for sub_lang in subtitles.keys():
866                 sub = subtitles[sub_lang]
870                 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
871                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
872                     self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
874                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
875                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
877                     except (OSError, IOError):
# NOTE(review): this error message reports `descfn` (the description file from
# the block above) instead of `sub_filename` — it is misleading and raises
# NameError when writedescription was off; should be `sub_filename`.
878                         self.report_error('Cannot write subtitles file ' + descfn)
881         if self.params.get('writeinfojson', False):
882             infofn = os.path.splitext(filename)[0] + '.info.json'
883             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
884                 self.to_screen('[info] Video description metadata is already present')
886                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
888                     write_json_file(info_dict, encodeFilename(infofn))
889                 except (OSError, IOError):
890                     self.report_error('Cannot write metadata to JSON file ' + infofn)
893         if self.params.get('writethumbnail', False):
894             if info_dict.get('thumbnail') is not None:
895                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
896                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
897                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
898                     self.to_screen('[%s] %s: Thumbnail is already present' %
899                                    (info_dict['extractor'], info_dict['id']))
901                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
902                                    (info_dict['extractor'], info_dict['id']))
904                         uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
905                         with open(thumb_filename, 'wb') as thumbf:
906                             shutil.copyfileobj(uf, thumbf)
907                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
908                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
909                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
910                         self.report_warning('Unable to download thumbnail "%s": %s' %
911                                             (info_dict['thumbnail'], compat_str(err)))
913         if not self.params.get('skip_download', False):
914             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
# Inner helper (its `def dl(name, info):` header is among the missing lines):
# pick a downloader, attach the registered progress hooks, run it.
919                         fd = get_suitable_downloader(info)(self, self.params)
920                         for ph in self._progress_hooks:
921                             fd.add_progress_hook(ph)
922                         return fd.download(name, info)
923                     if info_dict.get('requested_formats') is not None:
# Merged download (e.g. '137+139'): fetch each format to its own file and
# queue an ffmpeg/avconv merge as a post-processing step.
926                         merger = FFmpegMergerPP(self)
927                         if not merger._get_executable():
929                             self.report_warning('You have requested multiple '
930                                                 'formats but ffmpeg or avconv are not installed.'
931                                                 ' The formats won\'t be merged')
933                             postprocessors = [merger]
934                         for f in info_dict['requested_formats']:
935                             new_info = dict(info_dict)
937                             fname = self.prepare_filename(new_info)
938                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
939                             downloaded.append(fname)
940                             partial_success = dl(fname, new_info)
941                             success = success and partial_success
942                         info_dict['__postprocessors'] = postprocessors
943                         info_dict['__files_to_merge'] = downloaded
946                         success = dl(filename, info_dict)
947                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
948                     self.report_error('unable to download video data: %s' % str(err))
950                 except (OSError, IOError) as err:
951                     raise UnavailableVideoError(err)
952                 except (ContentTooShortError, ) as err:
953                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
958                     self.post_process(filename, info_dict)
959                 except (PostProcessingError) as err:
960                     self.report_error('postprocessing: %s' % str(err))
963             self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Each URL is handed to extract_info(), which also performs the
        actual download.  Per-URL failures are reported and the batch
        continues; returns self._download_retcode at the end.
        """
        # With a literal (template-free) output name, several URLs would all
        # be written to the same file, so refuse up front.  A max_downloads
        # of 1 is exempt: at most one file can be produced anyway.
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                # Hitting the --max-downloads limit is an expected stop
                # condition, not an error.
                self.to_screen('[info] Maximum number of downloaded files reached.')
        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using a previously dumped info dict read from
        *info_filename* (UTF-8 JSON), skipping re-extraction.

        Returns self._download_retcode.
        """
        with io.open(info_filename, 'r', encoding='utf-8') as f:
                # presumably info = json.load(f) — elided from this view
                self.process_ie_result(info, download=True)
            except DownloadError:
                # The stored info may be stale (e.g. expired media URLs);
                # fall back to a fresh extraction from the original page URL
                # when the dump recorded one.
                webpage_url = info.get('webpage_url')
                if webpage_url is not None:
                    self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                    return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Each postprocessor's run() returns (keep_video_wish, new_info).
        The wishes are combined to decide whether the original file is
        deleted afterwards (unless the 'keepvideo' option is set).
        """
        # Work on a copy so postprocessors never mutate the caller's dict.
        info = dict(ie_info)
        info['filepath'] = filename
        # Per-download postprocessors (e.g. the FFmpegMergerPP stored under
        # '__postprocessors' for multi-format downloads) run before the
        # globally registered ones in self._pps.
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
                except PostProcessingError as e:
                    self.report_error(e.msg)
        # Only delete the source file when a postprocessor explicitly asked
        # for it AND the user did not pass -k/--keepvideo.
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                # Best effort: a leftover file is only worth a warning.
                self.report_warning('Unable to remove downloaded video file')
1025 def _make_archive_id(self, info_dict):
1026 # Future-proof against any change in case
1027 # and backwards compatibility with prior versions
1028 extractor = info_dict.get('extractor_key')
1029 if extractor is None:
1030 if 'id' in info_dict:
1031 extractor = info_dict.get('ie_key') # key in a playlist
1032 if extractor is None:
1033 return None # Incomplete video information
1034 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return whether this video is already recorded in the download
        archive file named by the 'download_archive' option."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
            # locked_file guards against concurrent youtube-dl instances
            # reading/writing the same archive.
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    # One archive id per line; strip the trailing newline.
                    if line.strip() == vid_id:
            except IOError as ioe:
                # A missing archive file simply means nothing has been
                # recorded yet; any other I/O error is unexpected.
                if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the download archive file
        (no-op unless the 'download_archive' option is set)."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        # Append mode plus locked_file keeps concurrent instances from
        # corrupting each other's writes; one archive id per line.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        """Return a short human-readable resolution for a format dict:
        'WxH' when both dimensions are known, 'Hp' with only the height,
        '?xW' with only the width, or *default* when nothing is known.
        """
        # NOTE(review): no 'self' parameter — presumably decorated with
        # @staticmethod just above this view; confirm in the full file.
        # Audio-only formats (vcodec 'none') have no video resolution.
        if format.get('vcodec') == 'none':
        # An explicit 'resolution' string from the extractor wins.
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                # Height only: conventional 'Np' notation (e.g. 720p).
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def list_formats(self, info_dict):
        """Print a table of the available formats for this video
        (format code, extension, resolution, note) to the screen."""

        def format_note(fdict):
            # Build a short free-text description of one format: container
            # support, extractor note, bitrates, codecs, sample rate, size.
            if fdict.get('ext') in ['f4f', 'f4m']:
                res += '(unsupported) '
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + ' '
            if fdict.get('tbr') is not None:
                # Total bitrate in kbit/s.
                res += '%4dk ' % fdict['tbr']
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += '%-5s' % fdict['vcodec']
                if fdict.get('vbr') is not None:
            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            if fdict.get('vbr') is not None:
                res += '%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                    res += '%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
            if fdict.get('abr') is not None:
                res += '@%3dk' % fdict['abr']
            if fdict.get('asr') is not None:
                # Audio sample rate in Hz.
                res += ' (%5dHz)' % fdict['asr']
            if fdict.get('filesize') is not None:
                res += format_bytes(fdict['filesize'])

        def line(format, idlen=20):
            # One table row; idlen sizes the format-id column so all rows
            # (and the header) align.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                format_note(format),

        # A dict without a 'formats' list is treated as its own single format.
        formats = info_dict.get('formats', [info_dict])
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        # Formats are assumed sorted worst-first; tag the two extremes.
        if len(formats) > 1:
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        # The header reuses line() with literal column titles so it gets
        # exactly the same widths as the data rows.
        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
1139 def urlopen(self, req):
1140 """ Start an HTTP download """
1141 return self._opener.open(req)
    def print_debug_header(self):
        """Write version, git revision, Python/platform and proxy-map
        debug lines to output; does nothing unless 'verbose' is set."""
        if not self.params.get('verbose'):
        write_string('[debug] youtube-dl version ' + __version__ + '\n')
            # Best-effort: ask git for the short HEAD hash of the checkout
            # this module lives in (only meaningful for git installs).
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # Only print when the output actually looks like a hex hash.
            if re.match('[0-9a-f]+', out):
                write_string('[debug] Git HEAD: ' + out + '\n')
        write_string('[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + '\n')
        # Collect the effective proxy settings from every opener handler
        # that exposes a 'proxies' mapping.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) traffic: cookie
        handling, proxy configuration, HTTPS certificate options and the
        custom YoutubeDLHandler.  Stores it as self._opener and installs
        it globally."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds when the option is unset.
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        # In-memory cookies unless a cookie file was given, in which case a
        # Netscape/Mozilla-format jar is loaded (if readable).
        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()
        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely; otherwise
            # the given proxy is used for both http and https.
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # No explicit option: fall back to environment proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        # debug_printtraffic turns on urllib's wire-level tracing.
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)