2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
33 compat_urllib_request,
59 UnavailableVideoError,
67 from .cache import Cache
68 from .extractor import get_info_extractor, gen_extractors
69 from .downloader import get_suitable_downloader
70 from .downloader.rtmp import rtmpdump_version
71 from .postprocessor import (
76 from .version import __version__
79 class YoutubeDL(object):
82 YoutubeDL objects are the ones responsible of downloading the
83 actual video file and writing it to disk if the user has requested
84 it, among some other tasks. In most cases there should be one per
85 program. As, given a video URL, the downloader doesn't know how to
86 extract all the needed information, task that InfoExtractors do, it
87 has to pass the URL to one of them.
89 For this, YoutubeDL objects have a method that allows
90 InfoExtractors to be registered in a given order. When it is passed
91 a URL, the YoutubeDL object handles it to the first InfoExtractor it
92 finds that reports being able to handle it. The InfoExtractor extracts
93 all the information about the video or videos the URL refers to, and
94 YoutubeDL processes the extracted information, possibly using a File
95 Downloader to download the video.
97 YoutubeDL objects accept a lot of parameters. In order not to saturate
98 the object constructor with arguments, it receives a dictionary of
99 options instead. These options are available through the params
100 attribute for the InfoExtractors to use. The YoutubeDL also
101 registers itself as the downloader in charge for the InfoExtractors
102 that are added to it, so this is a "mutual registration".
106 username: Username for authentication purposes.
107 password: Password for authentication purposes.
108 videopassword: Password for accessing a video.
109 usenetrc: Use netrc for authentication instead.
110 verbose: Print additional info to stdout.
111 quiet: Do not print messages to stdout.
112 no_warnings: Do not print out anything for warnings.
113 forceurl: Force printing final URL.
114 forcetitle: Force printing title.
115 forceid: Force printing ID.
116 forcethumbnail: Force printing thumbnail URL.
117 forcedescription: Force printing description.
118 forcefilename: Force printing final filename.
119 forceduration: Force printing duration.
120 forcejson: Force printing info_dict as JSON.
121 dump_single_json: Force printing the info_dict of the whole playlist
122 (or video) as a single JSON line.
123 simulate: Do not download the video files.
124 format: Video format code. See options.py for more information.
125 format_limit: Highest quality format to try.
126 outtmpl: Template for output names.
127 restrictfilenames: Do not allow "&" and spaces in file names
128 ignoreerrors: Do not stop on download errors.
129 nooverwrites: Prevent overwriting files.
130 playliststart: Playlist item to start at.
131 playlistend: Playlist item to end at.
132 playlistreverse: Download playlist items in reverse order.
133 matchtitle: Download only matching titles.
134 rejecttitle: Reject downloads for matching titles.
135 logger: Log messages to a logging.Logger instance.
136 logtostderr: Log messages to stderr instead of stdout.
137 writedescription: Write the video description to a .description file
138 writeinfojson: Write the video description to a .info.json file
139 writeannotations: Write the video annotations to a .annotations.xml file
140 writethumbnail: Write the thumbnail image to a file
141 writesubtitles: Write the video subtitles to a file
142 writeautomaticsub: Write the automatic subtitles to a file
143 allsubtitles: Downloads all the subtitles of the video
144 (requires writesubtitles or writeautomaticsub)
145 listsubtitles: Lists all available subtitles for the video
146 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
147 subtitleslangs: List of languages of the subtitles to download
148 keepvideo: Keep the video file after post-processing
149 daterange: A DateRange object, download only if the upload_date is in the range.
150 skip_download: Skip the actual download of the video file
151 cachedir: Location of the cache files in the filesystem.
152 False to disable filesystem cache.
153 noplaylist: Download single video instead of a playlist if in doubt.
154 age_limit: An integer representing the user's age in years.
155 Unsuitable videos for the given age are skipped.
156 min_views: An integer representing the minimum view count the video
157 must have in order to not be skipped.
158 Videos without view count information are always
159 downloaded. None for no limit.
160 max_views: An integer representing the maximum view count.
161 Videos that are more popular than that are not
163 Videos without view count information are always
164 downloaded. None for no limit.
165 download_archive: File name of a file where all downloads are recorded.
166 Videos already present in the file are not downloaded
168 cookiefile: File name where cookies should be read from and dumped to.
169 nocheckcertificate:Do not verify SSL certificates
170 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
171 At the moment, this is only supported by YouTube.
172 proxy: URL of the proxy server to use
173 socket_timeout: Time to wait for unresponsive hosts, in seconds
174 bidi_workaround: Work around buggy terminals without bidirectional text
175 support, using fribidi
176 debug_printtraffic:Print out sent and received HTTP traffic
177 include_ads: Download ads as well
178 default_search: Prepend this string if an input url is not valid.
179 'auto' for elaborate guessing
180 encoding: Use this encoding instead of the system-specified.
181 extract_flat: Do not resolve URLs, return the immediate result.
182 Pass in 'in_playlist' to only show this behavior for
184 postprocessors: A list of dictionaries, each with an entry
185 * key: The name of the postprocessor. See
186 youtube_dl/postprocessor/__init__.py for a list.
187 as well as any further keyword arguments for the
189 progress_hooks: A list of functions that get called on download
190 progress, with a dictionary with the entries
191 * filename: The final filename
192 * status: One of "downloading" and "finished"
194 The dict may also have some of the following entries:
196 * downloaded_bytes: Bytes on disk
197 * total_bytes: Size of the whole file, None if unknown
198 * tmpfilename: The filename we're currently writing to
199 * eta: The estimated time in seconds, None if unknown
200 * speed: The download speed in bytes/second, None if
203 Progress hooks are guaranteed to be called at least once
204 (with status "finished") if the download is successful.
207 The following parameters are not used by YoutubeDL itself, they are used by
209 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
210 noresizebuffer, retries, continuedl, noprogress, consoletitle
212 The following options are used by the post processors:
213 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
214 otherwise prefer avconv.
215 exec_cmd: Arbitrary command to run after downloading
# Class-level defaults; both are re-initialised to 0 in __init__.
221 _download_retcode = None
222 _num_downloads = None
225 def __init__(self, params=None, auto_init=True):
226 """Create a FileDownloader object with the given options."""
# NOTE(review): this chunk has non-contiguous embedded line numbers, so
# several statements (e.g. the try/except around the bidi setup and the
# params initialisation) are missing from view; code kept byte-identical.
230 self._ies_instances = {}
232 self._progress_hooks = []
233 self._download_retcode = 0
234 self._num_downloads = 0
# Route screen output to stderr when 'logtostderr' is set (the bool
# indexes the two-element list).
235 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
236 self._err_file = sys.stderr
238 self.cache = Cache(self)
# Optional workaround for terminals lacking bidirectional text support:
# pipe output through an external 'bidiv' or 'fribidi' process.
240 if params.get('bidi_workaround', False):
243 master, slave = pty.openpty()
244 width = get_term_width()
248 width_args = ['-w', str(width)]
250 stdin=subprocess.PIPE,
252 stderr=self._err_file)
254 self._output_process = subprocess.Popen(
255 ['bidiv'] + width_args, **sp_kwargs
258 self._output_process = subprocess.Popen(
259 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
260 self._output_channel = os.fdopen(master, 'rb')
261 except OSError as ose:
263 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Force --restrict-filenames when the filesystem encoding cannot encode
# arbitrary characters (Python 3 would raise while writing such names).
267 if (sys.version_info >= (3,) and sys.platform != 'win32' and
268 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
269 and not params.get('restrictfilenames', False)):
270 # On Python 3, the Unicode filesystem API will throw errors (#1474)
272 'Assuming --restrict-filenames since file system encoding '
273 'cannot encode all characters. '
274 'Set the LC_ALL environment variable to fix this.')
275 self.params['restrictfilenames'] = True
# %(stitle)s is a deprecated output-template field; warn on use.
277 if '%(stitle)s' in self.params.get('outtmpl', ''):
278 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
283 self.print_debug_header()
284 self.add_default_info_extractors()
# Instantiate configured postprocessors; everything in the dict except
# 'key' is forwarded to the postprocessor constructor.
286 for pp_def_raw in self.params.get('postprocessors', []):
287 pp_class = get_postprocessor(pp_def_raw['key'])
288 pp_def = dict(pp_def_raw)
290 pp = pp_class(self, **compat_kwargs(pp_def))
291 self.add_post_processor(pp)
293 for ph in self.params.get('progress_hooks', []):
294 self.add_progress_hook(ph)
296 def warn_if_short_id(self, argv):
297 # short YouTube ID starting with dash?
# Find argv positions that look like 11-char YouTube IDs beginning with
# '-' (they would be misparsed as options) and show a corrected command
# line using '--' as a separator.
# NOTE(review): the list-literal opening and the report_warning call
# lines are missing from this chunk (numbering gaps).
299 i for i, a in enumerate(argv)
300 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
304 [a for i, a in enumerate(argv) if i not in idxs] +
305 ['--'] + [argv[i] for i in idxs]
308 'Long argument string detected. '
309 'Use -- to separate parameters and URLs, like this:\n%s\n' %
310 args_to_str(correct_argv))
312 def add_info_extractor(self, ie):
313 """Add an InfoExtractor object to the end of the list."""
# Cache the instance by its key and register ourselves as its downloader
# ("mutual registration" described in the class docstring).
315 self._ies_instances[ie.ie_key()] = ie
316 ie.set_downloader(self)
318 def get_info_extractor(self, ie_key):
320 Get an instance of an IE with name ie_key, it will try to get one from
321 the _ies list, if there's no instance it will create a new one and add
322 it to the extractor list.
324 ie = self._ies_instances.get(ie_key)
# Lazily instantiate and register the extractor on first use; note the
# module-level get_info_extractor() returns a class, hence the '()'.
326 ie = get_info_extractor(ie_key)()
327 self.add_info_extractor(ie)
330 def add_default_info_extractors(self):
332 Add the InfoExtractors returned by gen_extractors to the end of the list
# Registration order matters: URL dispatch picks the first suitable IE.
334 for ie in gen_extractors():
335 self.add_info_extractor(ie)
337 def add_post_processor(self, pp):
338 """Add a PostProcessor object to the end of the chain."""
# Register ourselves as the downloader of the postprocessor.
340 pp.set_downloader(self)
342 def add_progress_hook(self, ph):
343 """Add the progress hook (currently only for the file downloader)"""
344 self._progress_hooks.append(ph)
346 def _bidi_workaround(self, message):
# Pass-through unless the bidi output channel was set up in __init__
# (the early-return line is missing from this chunk).
347 if not hasattr(self, '_output_channel'):
350 assert hasattr(self, '_output_process')
351 assert isinstance(message, compat_str)
352 line_count = message.count('\n') + 1
# Feed the message to the external bidi process, then read the same
# number of lines back from the pty master.
353 self._output_process.stdin.write((message + '\n').encode('utf-8'))
354 self._output_process.stdin.flush()
355 res = ''.join(self._output_channel.readline().decode('utf-8')
356 for _ in range(line_count))
# Strip the trailing newline that was appended above.
357 return res[:-len('\n')]
359 def to_screen(self, message, skip_eol=False):
360 """Print message to stdout if not in quiet mode."""
361 return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out* using the user-configured output encoding."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
366 def to_stdout(self, message, skip_eol=False, check_quiet=False):
367 """Print message to stdout if not in quiet mode."""
368 if self.params.get('logger'):
369 self.params['logger'].debug(message)
370 elif not check_quiet or not self.params.get('quiet', False):
371 message = self._bidi_workaround(message)
372 terminator = ['\n', ''][skip_eol]
373 output = message + terminator
375 self._write_string(output, self._screen_file)
377 def to_stderr(self, message):
378 """Print message to stderr."""
379 assert isinstance(message, compat_str)
380 if self.params.get('logger'):
381 self.params['logger'].error(message)
# NOTE(review): the 'else:' line is missing from this chunk; in the
# original the bidi/write path below is the non-logger branch.
383 message = self._bidi_workaround(message)
384 output = message + '\n'
385 self._write_string(output, self._err_file)
387 def to_console_title(self, message):
# No-op unless the user enabled console-title updates (the early
# 'return' line is missing from this chunk).
388 if not self.params.get('consoletitle', False):
390 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
391 # c_wchar_p() might not be necessary if `message` is
392 # already of type unicode()
393 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
394 elif 'TERM' in os.environ:
# Set the terminal window title via the OSC 0 escape sequence.
395 self._write_string('\033]0;%s\007' % message, self._screen_file)
397 def save_console_title(self):
# No-op unless console-title updates are enabled (early 'return' line
# missing from this chunk).
398 if not self.params.get('consoletitle', False):
400 if 'TERM' in os.environ:
401 # Save the title on stack
402 self._write_string('\033[22;0t', self._screen_file)
404 def restore_console_title(self):
# Counterpart of save_console_title(); pops the saved title off the
# terminal's title stack.
405 if not self.params.get('consoletitle', False):
407 if 'TERM' in os.environ:
408 # Restore the title from stack
409 self._write_string('\033[23;0t', self._screen_file)
# NOTE(review): the '__enter__' def line preceding this statement is
# missing from this chunk; the save/restore pair makes YoutubeDL usable
# as a context manager.
412 self.save_console_title()
415 def __exit__(self, *args):
416 self.restore_console_title()
# Persist cookies on exit when a cookie file was configured.
418 if self.params.get('cookiefile') is not None:
419 self.cookiejar.save()
421 def trouble(self, message=None, tb=None):
422 """Determine action to take when a download problem appears.
424 Depending on if the downloader has been configured to ignore
425 download errors or not, this method may throw an exception or
426 not when errors are found, after printing the message.
428 tb, if given, is additional traceback information.
430 if message is not None:
431 self.to_stderr(message)
432 if self.params.get('verbose'):
# In verbose mode, build a traceback string from the active exception
# (including a wrapped .exc_info if present) or from the current stack.
434 if sys.exc_info()[0]: # if .trouble has been called from an except block
436 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
437 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
438 tb += compat_str(traceback.format_exc())
440 tb_data = traceback.format_list(traceback.extract_stack())
441 tb = ''.join(tb_data)
# Unless errors are ignored, re-raise as DownloadError; otherwise record
# a non-zero return code and carry on.
443 if not self.params.get('ignoreerrors', False):
444 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
445 exc_info = sys.exc_info()[1].exc_info
447 exc_info = sys.exc_info()
448 raise DownloadError(message, exc_info)
449 self._download_retcode = 1
451 def report_warning(self, message):
453 Print the message to stderr, it will be prefixed with 'WARNING:'
454 If stderr is a tty file the 'WARNING:' will be colored
# A configured logger takes precedence; 'no_warnings' suppresses output
# (the 'return' lines for both branches are missing from this chunk).
456 if self.params.get('logger') is not None:
457 self.params['logger'].warning(message)
459 if self.params.get('no_warnings'):
# ANSI yellow only on a tty and never on Windows consoles.
461 if self._err_file.isatty() and os.name != 'nt':
462 _msg_header = '\033[0;33mWARNING:\033[0m'
464 _msg_header = 'WARNING:'
465 warning_message = '%s %s' % (_msg_header, message)
466 self.to_stderr(warning_message)
468 def report_error(self, message, tb=None):
470 Do the same as trouble, but prefixes the message with 'ERROR:', colored
471 in red if stderr is a tty file.
# ANSI red only on a tty and never on Windows consoles.
473 if self._err_file.isatty() and os.name != 'nt':
474 _msg_header = '\033[0;31mERROR:\033[0m'
476 _msg_header = 'ERROR:'
477 error_message = '%s %s' % (_msg_header, message)
# Delegate the raise-vs-record decision to trouble().
478 self.trouble(error_message, tb)
480 def report_file_already_downloaded(self, file_name):
481 """Report file has already been fully downloaded."""
# (the 'try:' line is missing from this chunk)
483 self.to_screen('[download] %s has already been downloaded' % file_name)
484 except UnicodeEncodeError:
# Fall back to a filename-free message when the name cannot be encoded.
485 self.to_screen('[download] The file has already been downloaded')
487 def prepare_filename(self, info_dict):
488 """Generate the output filename."""
# Work on a copy so the caller's info_dict is untouched.
490 template_dict = dict(info_dict)
492 template_dict['epoch'] = int(time.time())
493 autonumber_size = self.params.get('autonumber_size')
494 if autonumber_size is None:
496 autonumber_templ = '%0' + str(autonumber_size) + 'd'
497 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Zero-pad the playlist index to the width of the playlist size.
498 if template_dict.get('playlist_index') is not None:
499 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
500 if template_dict.get('resolution') is None:
501 if template_dict.get('width') and template_dict.get('height'):
502 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
503 elif template_dict.get('height'):
504 template_dict['resolution'] = '%sp' % template_dict['height']
505 elif template_dict.get('width'):
# NOTE(review): '?x%d' puts the *width* after the 'x'; '%dx?' would match
# the usual WIDTHxHEIGHT convention -- confirm intent.
506 template_dict['resolution'] = '?x%d' % template_dict['width']
508 sanitize = lambda k, v: sanitize_filename(
510 restricted=self.params.get('restrictfilenames'),
512 template_dict = dict((k, sanitize(k, v))
513 for k, v in template_dict.items()
# Missing template fields render as the literal 'NA'.
515 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
517 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
518 tmpl = compat_expanduser(outtmpl)
519 filename = tmpl % template_dict
521 except ValueError as err:
522 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
525 def _match_entry(self, info_dict):
526 """ Returns None iff the file should be downloaded """
# Each check below returns a human-readable skip reason string; reaching
# the end (return None, missing from this chunk) means "download it".
528 video_title = info_dict.get('title', info_dict.get('id', 'video'))
529 if 'title' in info_dict:
530 # This can happen when we're just evaluating the playlist
531 title = info_dict['title']
532 matchtitle = self.params.get('matchtitle', False)
534 if not re.search(matchtitle, title, re.IGNORECASE):
535 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
536 rejecttitle = self.params.get('rejecttitle', False)
538 if re.search(rejecttitle, title, re.IGNORECASE):
539 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
540 date = info_dict.get('upload_date', None)
542 dateRange = self.params.get('daterange', DateRange())
543 if date not in dateRange:
544 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count limits: videos without a view count are never skipped here.
545 view_count = info_dict.get('view_count', None)
546 if view_count is not None:
547 min_views = self.params.get('min_views')
548 if min_views is not None and view_count < min_views:
549 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
550 max_views = self.params.get('max_views')
551 if max_views is not None and view_count > max_views:
552 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
553 age_limit = self.params.get('age_limit')
554 if age_limit is not None:
555 actual_age_limit = info_dict.get('age_limit')
556 if actual_age_limit is None:
558 if age_limit < actual_age_limit:
559 return 'Skipping "' + title + '" because it is age restricted'
560 if self.in_download_archive(info_dict):
561 return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    """Fill in any keys from extra_info that info_dict does not define yet."""
    for extra_key, extra_value in extra_info.items():
        if extra_key not in info_dict:
            info_dict[extra_key] = extra_value
570 def extract_info(self, url, download=True, ie_key=None, extra_info={},
573 Returns a list with a dictionary for each video we find.
574 If 'download', also downloads the videos.
575 extra_info is a dict containing the extra values to add to each result
# When ie_key is given, only that extractor is tried; otherwise (loop
# header missing from this chunk) all registered IEs are consulted.
579 ies = [self.get_info_extractor(ie_key)]
584 if not ie.suitable(url):
588 self.report_warning('The program functionality for this site has been marked as broken, '
589 'and will probably not work.')
592 ie_result = ie.extract(url)
593 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
595 if isinstance(ie_result, list):
596 # Backwards compatibility: old IE result format
598 '_type': 'compat_list',
599 'entries': ie_result,
601 self.add_default_extra_info(ie_result, ie, url)
603 return self.process_ie_result(ie_result, download, extra_info)
606 except ExtractorError as de: # An error we somewhat expected
607 self.report_error(compat_str(de), de.format_traceback())
# MaxDownloadsReached must propagate to the caller (re-raise line
# missing from this chunk).
609 except MaxDownloadsReached:
611 except Exception as e:
612 if self.params.get('ignoreerrors', False):
613 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
618 self.report_error('no suitable InfoExtractor for URL %s' % url)
620 def add_default_extra_info(self, ie_result, ie, url):
# Stamp the result with extractor identity and URL-derived defaults
# (only keys the extractor did not already set).
621 self.add_extra_info(ie_result, {
622 'extractor': ie.IE_NAME,
624 'webpage_url_basename': url_basename(url),
625 'extractor_key': ie.ie_key(),
628 def process_ie_result(self, ie_result, download=True, extra_info={}):
630 Take the result of the ie(may be modified) and resolve all unresolved
631 references (URLs, playlist items).
633 It will also download the videos if 'download'.
634 Returns the resolved ie_result.
637 result_type = ie_result.get('_type', 'video')
# With extract_flat, URL results are returned as-is instead of being
# resolved recursively.
639 if result_type in ('url', 'url_transparent'):
640 extract_flat = self.params.get('extract_flat', False)
641 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
642 extract_flat is True):
643 if self.params.get('forcejson', False):
644 self.to_stdout(json.dumps(ie_result))
647 if result_type == 'video':
648 self.add_extra_info(ie_result, extra_info)
649 return self.process_video_result(ie_result, download=download)
650 elif result_type == 'url':
651 # We have to add extra_info to the results because it may be
652 # contained in a playlist
653 return self.extract_info(ie_result['url'],
655 ie_key=ie_result.get('ie_key'),
656 extra_info=extra_info)
657 elif result_type == 'url_transparent':
658 # Use the information from the embedding page
659 info = self.extract_info(
660 ie_result['url'], ie_key=ie_result.get('ie_key'),
661 extra_info=extra_info, download=False, process=False)
# The embedding page's non-None fields override the target's, except
# '_type' and 'url' which must come from the resolved result.
663 force_properties = dict(
664 (k, v) for k, v in ie_result.items() if v is not None)
665 for f in ('_type', 'url'):
666 if f in force_properties:
667 del force_properties[f]
668 new_result = info.copy()
669 new_result.update(force_properties)
671 assert new_result.get('_type') != 'url_transparent'
673 return self.process_ie_result(
674 new_result, download=download, extra_info=extra_info)
675 elif result_type == 'playlist' or result_type == 'multi_video':
676 # We process each entry in the playlist
677 playlist = ie_result.get('title', None) or ie_result.get('id', None)
678 self.to_screen('[download] Downloading playlist: %s' % playlist)
680 playlist_results = []
# playliststart is converted to a 0-based slice index here.
682 playliststart = self.params.get('playliststart', 1) - 1
683 playlistend = self.params.get('playlistend', None)
684 # For backwards compatibility, interpret -1 as whole list
685 if playlistend == -1:
# Entries may be a plain list, a lazily-evaluated PagedList, or any
# iterable (sliced with itertools.islice in the fallback branch).
688 ie_entries = ie_result['entries']
689 if isinstance(ie_entries, list):
690 n_all_entries = len(ie_entries)
691 entries = ie_entries[playliststart:playlistend]
692 n_entries = len(entries)
694 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
695 (ie_result['extractor'], playlist, n_all_entries, n_entries))
696 elif isinstance(ie_entries, PagedList):
697 entries = ie_entries.getslice(
698 playliststart, playlistend)
699 n_entries = len(entries)
701 "[%s] playlist %s: Downloading %d videos" %
702 (ie_result['extractor'], playlist, n_entries))
704 entries = list(itertools.islice(
705 ie_entries, playliststart, playlistend))
706 n_entries = len(entries)
708 "[%s] playlist %s: Downloading %d videos" %
709 (ie_result['extractor'], playlist, n_entries))
711 if self.params.get('playlistreverse', False):
712 entries = entries[::-1]
# Recursively process each entry, tagging it with playlist metadata.
714 for i, entry in enumerate(entries, 1):
715 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
717 'n_entries': n_entries,
718 'playlist': playlist,
719 'playlist_id': ie_result.get('id'),
720 'playlist_title': ie_result.get('title'),
721 'playlist_index': i + playliststart,
722 'extractor': ie_result['extractor'],
723 'webpage_url': ie_result['webpage_url'],
724 'webpage_url_basename': url_basename(ie_result['webpage_url']),
725 'extractor_key': ie_result['extractor_key'],
728 reason = self._match_entry(entry)
729 if reason is not None:
730 self.to_screen('[download] ' + reason)
733 entry_result = self.process_ie_result(entry,
736 playlist_results.append(entry_result)
737 ie_result['entries'] = playlist_results
739 elif result_type == 'compat_list':
741 'Extractor %s returned a compat_list result. '
742 'It needs to be updated.' % ie_result.get('extractor'))
# _fixup (definition partially missing from this chunk) stamps each
# legacy entry with the extractor metadata before re-processing.
748 'extractor': ie_result['extractor'],
749 'webpage_url': ie_result['webpage_url'],
750 'webpage_url_basename': url_basename(ie_result['webpage_url']),
751 'extractor_key': ie_result['extractor_key'],
755 ie_result['entries'] = [
756 self.process_ie_result(_fixup(r), download, extra_info)
757 for r in ie_result['entries']
761 raise Exception('Invalid result type: %s' % result_type)
763 def select_format(self, format_spec, available_formats):
# available_formats is assumed sorted worst-to-best: index [-1] is the
# best format and [0] the worst. Audio-only formats are recognised by
# vcodec == 'none', video-only by acodec == 'none'.
764 if format_spec == 'best' or format_spec is None:
765 return available_formats[-1]
766 elif format_spec == 'worst':
767 return available_formats[0]
768 elif format_spec == 'bestaudio':
770 f for f in available_formats
771 if f.get('vcodec') == 'none']
773 return audio_formats[-1]
774 elif format_spec == 'worstaudio':
776 f for f in available_formats
777 if f.get('vcodec') == 'none']
779 return audio_formats[0]
780 elif format_spec == 'bestvideo':
782 f for f in available_formats
783 if f.get('acodec') == 'none']
785 return video_formats[-1]
786 elif format_spec == 'worstvideo':
788 f for f in available_formats
789 if f.get('acodec') == 'none']
791 return video_formats[0]
# Otherwise match by known file extension, or by exact format_id.
793 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
794 if format_spec in extensions:
795 filter_f = lambda f: f['ext'] == format_spec
797 filter_f = lambda f: f['format_id'] == format_spec
798 matches = list(filter(filter_f, available_formats))
803 def process_video_result(self, info_dict, download=True):
# Normalises a single-video result, applies format selection, and hands
# each selected format to process_info().
804 assert info_dict.get('_type', 'video') == 'video'
806 if 'id' not in info_dict:
807 raise ExtractorError('Missing "id" field in extractor result')
808 if 'title' not in info_dict:
809 raise ExtractorError('Missing "title" field in extractor result')
811 if 'playlist' not in info_dict:
812 # It isn't part of a playlist
813 info_dict['playlist'] = None
814 info_dict['playlist_index'] = None
# Sort thumbnails worst-to-best and derive a 'resolution' string.
816 thumbnails = info_dict.get('thumbnails')
818 thumbnails.sort(key=lambda t: (
819 t.get('width'), t.get('height'), t.get('url')))
821 if 'width' in t and 'height' in t:
822 t['resolution'] = '%dx%d' % (t['width'], t['height'])
824 if thumbnails and 'thumbnail' not in info_dict:
825 info_dict['thumbnail'] = thumbnails[-1]['url']
827 if 'display_id' not in info_dict and 'id' in info_dict:
828 info_dict['display_id'] = info_dict['id']
830 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
831 # Working around negative timestamps in Windows
832 # (see http://bugs.python.org/issue1646728)
833 if info_dict['timestamp'] < 0 and os.name == 'nt':
834 info_dict['timestamp'] = 0
835 upload_date = datetime.datetime.utcfromtimestamp(
836 info_dict['timestamp'])
837 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
839 # These extractors handle format selection themselves
840 if info_dict['extractor'] in ['Youku']:
842 self.process_info(info_dict)
845 # We now pick which formats have to be downloaded
846 if info_dict.get('formats') is None:
847 # There's only one format available
848 formats = [info_dict]
850 formats = info_dict['formats']
853 raise ExtractorError('No video formats found!')
855 # We check that all the formats have the format and format_id fields
856 for i, format in enumerate(formats):
857 if 'url' not in format:
858 raise ExtractorError('Missing "url" key in result (index %d)' % i)
860 if format.get('format_id') is None:
861 format['format_id'] = compat_str(i)
862 if format.get('format') is None:
863 format['format'] = '{id} - {res}{note}'.format(
864 id=format['format_id'],
865 res=self.format_resolution(format),
866 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
868 # Automatically determine file extension if missing
869 if 'ext' not in format:
870 format['ext'] = determine_ext(format['url']).lower()
# --format-limit truncates the list at the named format (inclusive).
872 format_limit = self.params.get('format_limit', None)
874 formats = list(takewhile_inclusive(
875 lambda f: f['format_id'] != format_limit, formats
878 # TODO Central sorting goes here
880 if formats[0] is not info_dict:
881 # only set the 'formats' fields if the original info_dict list them
882 # otherwise we end up with a circular reference, the first (and unique)
883 # element in the 'formats' field in info_dict is info_dict itself,
884 # which can't be exported to json
885 info_dict['formats'] = formats
886 if self.params.get('listformats', None):
887 self.list_formats(info_dict)
890 req_format = self.params.get('format')
891 if req_format is None:
893 formats_to_download = []
894 # The -1 is for supporting YoutubeIE
895 if req_format in ('-1', 'all'):
896 formats_to_download = formats
# Comma-separated specs each download one format; '/'-separated
# alternatives within a spec are tried left to right.
898 for rfstr in req_format.split(','):
899 # We can accept formats requested in the format: 34/5/best, we pick
900 # the first that is available, starting from left
901 req_formats = rfstr.split('/')
902 for rf in req_formats:
903 if re.match(r'.+?\+.+?', rf) is not None:
904 # Two formats have been requested like '137+139'
905 format_1, format_2 = rf.split('+')
906 formats_info = (self.select_format(format_1, formats),
907 self.select_format(format_2, formats))
908 if all(formats_info):
909 # The first format must contain the video and the
911 if formats_info[0].get('vcodec') == 'none':
912 self.report_error('The first format must '
913 'contain the video, try using '
914 '"-f %s+%s"' % (format_2, format_1))
917 'requested_formats': formats_info,
919 'ext': formats_info[0]['ext'],
922 selected_format = None
924 selected_format = self.select_format(rf, formats)
925 if selected_format is not None:
926 formats_to_download.append(selected_format)
928 if not formats_to_download:
929 raise ExtractorError('requested format not available',
933 if len(formats_to_download) > 1:
934 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
935 for format in formats_to_download:
936 new_info = dict(info_dict)
937 new_info.update(format)
938 self.process_info(new_info)
939 # We update the info dict with the best quality format (backwards compatibility)
940 info_dict.update(formats_to_download[-1])
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Unless running in simulate mode, downloads the selected format(s)
        and writes any requested side files (description, annotations,
        subtitles, info JSON, thumbnail), then records the download in the
        archive file.
        """
        assert info_dict.get('_type', 'video') == 'video'

        # Honor --max-downloads before doing any work on this entry.
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the untruncated title around; cap 'title' itself at 200 chars.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # _match_entry returns a human-readable skip reason, or None to proceed.
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings (--get-title, --get-id, --get-url, --dump-json, ...).
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))
        if self.params.get('dump_single_json', False):
            info_dict['_filename'] = filename

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

        if filename is None:

            # Make sure the destination directory exists before writing.
            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))

        # --write-description: save the description next to the video file.
        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)

        # --write-annotations: save the annotations XML, if any.
        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)

        # --write-sub / --write-auto-sub: save subtitle files.
        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        # NOTE(review): 'already_present' looks like a typo for
                        # 'already present' in this user-facing message.
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)

        # --write-info-json: dump the whole info_dict as JSON.
        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)

        # --write-thumbnail: fetch and store the thumbnail image.
        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        # Actual download, unless --skip-download was given.
        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                        # Pick a suitable FileDownloader for this info and run it.
                        fd = get_suitable_downloader(info)(self, self.params)
                        for ph in self._progress_hooks:
                            fd.add_progress_hook(ph)
                        if self.params.get('verbose'):
                            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                        return fd.download(name, info)
                    if info_dict.get('requested_formats') is not None:
                        # Several formats requested: download each one, then
                        # merge them afterwards (when ffmpeg/avconv is available).
                        merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                        if not merger._executable:
                            self.report_warning('You have requested multiple '
                                'formats but ffmpeg or avconv are not installed.'
                                ' The formats won\'t be merged')
                            postprocessors = [merger]
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                        # Just a single file
                        success = dl(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error('unable to download video data: %s' % str(err))
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))

        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated process return code
        (self._download_retcode).
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        # Refuse to download several videos into a single fixed file name.
        if (len(url_list) > 1 and
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
                # It also downloads the videos
                res = self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using a previously dumped .info.json file."""
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            # The dumped info may be stale; retry from the original page URL.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Postprocessors attached to this specific download (the
        '__postprocessors' key) run before the globally registered ones
        in self._pps.
        """
        # Work on a copy so postprocessors cannot mutate the caller's dict.
        info = dict(ie_info)
        info['filepath'] = filename
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        # Remove the intermediate file unless a postprocessor asked to keep
        # it or the user passed --keep-video (-k).
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1218 def _make_archive_id(self, info_dict):
1219 # Future-proof against any change in case
1220 # and backwards compatibility with prior versions
1221 extractor = info_dict.get('extractor_key')
1222 if extractor is None:
1223 if 'id' in info_dict:
1224 extractor = info_dict.get('ie_key') # key in a playlist
1225 if extractor is None:
1226 return None # Incomplete video information
1227 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Check whether this video is already recorded in the --download-archive file."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
            # One archive id per line; compare against the exact entry.
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet.
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        # locked_file guards against concurrent youtube-dl processes.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        """Return a human-readable resolution string for a format dict.

        NOTE(review): the parameter name 'format' shadows the builtin;
        kept as-is for interface compatibility.
        """
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            # An explicit 'resolution' field wins over width/height.
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build the free-form 'note' column shown by --list-formats.

        Concatenates whichever of the following are present in fdict:
        format note, total bitrate, container, video/audio codecs and
        bitrates, frame rate, sample rate and (approximate) file size.
        """
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks the size as an estimate rather than exact.
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print the table of available formats for info_dict to the screen."""
        def line(format, idlen=20):
            # One table row: format code, extension, resolution, note.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                self._format_note(format),

        formats = info_dict.get('formats', [info_dict])
        # First column width: at least the header, up to the longest id.
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
            line(f, idlen) for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        if len(formats) > 1:
            # Formats are ordered worst to best, so tag the two extremes.
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
    def urlopen(self, req):
        """ Start an HTTP download """

        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # 'req' may be a plain URL string or a urllib Request object.
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
                # Rebuild the Request around the escaped URL, preserving its
                # data, headers and origin/unverifiable flags.
                req = compat_urllib_request.Request(
                    url_escaped, data=req.data, headers=req.headers,
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write debug info (versions, encodings, proxy map) when --verbose is set."""
        if not self.params.get('verbose'):

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
            # Best effort: report the git revision when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions()
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxies every opener handler knows about.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS handler) used for all requests."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds (10 minutes).
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            # An empty --proxy string disables proxying entirely.
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # No explicit proxy: fall back to the environment's settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text s with the configured output encoding; bytes pass through unchanged."""
        if isinstance(s, bytes):
            return s  # Already encoded

            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            # Enrich the error with a hint before it propagates to the user.
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1478 def get_encoding(self):
1479 encoding = self.params.get('encoding')
1480 if encoding is None:
1481 encoding = preferredencoding()