youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DownloadError,
  55     encode_compat_str,
  56     encodeFilename,
  57     error_to_compat_str,
  58     expand_path,
  59     ExtractorError,
  60     format_bytes,
  61     formatSeconds,
  62     GeoRestrictedError,
  63     int_or_none,
  64     ISO3166Utils,
  65     locked_file,
  66     make_HTTPS_handler,
  67     MaxDownloadsReached,
  68     orderedSet,
  69     PagedList,
  70     parse_filesize,
  71     PerRequestProxyHandler,
  72     platform_name,
  73     PostProcessingError,
  74     preferredencoding,
  75     prepend_extension,
  76     register_socks_protocols,
  77     render_table,
  78     replace_extension,
  79     SameFileError,
  80     sanitize_filename,
  81     sanitize_path,
  82     sanitize_url,
  83     sanitized_Request,
  84     std_headers,
  85     str_or_none,
  86     subtitles_filename,
  87     UnavailableVideoError,
  88     url_basename,
  89     version_tuple,
  90     write_json_file,
  91     write_string,
  92     YoutubeDLCookieJar,
  93     YoutubeDLCookieProcessor,
  94     YoutubeDLHandler,
  95     YoutubeDLRedirectHandler,
  96 )
  97 from .cache import Cache
  98 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  99 from .extractor.openload import PhantomJSwrapper
 100 from .downloader import get_suitable_downloader
 101 from .downloader.rtmp import rtmpdump_version
 102 from .postprocessor import (
 103     FFmpegFixupM3u8PP,
 104     FFmpegFixupM4aPP,
 105     FFmpegFixupStretchedPP,
 106     FFmpegMergerPP,
 107     FFmpegPostProcessor,
 108     get_postprocessor,
 109 )
 110 from .version import __version__
 111
 112 if compat_os_name == 'nt':
 113     import ctypes
 114
 115
 116 class YoutubeDL(object):
 117     """YoutubeDL class.
 118
    YoutubeDL objects are the ones responsible for downloading the
 120     actual video file and writing it to disk if the user has requested
 121     it, among some other tasks. In most cases there should be one per
 122     program. As, given a video URL, the downloader doesn't know how to
 123     extract all the needed information, task that InfoExtractors do, it
 124     has to pass the URL to one of them.
 125
 126     For this, YoutubeDL objects have a method that allows
 127     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
 129     finds that reports being able to handle it. The InfoExtractor extracts
 130     all the information about the video or videos the URL refers to, and
 131     YoutubeDL process the extracted information, possibly using a File
 132     Downloader to download the video.
 133
 134     YoutubeDL objects accept a lot of parameters. In order not to saturate
 135     the object constructor with arguments, it receives a dictionary of
 136     options instead. These options are available through the params
 137     attribute for the InfoExtractors to use. The YoutubeDL also
 138     registers itself as the downloader in charge for the InfoExtractors
 139     that are added to it, so this is a "mutual registration".
 140
 141     Available options:
 142
 143     username:          Username for authentication purposes.
 144     password:          Password for authentication purposes.
 145     videopassword:     Password for accessing a video.
 146     ap_mso:            Adobe Pass multiple-system operator identifier.
 147     ap_username:       Multiple-system operator account username.
 148     ap_password:       Multiple-system operator account password.
 149     usenetrc:          Use netrc for authentication instead.
 150     verbose:           Print additional info to stdout.
 151     quiet:             Do not print messages to stdout.
 152     no_warnings:       Do not print out anything for warnings.
 153     forceurl:          Force printing final URL.
 154     forcetitle:        Force printing title.
 155     forceid:           Force printing ID.
 156     forcethumbnail:    Force printing thumbnail URL.
 157     forcedescription:  Force printing description.
 158     forcefilename:     Force printing final filename.
 159     forceduration:     Force printing duration.
 160     forcejson:         Force printing info_dict as JSON.
 161     dump_single_json:  Force printing the info_dict of the whole playlist
 162                        (or video) as a single JSON line.
 163     simulate:          Do not download the video files.
 164     format:            Video format code. See options.py for more information.
 165     outtmpl:           Template for output names.
 166     restrictfilenames: Do not allow "&" and spaces in file names
 167     ignoreerrors:      Do not stop on download errors.
 168     force_generic_extractor: Force downloader to use the generic extractor
 169     nooverwrites:      Prevent overwriting files.
 170     playliststart:     Playlist item to start at.
 171     playlistend:       Playlist item to end at.
 172     playlist_items:    Specific indices of playlist to download.
 173     playlistreverse:   Download playlist items in reverse order.
 174     playlistrandom:    Download playlist items in random order.
 175     matchtitle:        Download only matching titles.
 176     rejecttitle:       Reject downloads for matching titles.
 177     logger:            Log messages to a logging.Logger instance.
 178     logtostderr:       Log messages to stderr instead of stdout.
 179     writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
 181     writeannotations:  Write the video annotations to a .annotations.xml file
 182     writethumbnail:    Write the thumbnail image to a file
 183     write_all_thumbnails:  Write all thumbnail formats to files
 184     writesubtitles:    Write the video subtitles to a file
 185     writeautomaticsub: Write the automatically generated subtitles to a file
 186     allsubtitles:      Downloads all the subtitles of the video
 187                        (requires writesubtitles or writeautomaticsub)
 188     listsubtitles:     Lists all available subtitles for the video
 189     subtitlesformat:   The format code for subtitles
 190     subtitleslangs:    List of languages of the subtitles to download
 191     keepvideo:         Keep the video file after post-processing
 192     daterange:         A DateRange object, download only if the upload_date is in the range.
 193     skip_download:     Skip the actual download of the video file
 194     cachedir:          Location of the cache files in the filesystem.
 195                        False to disable filesystem cache.
 196     noplaylist:        Download single video instead of a playlist if in doubt.
 197     age_limit:         An integer representing the user's age in years.
 198                        Unsuitable videos for the given age are skipped.
 199     min_views:         An integer representing the minimum view count the video
 200                        must have in order to not be skipped.
 201                        Videos without view count information are always
 202                        downloaded. None for no limit.
 203     max_views:         An integer representing the maximum view count.
 204                        Videos that are more popular than that are not
 205                        downloaded.
 206                        Videos without view count information are always
 207                        downloaded. None for no limit.
 208     download_archive:  File name of a file where all downloads are recorded.
 209                        Videos already present in the file are not downloaded
 210                        again.
 211     cookiefile:        File name where cookies should be read from and dumped to.
 212     nocheckcertificate:Do not verify SSL certificates
 213     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 214                        At the moment, this is only supported by YouTube.
 215     proxy:             URL of the proxy server to use
 216     geo_verification_proxy:  URL of the proxy to use for IP address verification
 217                        on geo-restricted sites.
 218     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 219     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 221     debug_printtraffic:Print out sent and received HTTP traffic
 222     include_ads:       Download ads as well
 223     default_search:    Prepend this string if an input url is not valid.
 224                        'auto' for elaborate guessing
 225     encoding:          Use this encoding instead of the system-specified.
 226     extract_flat:      Do not resolve URLs, return the immediate result.
 227                        Pass in 'in_playlist' to only show this behavior for
 228                        playlist items.
 229     postprocessors:    A list of dictionaries, each with an entry
 230                        * key:  The name of the postprocessor. See
 231                                youtube_dl/postprocessor/__init__.py for a list.
 232                        as well as any further keyword arguments for the
 233                        postprocessor.
 234     progress_hooks:    A list of functions that get called on download
 235                        progress, with a dictionary with the entries
 236                        * status: One of "downloading", "error", or "finished".
 237                                  Check this first and ignore unknown values.
 238
 239                        If status is one of "downloading", or "finished", the
 240                        following properties may also be present:
 241                        * filename: The final filename (always present)
 242                        * tmpfilename: The filename we're currently writing to
 243                        * downloaded_bytes: Bytes on disk
 244                        * total_bytes: Size of the whole file, None if unknown
 245                        * total_bytes_estimate: Guess of the eventual file size,
 246                                                None if unavailable.
 247                        * elapsed: The number of seconds since download started.
 248                        * eta: The estimated time in seconds, None if unknown
 249                        * speed: The download speed in bytes/second, None if
 250                                 unknown
 251                        * fragment_index: The counter of the currently
 252                                          downloaded video fragment.
 253                        * fragment_count: The number of fragments (= individual
 254                                          files that will be merged)
 255
 256                        Progress hooks are guaranteed to be called at least once
 257                        (with status "finished") if the download is successful.
 258     merge_output_format: Extension to use when merging formats.
 259     fixup:             Automatically correct known faults of the file.
 260                        One of:
 261                        - "never": do nothing
 262                        - "warn": only emit a warning
 263                        - "detect_or_warn": check whether we can do anything
 264                                            about it, warn otherwise (default)
 265     source_address:    Client-side IP address to bind to.
 266     call_home:         Boolean, true iff we are allowed to contact the
 267                        youtube-dl servers for debugging.
 268     sleep_interval:    Number of seconds to sleep before each download when
 269                        used alone or a lower bound of a range for randomized
 270                        sleep before each download (minimum possible number
 271                        of seconds to sleep) when used along with
 272                        max_sleep_interval.
 273     max_sleep_interval:Upper bound of a range for randomized sleep before each
 274                        download (maximum possible number of seconds to sleep).
 275                        Must only be used along with sleep_interval.
 276                        Actual sleep time will be a random float from range
 277                        [sleep_interval; max_sleep_interval].
 278     listformats:       Print an overview of available video formats and exit.
 279     list_thumbnails:   Print a table of all thumbnails and exit.
 280     match_filter:      A function that gets called with the info_dict of
 281                        every video.
 282                        If it returns a message, the video is ignored.
 283                        If it returns None, the video is downloaded.
 284                        match_filter_func in utils.py is one example for this.
 285     no_color:          Do not emit color codes in output.
 286     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 287                        HTTP header
 288     geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
 290                        explicit geographic restriction bypassing via faking
 291                        X-Forwarded-For HTTP header
 292     geo_bypass_ip_block:
 293                        IP range in CIDR notation that will be used similarly to
 294                        geo_bypass_country
 295
 296     The following options determine which downloader is picked:
 297     external_downloader: Executable of the external downloader to call.
 298                        None or unset for standard (built-in) downloader.
 299     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 300                        if True, otherwise use ffmpeg/avconv if False, otherwise
 301                        use downloader suggested by extractor if None.
 302
 303     The following parameters are not used by YoutubeDL itself, they are used by
 304     the downloader (see youtube_dl/downloader/common.py):
 305     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 306     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 307     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 308     http_chunk_size.
 309
 310     The following options are used by the post processors:
 311     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 312                        otherwise prefer ffmpeg.
 313     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 314                        to the binary or its containing directory.
 315     postprocessor_args: A list of additional command-line arguments for the
 316                         postprocessor.
 317
 318     The following options are used by the Youtube extractor:
 319     youtube_include_dash_manifest: If True (default), DASH manifests and related
 320                         data will be downloaded and processed by extractor.
 321                         You can reduce network I/O by disabling it if you don't
 322                         care about DASH.
 323     """
 324
    # Names of info-dict fields that are treated as numeric.
    # prepare_filename() walks this set to spot numeric fields that are
    # missing from the template dictionary.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; __init__ assigns a per-instance value to
    # each of these names.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 342
 343     def __init__(self, params=None, auto_init=True):
 344         """Create a FileDownloader object with the given options."""
 345         if params is None:
 346             params = {}
 347         self._ies = []
 348         self._ies_instances = {}
 349         self._pps = []
 350         self._progress_hooks = []
 351         self._download_retcode = 0
 352         self._num_downloads = 0
 353         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 354         self._err_file = sys.stderr
 355         self.params = {
 356             # Default parameters
 357             'nocheckcertificate': False,
 358         }
 359         self.params.update(params)
 360         self.cache = Cache(self)
 361
 362         def check_deprecated(param, option, suggestion):
 363             if self.params.get(param) is not None:
 364                 self.report_warning(
 365                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 366                 return True
 367             return False
 368
 369         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 370             if self.params.get('geo_verification_proxy') is None:
 371                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 372
 373         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 374         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 375         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 376
 377         if params.get('bidi_workaround', False):
 378             try:
 379                 import pty
 380                 master, slave = pty.openpty()
 381                 width = compat_get_terminal_size().columns
 382                 if width is None:
 383                     width_args = []
 384                 else:
 385                     width_args = ['-w', str(width)]
 386                 sp_kwargs = dict(
 387                     stdin=subprocess.PIPE,
 388                     stdout=slave,
 389                     stderr=self._err_file)
 390                 try:
 391                     self._output_process = subprocess.Popen(
 392                         ['bidiv'] + width_args, **sp_kwargs
 393                     )
 394                 except OSError:
 395                     self._output_process = subprocess.Popen(
 396                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 397                 self._output_channel = os.fdopen(master, 'rb')
 398             except OSError as ose:
 399                 if ose.errno == errno.ENOENT:
 400                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 401                 else:
 402                     raise
 403
 404         if (sys.platform != 'win32'
 405                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 406                 and not params.get('restrictfilenames', False)):
 407             # Unicode filesystem API will throw errors (#1474, #13027)
 408             self.report_warning(
 409                 'Assuming --restrict-filenames since file system encoding '
 410                 'cannot encode all characters. '
 411                 'Set the LC_ALL environment variable to fix this.')
 412             self.params['restrictfilenames'] = True
 413
 414         if isinstance(params.get('outtmpl'), bytes):
 415             self.report_warning(
 416                 'Parameter outtmpl is bytes, but should be a unicode string. '
 417                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 418
 419         self._setup_opener()
 420
 421         if auto_init:
 422             self.print_debug_header()
 423             self.add_default_info_extractors()
 424
 425         for pp_def_raw in self.params.get('postprocessors', []):
 426             pp_class = get_postprocessor(pp_def_raw['key'])
 427             pp_def = dict(pp_def_raw)
 428             del pp_def['key']
 429             pp = pp_class(self, **compat_kwargs(pp_def))
 430             self.add_post_processor(pp)
 431
 432         for ph in self.params.get('progress_hooks', []):
 433             self.add_progress_hook(ph)
 434
 435         register_socks_protocols()
 436
 437     def warn_if_short_id(self, argv):
 438         # short YouTube ID starting with dash?
 439         idxs = [
 440             i for i, a in enumerate(argv)
 441             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 442         if idxs:
 443             correct_argv = (
 444                 ['youtube-dl']
 445                 + [a for i, a in enumerate(argv) if i not in idxs]
 446                 + ['--'] + [argv[i] for i in idxs]
 447             )
 448             self.report_warning(
 449                 'Long argument string detected. '
 450                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 451                 args_to_str(correct_argv))
 452
 453     def add_info_extractor(self, ie):
 454         """Add an InfoExtractor object to the end of the list."""
 455         self._ies.append(ie)
 456         if not isinstance(ie, type):
 457             self._ies_instances[ie.ie_key()] = ie
 458             ie.set_downloader(self)
 459
 460     def get_info_extractor(self, ie_key):
 461         """
 462         Get an instance of an IE with name ie_key, it will try to get one from
 463         the _ies list, if there's no instance it will create a new one and add
 464         it to the extractor list.
 465         """
 466         ie = self._ies_instances.get(ie_key)
 467         if ie is None:
 468             ie = get_info_extractor(ie_key)()
 469             self.add_info_extractor(ie)
 470         return ie
 471
 472     def add_default_info_extractors(self):
 473         """
 474         Add the InfoExtractors returned by gen_extractors to the end of the list
 475         """
 476         for ie in gen_extractor_classes():
 477             self.add_info_extractor(ie)
 478
 479     def add_post_processor(self, pp):
 480         """Add a PostProcessor object to the end of the chain."""
 481         self._pps.append(pp)
 482         pp.set_downloader(self)
 483
 484     def add_progress_hook(self, ph):
 485         """Add the progress hook (currently only for the file downloader)"""
 486         self._progress_hooks.append(ph)
 487
 488     def _bidi_workaround(self, message):
 489         if not hasattr(self, '_output_channel'):
 490             return message
 491
 492         assert hasattr(self, '_output_process')
 493         assert isinstance(message, compat_str)
 494         line_count = message.count('\n') + 1
 495         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 496         self._output_process.stdin.flush()
 497         res = ''.join(self._output_channel.readline().decode('utf-8')
 498                       for _ in range(line_count))
 499         return res[:-len('\n')]
 500
 501     def to_screen(self, message, skip_eol=False):
 502         """Print message to stdout if not in quiet mode."""
 503         return self.to_stdout(message, skip_eol, check_quiet=True)
 504
 505     def _write_string(self, s, out=None):
 506         write_string(s, out=out, encoding=self.params.get('encoding'))
 507
 508     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 509         """Print message to stdout if not in quiet mode."""
 510         if self.params.get('logger'):
 511             self.params['logger'].debug(message)
 512         elif not check_quiet or not self.params.get('quiet', False):
 513             message = self._bidi_workaround(message)
 514             terminator = ['\n', ''][skip_eol]
 515             output = message + terminator
 516
 517             self._write_string(output, self._screen_file)
 518
 519     def to_stderr(self, message):
 520         """Print message to stderr."""
 521         assert isinstance(message, compat_str)
 522         if self.params.get('logger'):
 523             self.params['logger'].error(message)
 524         else:
 525             message = self._bidi_workaround(message)
 526             output = message + '\n'
 527             self._write_string(output, self._err_file)
 528
 529     def to_console_title(self, message):
 530         if not self.params.get('consoletitle', False):
 531             return
 532         if compat_os_name == 'nt':
 533             if ctypes.windll.kernel32.GetConsoleWindow():
 534                 # c_wchar_p() might not be necessary if `message` is
 535                 # already of type unicode()
 536                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 537         elif 'TERM' in os.environ:
 538             self._write_string('\033]0;%s\007' % message, self._screen_file)
 539
 540     def save_console_title(self):
 541         if not self.params.get('consoletitle', False):
 542             return
 543         if self.params.get('simulate', False):
 544             return
 545         if compat_os_name != 'nt' and 'TERM' in os.environ:
 546             # Save the title on stack
 547             self._write_string('\033[22;0t', self._screen_file)
 548
 549     def restore_console_title(self):
 550         if not self.params.get('consoletitle', False):
 551             return
 552         if self.params.get('simulate', False):
 553             return
 554         if compat_os_name != 'nt' and 'TERM' in os.environ:
 555             # Restore the title from stack
 556             self._write_string('\033[23;0t', self._screen_file)
 557
 558     def __enter__(self):
 559         self.save_console_title()
 560         return self
 561
 562     def __exit__(self, *args):
 563         self.restore_console_title()
 564
 565         if self.params.get('cookiefile') is not None:
 566             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 567
 568     def trouble(self, message=None, tb=None):
 569         """Determine action to take when a download problem appears.
 570
 571         Depending on if the downloader has been configured to ignore
 572         download errors or not, this method may throw an exception or
 573         not when errors are found, after printing the message.
 574
 575         tb, if given, is additional traceback information.
 576         """
 577         if message is not None:
 578             self.to_stderr(message)
 579         if self.params.get('verbose'):
 580             if tb is None:
 581                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 582                     tb = ''
 583                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 584                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 585                     tb += encode_compat_str(traceback.format_exc())
 586                 else:
 587                     tb_data = traceback.format_list(traceback.extract_stack())
 588                     tb = ''.join(tb_data)
 589             self.to_stderr(tb)
 590         if not self.params.get('ignoreerrors', False):
 591             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 592                 exc_info = sys.exc_info()[1].exc_info
 593             else:
 594                 exc_info = sys.exc_info()
 595             raise DownloadError(message, exc_info)
 596         self._download_retcode = 1
 597
 598     def report_warning(self, message):
 599         '''
 600         Print the message to stderr, it will be prefixed with 'WARNING:'
 601         If stderr is a tty file the 'WARNING:' will be colored
 602         '''
 603         if self.params.get('logger') is not None:
 604             self.params['logger'].warning(message)
 605         else:
 606             if self.params.get('no_warnings'):
 607                 return
 608             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 609                 _msg_header = '\033[0;33mWARNING:\033[0m'
 610             else:
 611                 _msg_header = 'WARNING:'
 612             warning_message = '%s %s' % (_msg_header, message)
 613             self.to_stderr(warning_message)
 614
 615     def report_error(self, message, tb=None):
 616         '''
 617         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 618         in red if stderr is a tty file.
 619         '''
 620         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 621             _msg_header = '\033[0;31mERROR:\033[0m'
 622         else:
 623             _msg_header = 'ERROR:'
 624         error_message = '%s %s' % (_msg_header, message)
 625         self.trouble(error_message, tb)
 626
 627     def report_file_already_downloaded(self, file_name):
 628         """Report file has already been fully downloaded."""
 629         try:
 630             self.to_screen('[download] %s has already been downloaded' % file_name)
 631         except UnicodeEncodeError:
 632             self.to_screen('[download] The file has already been downloaded')
 633
 634     def prepare_filename(self, info_dict):
 635         """Generate the output filename."""
 636         try:
 637             template_dict = dict(info_dict)
 638
 639             template_dict['epoch'] = int(time.time())
 640             autonumber_size = self.params.get('autonumber_size')
 641             if autonumber_size is None:
 642                 autonumber_size = 5
 643             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 644             if template_dict.get('resolution') is None:
 645                 if template_dict.get('width') and template_dict.get('height'):
 646                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 647                 elif template_dict.get('height'):
 648                     template_dict['resolution'] = '%sp' % template_dict['height']
 649                 elif template_dict.get('width'):
 650                     template_dict['resolution'] = '%dx?' % template_dict['width']
 651
 652             sanitize = lambda k, v: sanitize_filename(
 653                 compat_str(v),
 654                 restricted=self.params.get('restrictfilenames'),
 655                 is_id=(k == 'id' or k.endswith('_id')))
 656             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 657                                  for k, v in template_dict.items()
 658                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 659             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 660
 661             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 662
 663             # For fields playlist_index and autonumber convert all occurrences
 664             # of %(field)s to %(field)0Nd for backward compatibility
 665             field_size_compat_map = {
 666                 'playlist_index': len(str(template_dict['n_entries'])),
 667                 'autonumber': autonumber_size,
 668             }
 669             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 670             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 671             if mobj:
 672                 outtmpl = re.sub(
 673                     FIELD_SIZE_COMPAT_RE,
 674                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 675                     outtmpl)
 676
 677             # Missing numeric fields used together with integer presentation types
 678             # in format specification will break the argument substitution since
 679             # string 'NA' is returned for missing fields. We will patch output
 680             # template for missing fields to meet string presentation type.
 681             for numeric_field in self._NUMERIC_FIELDS:
 682                 if numeric_field not in template_dict:
 683                     # As of [1] format syntax is:
 684                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 685                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 686                     FORMAT_RE = r'''(?x)
 687                         (?<!%)
 688                         %
 689                         \({0}\)  # mapping key
 690                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 691                         (?:\d+)?  # minimum field width (optional)
 692                         (?:\.\d+)?  # precision (optional)
 693                         [hlL]?  # length modifier (optional)
 694                         [diouxXeEfFgGcrs%]  # conversion type
 695                     '''
 696                     outtmpl = re.sub(
 697                         FORMAT_RE.format(numeric_field),
 698                         r'%({0})s'.format(numeric_field), outtmpl)
 699
 700             # expand_path translates '%%' into '%' and '$$' into '$'
 701             # correspondingly that is not what we want since we need to keep
 702             # '%%' intact for template dict substitution step. Working around
 703             # with boundary-alike separator hack.
 704             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 705             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 706
 707             # outtmpl should be expand_path'ed before template dict substitution
 708             # because meta fields may contain env variables we don't want to
 709             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 710             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 711             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 712
 713             # Temporary fix for #4787
 714             # 'Treat' all problem characters by passing filename through preferredencoding
 715             # to workaround encoding issues with subprocess on python2 @ Windows
 716             if sys.version_info < (3, 0) and sys.platform == 'win32':
 717                 filename = encodeFilename(filename, True).decode(preferredencoding())
 718             return sanitize_path(filename)
 719         except ValueError as err:
 720             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 721             return None
 722
 723     def _match_entry(self, info_dict, incomplete):
 724         """ Returns None iff the file should be downloaded """
 725
 726         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 727         if 'title' in info_dict:
 728             # This can happen when we're just evaluating the playlist
 729             title = info_dict['title']
 730             matchtitle = self.params.get('matchtitle', False)
 731             if matchtitle:
 732                 if not re.search(matchtitle, title, re.IGNORECASE):
 733                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 734             rejecttitle = self.params.get('rejecttitle', False)
 735             if rejecttitle:
 736                 if re.search(rejecttitle, title, re.IGNORECASE):
 737                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 738         date = info_dict.get('upload_date')
 739         if date is not None:
 740             dateRange = self.params.get('daterange', DateRange())
 741             if date not in dateRange:
 742                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 743         view_count = info_dict.get('view_count')
 744         if view_count is not None:
 745             min_views = self.params.get('min_views')
 746             if min_views is not None and view_count < min_views:
 747                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 748             max_views = self.params.get('max_views')
 749             if max_views is not None and view_count > max_views:
 750                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 751         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 752             return 'Skipping "%s" because it is age restricted' % video_title
 753         if self.in_download_archive(info_dict):
 754             return '%s has already been recorded in archive' % video_title
 755
 756         if not incomplete:
 757             match_filter = self.params.get('match_filter')
 758             if match_filter is not None:
 759                 ret = match_filter(info_dict)
 760                 if ret is not None:
 761                     return ret
 762
 763         return None
 764
 765     @staticmethod
 766     def add_extra_info(info_dict, extra_info):
 767         '''Set the keys from extra_info in info dict if they are missing'''
 768         for key, value in extra_info.items():
 769             info_dict.setdefault(key, value)
 770
 771     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 772                      process=True, force_generic_extractor=False):
 773         '''
 774         Returns a list with a dictionary for each video we find.
 775         If 'download', also downloads the videos.
 776         extra_info is a dict containing the extra values to add to each result
 777         '''
 778
 779         if not ie_key and force_generic_extractor:
 780             ie_key = 'Generic'
 781
 782         if ie_key:
 783             ies = [self.get_info_extractor(ie_key)]
 784         else:
 785             ies = self._ies
 786
 787         for ie in ies:
 788             if not ie.suitable(url):
 789                 continue
 790
 791             ie = self.get_info_extractor(ie.ie_key())
 792             if not ie.working():
 793                 self.report_warning('The program functionality for this site has been marked as broken, '
 794                                     'and will probably not work.')
 795
 796             return self.__extract_info(url, ie, download, extra_info, process)
 797         else:
 798             self.report_error('no suitable InfoExtractor for URL %s' % url)
 799
 800     def __handle_extraction_exceptions(func):
 801         def wrapper(self, *args, **kwargs):
 802             try:
 803                 return func(self, *args, **kwargs)
 804             except GeoRestrictedError as e:
 805                 msg = e.msg
 806                 if e.countries:
 807                     msg += '\nThis video is available in %s.' % ', '.join(
 808                         map(ISO3166Utils.short2full, e.countries))
 809                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 810                 self.report_error(msg)
 811             except ExtractorError as e:  # An error we somewhat expected
 812                 self.report_error(compat_str(e), e.format_traceback())
 813             except MaxDownloadsReached:
 814                 raise
 815             except Exception as e:
 816                 if self.params.get('ignoreerrors', False):
 817                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 818                 else:
 819                     raise
 820         return wrapper
 821
 822     @__handle_extraction_exceptions
 823     def __extract_info(self, url, ie, download, extra_info, process):
 824         ie_result = ie.extract(url)
 825         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 826             return
 827         if isinstance(ie_result, list):
 828             # Backwards compatibility: old IE result format
 829             ie_result = {
 830                 '_type': 'compat_list',
 831                 'entries': ie_result,
 832             }
 833         self.add_default_extra_info(ie_result, ie, url)
 834         if process:
 835             return self.process_ie_result(ie_result, download, extra_info)
 836         else:
 837             return ie_result
 838
 839     def add_default_extra_info(self, ie_result, ie, url):
 840         self.add_extra_info(ie_result, {
 841             'extractor': ie.IE_NAME,
 842             'webpage_url': url,
 843             'webpage_url_basename': url_basename(url),
 844             'extractor_key': ie.ie_key(),
 845         })
 846
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        The handling depends on ie_result['_type'] ('video' when absent):
        'video' results go to process_video_result, 'url' results are
        extracted again from their URL, 'url_transparent' results are
        extracted from their URL and merged with the outer metadata,
        'playlist'/'multi_video' results have their selected entries processed
        one by one and 'compat_list' results have every entry processed.
        Any other type raises Exception.

        It will also download the videos if 'download'.
        extra_info holds values that are added to the results where missing.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With flat extraction the reference is returned unresolved, after
            # the forced printings have been done
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result, self.prepare_filename(ie_result),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            # Every non-None field of the outer result overrides the inner one,
            # except for the fields that identify the inner result itself
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # 'playliststart' is 1-based, the slices below need a 0-based index
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                # Yields the numbers of a specification made of comma separated
                # items, each being a single number or an inclusive
                # 'start-end' range
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                # NOTE(review): orderedSet presumably drops duplicates while
                # keeping the order - confirm against its definition
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

            ie_entries = ie_result['entries']

            # Picks the requested (1-based) items from a list of entries;
            # items whose index falls outside of the list are left out
            def make_playlistitems_entries(list_ie_entries):
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            # The entries may be a list, a PagedList or any other iterable;
            # each kind needs its own way of selecting the requested entries
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    # Request each item as a slice of length one
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    # Consume the iterable only up to the highest requested item
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                # Playlist level values handed to the entry as extra info
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip the entries rejected by the filters; the entry is
                # treated as incomplete here, so 'match_filter' is not applied
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.__process_iterable_entry(entry, download, extra)
                # TODO: skip failed (empty) entries?
                playlist_results.append(entry_result)
            # The entries are replaced by their processed results
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Gives an entry the extractor and web page fields of the list
            # result, unless the entry already has them
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
1041
1042     @__handle_extraction_exceptions
1043     def __process_iterable_entry(self, entry, download, extra_info):
1044         return self.process_ie_result(
1045             entry, download=download, extra_info=extra_info)
1046
1047     def _build_format_filter(self, filter_spec):
1048         " Returns a function to filter the formats according to the filter_spec "
1049
1050         OPERATORS = {
1051             '<': operator.lt,
1052             '<=': operator.le,
1053             '>': operator.gt,
1054             '>=': operator.ge,
1055             '=': operator.eq,
1056             '!=': operator.ne,
1057         }
1058         operator_rex = re.compile(r'''(?x)\s*
1059             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1060             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1061             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1062             $
1063             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1064         m = operator_rex.search(filter_spec)
1065         if m:
1066             try:
1067                 comparison_value = int(m.group('value'))
1068             except ValueError:
1069                 comparison_value = parse_filesize(m.group('value'))
1070                 if comparison_value is None:
1071                     comparison_value = parse_filesize(m.group('value') + 'B')
1072                 if comparison_value is None:
1073                     raise ValueError(
1074                         'Invalid value %r in format specification %r' % (
1075                             m.group('value'), filter_spec))
1076             op = OPERATORS[m.group('op')]
1077
1078         if not m:
1079             STR_OPERATORS = {
1080                 '=': operator.eq,
1081                 '^=': lambda attr, value: attr.startswith(value),
1082                 '$=': lambda attr, value: attr.endswith(value),
1083                 '*=': lambda attr, value: value in attr,
1084             }
1085             str_operator_rex = re.compile(r'''(?x)
1086                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1087                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1088                 \s*(?P<value>[a-zA-Z0-9._-]+)
1089                 \s*$
1090                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1091             m = str_operator_rex.search(filter_spec)
1092             if m:
1093                 comparison_value = m.group('value')
1094                 str_op = STR_OPERATORS[m.group('op')]
1095                 if m.group('negation'):
1096                     op = lambda attr, value: not str_op(attr, value)
1097                 else:
1098                     op = str_op
1099
1100         if not m:
1101             raise ValueError('Invalid filter specification %r' % filter_spec)
1102
1103         def _filter(f):
1104             actual_value = f.get(m.group('key'))
1105             if actual_value is None:
1106                 return m.group('none_inclusive')
1107             return op(actual_value, comparison_value)
1108         return _filter
1109
1110     def _default_format_spec(self, info_dict, download=True):
1111
1112         def can_merge():
1113             merger = FFmpegMergerPP(self)
1114             return merger.available and merger.can_merge()
1115
1116         def prefer_best():
1117             if self.params.get('simulate', False):
1118                 return False
1119             if not download:
1120                 return False
1121             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1122                 return True
1123             if info_dict.get('is_live'):
1124                 return True
1125             if not can_merge():
1126                 return True
1127             return False
1128
1129         req_format_list = ['bestvideo+bestaudio', 'best']
1130         if prefer_best():
1131             req_format_list.reverse()
1132         return '/'.join(req_format_list)
1133
1134     def build_format_selector(self, format_spec):
1135         def syntax_error(note, start):
1136             message = (
1137                 'Invalid format specification: '
1138                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1139             return SyntaxError(message)
1140
1141         PICKFIRST = 'PICKFIRST'
1142         MERGE = 'MERGE'
1143         SINGLE = 'SINGLE'
1144         GROUP = 'GROUP'
1145         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1146
1147         def _parse_filter(tokens):
1148             filter_parts = []
1149             for type, string, start, _, _ in tokens:
1150                 if type == tokenize.OP and string == ']':
1151                     return ''.join(filter_parts)
1152                 else:
1153                     filter_parts.append(string)
1154
1155         def _remove_unused_ops(tokens):
1156             # Remove operators that we don't use and join them with the surrounding strings
1157             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1158             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1159             last_string, last_start, last_end, last_line = None, None, None, None
1160             for type, string, start, end, line in tokens:
1161                 if type == tokenize.OP and string == '[':
1162                     if last_string:
1163                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1164                         last_string = None
1165                     yield type, string, start, end, line
1166                     # everything inside brackets will be handled by _parse_filter
1167                     for type, string, start, end, line in tokens:
1168                         yield type, string, start, end, line
1169                         if type == tokenize.OP and string == ']':
1170                             break
1171                 elif type == tokenize.OP and string in ALLOWED_OPS:
1172                     if last_string:
1173                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1174                         last_string = None
1175                     yield type, string, start, end, line
1176                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1177                     if not last_string:
1178                         last_string = string
1179                         last_start = start
1180                         last_end = end
1181                     else:
1182                         last_string += string
1183             if last_string:
1184                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1185
1186         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1187             selectors = []
1188             current_selector = None
1189             for type, string, start, _, _ in tokens:
1190                 # ENCODING is only defined in python 3.x
1191                 if type == getattr(tokenize, 'ENCODING', None):
1192                     continue
1193                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1194                     current_selector = FormatSelector(SINGLE, string, [])
1195                 elif type == tokenize.OP:
1196                     if string == ')':
1197                         if not inside_group:
1198                             # ')' will be handled by the parentheses group
1199                             tokens.restore_last_token()
1200                         break
1201                     elif inside_merge and string in ['/', ',']:
1202                         tokens.restore_last_token()
1203                         break
1204                     elif inside_choice and string == ',':
1205                         tokens.restore_last_token()
1206                         break
1207                     elif string == ',':
1208                         if not current_selector:
1209                             raise syntax_error('"," must follow a format selector', start)
1210                         selectors.append(current_selector)
1211                         current_selector = None
1212                     elif string == '/':
1213                         if not current_selector:
1214                             raise syntax_error('"/" must follow a format selector', start)
1215                         first_choice = current_selector
1216                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1217                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1218                     elif string == '[':
1219                         if not current_selector:
1220                             current_selector = FormatSelector(SINGLE, 'best', [])
1221                         format_filter = _parse_filter(tokens)
1222                         current_selector.filters.append(format_filter)
1223                     elif string == '(':
1224                         if current_selector:
1225                             raise syntax_error('Unexpected "("', start)
1226                         group = _parse_format_selection(tokens, inside_group=True)
1227                         current_selector = FormatSelector(GROUP, group, [])
1228                     elif string == '+':
1229                         video_selector = current_selector
1230                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
1231                         if not video_selector or not audio_selector:
1232                             raise syntax_error('"+" must be between two format selectors', start)
1233                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1234                     else:
1235                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1236                 elif type == tokenize.ENDMARKER:
1237                     break
1238             if current_selector:
1239                 selectors.append(current_selector)
1240             return selectors
1241
1242         def _build_selector_function(selector):
1243             if isinstance(selector, list):
1244                 fs = [_build_selector_function(s) for s in selector]
1245
1246                 def selector_function(ctx):
1247                     for f in fs:
1248                         for format in f(ctx):
1249                             yield format
1250                 return selector_function
1251             elif selector.type == GROUP:
1252                 selector_function = _build_selector_function(selector.selector)
1253             elif selector.type == PICKFIRST:
1254                 fs = [_build_selector_function(s) for s in selector.selector]
1255
1256                 def selector_function(ctx):
1257                     for f in fs:
1258                         picked_formats = list(f(ctx))
1259                         if picked_formats:
1260                             return picked_formats
1261                     return []
1262             elif selector.type == SINGLE:
1263                 format_spec = selector.selector
1264
1265                 def selector_function(ctx):
1266                     formats = list(ctx['formats'])
1267                     if not formats:
1268                         return
1269                     if format_spec == 'all':
1270                         for f in formats:
1271                             yield f
1272                     elif format_spec in ['best', 'worst', None]:
1273                         format_idx = 0 if format_spec == 'worst' else -1
1274                         audiovideo_formats = [
1275                             f for f in formats
1276                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1277                         if audiovideo_formats:
1278                             yield audiovideo_formats[format_idx]
1279                         # for extractors with incomplete formats (audio only (soundcloud)
1280                         # or video only (imgur)) we will fallback to best/worst
1281                         # {video,audio}-only format
1282                         elif ctx['incomplete_formats']:
1283                             yield formats[format_idx]
1284                     elif format_spec == 'bestaudio':
1285                         audio_formats = [
1286                             f for f in formats
1287                             if f.get('vcodec') == 'none']
1288                         if audio_formats:
1289                             yield audio_formats[-1]
1290                     elif format_spec == 'worstaudio':
1291                         audio_formats = [
1292                             f for f in formats
1293                             if f.get('vcodec') == 'none']
1294                         if audio_formats:
1295                             yield audio_formats[0]
1296                     elif format_spec == 'bestvideo':
1297                         video_formats = [
1298                             f for f in formats
1299                             if f.get('acodec') == 'none']
1300                         if video_formats:
1301                             yield video_formats[-1]
1302                     elif format_spec == 'worstvideo':
1303                         video_formats = [
1304                             f for f in formats
1305                             if f.get('acodec') == 'none']
1306                         if video_formats:
1307                             yield video_formats[0]
1308                     else:
1309                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1310                         if format_spec in extensions:
1311                             filter_f = lambda f: f['ext'] == format_spec
1312                         else:
1313                             filter_f = lambda f: f['format_id'] == format_spec
1314                         matches = list(filter(filter_f, formats))
1315                         if matches:
1316                             yield matches[-1]
1317             elif selector.type == MERGE:
1318                 def _merge(formats_info):
1319                     format_1, format_2 = [f['format_id'] for f in formats_info]
1320                     # The first format must contain the video and the
1321                     # second the audio
1322                     if formats_info[0].get('vcodec') == 'none':
1323                         self.report_error('The first format must '
1324                                           'contain the video, try using '
1325                                           '"-f %s+%s"' % (format_2, format_1))
1326                         return
1327                     # Formats must be opposite (video+audio)
1328                     if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1329                         self.report_error(
1330                             'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1331                             % (format_1, format_2))
1332                         return
1333                     output_ext = (
1334                         formats_info[0]['ext']
1335                         if self.params.get('merge_output_format') is None
1336                         else self.params['merge_output_format'])
1337                     return {
1338                         'requested_formats': formats_info,
1339                         'format': '%s+%s' % (formats_info[0].get('format'),
1340                                              formats_info[1].get('format')),
1341                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1342                                                 formats_info[1].get('format_id')),
1343                         'width': formats_info[0].get('width'),
1344                         'height': formats_info[0].get('height'),
1345                         'resolution': formats_info[0].get('resolution'),
1346                         'fps': formats_info[0].get('fps'),
1347                         'vcodec': formats_info[0].get('vcodec'),
1348                         'vbr': formats_info[0].get('vbr'),
1349                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1350                         'acodec': formats_info[1].get('acodec'),
1351                         'abr': formats_info[1].get('abr'),
1352                         'ext': output_ext,
1353                     }
1354                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1355
1356                 def selector_function(ctx):
1357                     for pair in itertools.product(
1358                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
1359                         yield _merge(pair)
1360
1361             filters = [self._build_format_filter(f) for f in selector.filters]
1362
1363             def final_selector(ctx):
1364                 ctx_copy = copy.deepcopy(ctx)
1365                 for _filter in filters:
1366                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1367                 return selector_function(ctx_copy)
1368             return final_selector
1369
1370         stream = io.BytesIO(format_spec.encode('utf-8'))
1371         try:
1372             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1373         except tokenize.TokenError:
1374             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1375
1376         class TokenIterator(object):
1377             def __init__(self, tokens):
1378                 self.tokens = tokens
1379                 self.counter = 0
1380
1381             def __iter__(self):
1382                 return self
1383
1384             def __next__(self):
1385                 if self.counter >= len(self.tokens):
1386                     raise StopIteration()
1387                 value = self.tokens[self.counter]
1388                 self.counter += 1
1389                 return value
1390
1391             next = __next__
1392
1393             def restore_last_token(self):
1394                 self.counter -= 1
1395
1396         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1397         return _build_selector_function(parsed_selector)
1398
1399     def _calc_headers(self, info_dict):
1400         res = std_headers.copy()
1401
1402         add_headers = info_dict.get('http_headers')
1403         if add_headers:
1404             res.update(add_headers)
1405
1406         cookies = self._calc_cookies(info_dict)
1407         if cookies:
1408             res['Cookie'] = cookies
1409
1410         if 'X-Forwarded-For' not in res:
1411             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1412             if x_forwarded_for_ip:
1413                 res['X-Forwarded-For'] = x_forwarded_for_ip
1414
1415         return res
1416
1417     def _calc_cookies(self, info_dict):
1418         pr = sanitized_Request(info_dict['url'])
1419         self.cookiejar.add_cookie_header(pr)
1420         return pr.get_header('Cookie')
1421
1422     def process_video_result(self, info_dict, download=True):
1423         assert info_dict.get('_type', 'video') == 'video'
1424
1425         if 'id' not in info_dict:
1426             raise ExtractorError('Missing "id" field in extractor result')
1427         if 'title' not in info_dict:
1428             raise ExtractorError('Missing "title" field in extractor result')
1429
1430         def report_force_conversion(field, field_not, conversion):
1431             self.report_warning(
1432                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1433                 % (field, field_not, conversion))
1434
1435         def sanitize_string_field(info, string_field):
1436             field = info.get(string_field)
1437             if field is None or isinstance(field, compat_str):
1438                 return
1439             report_force_conversion(string_field, 'a string', 'string')
1440             info[string_field] = compat_str(field)
1441
1442         def sanitize_numeric_fields(info):
1443             for numeric_field in self._NUMERIC_FIELDS:
1444                 field = info.get(numeric_field)
1445                 if field is None or isinstance(field, compat_numeric_types):
1446                     continue
1447                 report_force_conversion(numeric_field, 'numeric', 'int')
1448                 info[numeric_field] = int_or_none(field)
1449
1450         sanitize_string_field(info_dict, 'id')
1451         sanitize_numeric_fields(info_dict)
1452
1453         if 'playlist' not in info_dict:
1454             # It isn't part of a playlist
1455             info_dict['playlist'] = None
1456             info_dict['playlist_index'] = None
1457
1458         thumbnails = info_dict.get('thumbnails')
1459         if thumbnails is None:
1460             thumbnail = info_dict.get('thumbnail')
1461             if thumbnail:
1462                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1463         if thumbnails:
1464             thumbnails.sort(key=lambda t: (
1465                 t.get('preference') if t.get('preference') is not None else -1,
1466                 t.get('width') if t.get('width') is not None else -1,
1467                 t.get('height') if t.get('height') is not None else -1,
1468                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1469             for i, t in enumerate(thumbnails):
1470                 t['url'] = sanitize_url(t['url'])
1471                 if t.get('width') and t.get('height'):
1472                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1473                 if t.get('id') is None:
1474                     t['id'] = '%d' % i
1475
1476         if self.params.get('list_thumbnails'):
1477             self.list_thumbnails(info_dict)
1478             return
1479
1480         thumbnail = info_dict.get('thumbnail')
1481         if thumbnail:
1482             info_dict['thumbnail'] = sanitize_url(thumbnail)
1483         elif thumbnails:
1484             info_dict['thumbnail'] = thumbnails[-1]['url']
1485
1486         if 'display_id' not in info_dict and 'id' in info_dict:
1487             info_dict['display_id'] = info_dict['id']
1488
1489         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1490             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1491             # see http://bugs.python.org/issue1646728)
1492             try:
1493                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1494                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1495             except (ValueError, OverflowError, OSError):
1496                 pass
1497
1498         # Auto generate title fields corresponding to the *_number fields when missing
1499         # in order to always have clean titles. This is very common for TV series.
1500         for field in ('chapter', 'season', 'episode'):
1501             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1502                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1503
1504         for cc_kind in ('subtitles', 'automatic_captions'):
1505             cc = info_dict.get(cc_kind)
1506             if cc:
1507                 for _, subtitle in cc.items():
1508                     for subtitle_format in subtitle:
1509                         if subtitle_format.get('url'):
1510                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1511                         if subtitle_format.get('ext') is None:
1512                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1513
1514         automatic_captions = info_dict.get('automatic_captions')
1515         subtitles = info_dict.get('subtitles')
1516
1517         if self.params.get('listsubtitles', False):
1518             if 'automatic_captions' in info_dict:
1519                 self.list_subtitles(
1520                     info_dict['id'], automatic_captions, 'automatic captions')
1521             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1522             return
1523
1524         info_dict['requested_subtitles'] = self.process_subtitles(
1525             info_dict['id'], subtitles, automatic_captions)
1526
1527         # We now pick which formats have to be downloaded
1528         if info_dict.get('formats') is None:
1529             # There's only one format available
1530             formats = [info_dict]
1531         else:
1532             formats = info_dict['formats']
1533
1534         if not formats:
1535             raise ExtractorError('No video formats found!')
1536
1537         def is_wellformed(f):
1538             url = f.get('url')
1539             if not url:
1540                 self.report_warning(
1541                     '"url" field is missing or empty - skipping format, '
1542                     'there is an error in extractor')
1543                 return False
1544             if isinstance(url, bytes):
1545                 sanitize_string_field(f, 'url')
1546             return True
1547
1548         # Filter out malformed formats for better extraction robustness
1549         formats = list(filter(is_wellformed, formats))
1550
1551         formats_dict = {}
1552
1553         # We check that all the formats have the format and format_id fields
1554         for i, format in enumerate(formats):
1555             sanitize_string_field(format, 'format_id')
1556             sanitize_numeric_fields(format)
1557             format['url'] = sanitize_url(format['url'])
1558             if not format.get('format_id'):
1559                 format['format_id'] = compat_str(i)
1560             else:
1561                 # Sanitize format_id from characters used in format selector expression
1562                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1563             format_id = format['format_id']
1564             if format_id not in formats_dict:
1565                 formats_dict[format_id] = []
1566             formats_dict[format_id].append(format)
1567
1568         # Make sure all formats have unique format_id
1569         for format_id, ambiguous_formats in formats_dict.items():
1570             if len(ambiguous_formats) > 1:
1571                 for i, format in enumerate(ambiguous_formats):
1572                     format['format_id'] = '%s-%d' % (format_id, i)
1573
1574         for i, format in enumerate(formats):
1575             if format.get('format') is None:
1576                 format['format'] = '{id} - {res}{note}'.format(
1577                     id=format['format_id'],
1578                     res=self.format_resolution(format),
1579                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1580                 )
1581             # Automatically determine file extension if missing
1582             if format.get('ext') is None:
1583                 format['ext'] = determine_ext(format['url']).lower()
1584             # Automatically determine protocol if missing (useful for format
1585             # selection purposes)
1586             if format.get('protocol') is None:
1587                 format['protocol'] = determine_protocol(format)
1588             # Add HTTP headers, so that external programs can use them from the
1589             # json output
1590             full_format_info = info_dict.copy()
1591             full_format_info.update(format)
1592             format['http_headers'] = self._calc_headers(full_format_info)
1593         # Remove private housekeeping stuff
1594         if '__x_forwarded_for_ip' in info_dict:
1595             del info_dict['__x_forwarded_for_ip']
1596
1597         # TODO Central sorting goes here
1598
1599         if formats[0] is not info_dict:
1600             # only set the 'formats' fields if the original info_dict list them
1601             # otherwise we end up with a circular reference, the first (and unique)
1602             # element in the 'formats' field in info_dict is info_dict itself,
1603             # which can't be exported to json
1604             info_dict['formats'] = formats
1605         if self.params.get('listformats'):
1606             self.list_formats(info_dict)
1607             return
1608
1609         req_format = self.params.get('format')
1610         if req_format is None:
1611             req_format = self._default_format_spec(info_dict, download=download)
1612             if self.params.get('verbose'):
1613                 self._write_string('[debug] Default format spec: %s\n' % req_format)
1614
1615         format_selector = self.build_format_selector(req_format)
1616
1617         # While in format selection we may need to have an access to the original
1618         # format set in order to calculate some metrics or do some processing.
1619         # For now we need to be able to guess whether original formats provided
1620         # by extractor are incomplete or not (i.e. whether extractor provides only
1621         # video-only or audio-only formats) for proper formats selection for
1622         # extractors with such incomplete formats (see
1623         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1624         # Since formats may be filtered during format selection and may not match
1625         # the original formats the results may be incorrect. Thus original formats
1626         # or pre-calculated metrics should be passed to format selection routines
1627         # as well.
1628         # We will pass a context object containing all necessary additional data
1629         # instead of just formats.
1630         # This fixes incorrect format selection issue (see
1631         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1632         incomplete_formats = (
1633             # All formats are video-only or
1634             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1635             # all formats are audio-only
1636             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1637
1638         ctx = {
1639             'formats': formats,
1640             'incomplete_formats': incomplete_formats,
1641         }
1642
1643         formats_to_download = list(format_selector(ctx))
1644         if not formats_to_download:
1645             raise ExtractorError('requested format not available',
1646                                  expected=True)
1647
1648         if download:
1649             if len(formats_to_download) > 1:
1650                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1651             for format in formats_to_download:
1652                 new_info = dict(info_dict)
1653                 new_info.update(format)
1654                 self.process_info(new_info)
1655         # We update the info dict with the best quality format (backwards compatibility)
1656         info_dict.update(formats_to_download[-1])
1657         return info_dict
1658
1659     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1660         """Select the requested subtitles and their format"""
1661         available_subs = {}
1662         if normal_subtitles and self.params.get('writesubtitles'):
1663             available_subs.update(normal_subtitles)
1664         if automatic_captions and self.params.get('writeautomaticsub'):
1665             for lang, cap_info in automatic_captions.items():
1666                 if lang not in available_subs:
1667                     available_subs[lang] = cap_info
1668
1669         if (not self.params.get('writesubtitles') and not
1670                 self.params.get('writeautomaticsub') or not
1671                 available_subs):
1672             return None
1673
1674         if self.params.get('allsubtitles', False):
1675             requested_langs = available_subs.keys()
1676         else:
1677             if self.params.get('subtitleslangs', False):
1678                 requested_langs = self.params.get('subtitleslangs')
1679             elif 'en' in available_subs:
1680                 requested_langs = ['en']
1681             else:
1682                 requested_langs = [list(available_subs.keys())[0]]
1683
1684         formats_query = self.params.get('subtitlesformat', 'best')
1685         formats_preference = formats_query.split('/') if formats_query else []
1686         subs = {}
1687         for lang in requested_langs:
1688             formats = available_subs.get(lang)
1689             if formats is None:
1690                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1691                 continue
1692             for ext in formats_preference:
1693                 if ext == 'best':
1694                     f = formats[-1]
1695                     break
1696                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1697                 if matches:
1698                     f = matches[-1]
1699                     break
1700             else:
1701                 f = formats[-1]
1702                 self.report_warning(
1703                     'No subtitle format found matching "%s" for language %s, '
1704                     'using %s' % (formats_query, lang, f['ext']))
1705             subs[lang] = f
1706         return subs
1707
1708     def __forced_printings(self, info_dict, filename, incomplete):
1709         def print_mandatory(field):
1710             if (self.params.get('force%s' % field, False)
1711                     and (not incomplete or info_dict.get(field) is not None)):
1712                 self.to_stdout(info_dict[field])
1713
1714         def print_optional(field):
1715             if (self.params.get('force%s' % field, False)
1716                     and info_dict.get(field) is not None):
1717                 self.to_stdout(info_dict[field])
1718
1719         print_mandatory('title')
1720         print_mandatory('id')
1721         if self.params.get('forceurl', False) and not incomplete:
1722             if info_dict.get('requested_formats') is not None:
1723                 for f in info_dict['requested_formats']:
1724                     self.to_stdout(f['url'] + f.get('play_path', ''))
1725             else:
1726                 # For RTMP URLs, also include the playpath
1727                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1728         print_optional('thumbnail')
1729         print_optional('description')
1730         if self.params.get('forcefilename', False) and filename is not None:
1731             self.to_stdout(filename)
1732         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1733             self.to_stdout(formatSeconds(info_dict['duration']))
1734         print_mandatory('format')
1735         if self.params.get('forcejson', False):
1736             self.to_stdout(json.dumps(info_dict))
1737
1738     def process_info(self, info_dict):
1739         """Process a single resolved IE result."""
1740
1741         assert info_dict.get('_type', 'video') == 'video'
1742
1743         max_downloads = self.params.get('max_downloads')
1744         if max_downloads is not None:
1745             if self._num_downloads >= int(max_downloads):
1746                 raise MaxDownloadsReached()
1747
1748         # TODO: backward compatibility, to be removed
1749         info_dict['fulltitle'] = info_dict['title']
1750
1751         if 'format' not in info_dict:
1752             info_dict['format'] = info_dict['ext']
1753
1754         reason = self._match_entry(info_dict, incomplete=False)
1755         if reason is not None:
1756             self.to_screen('[download] ' + reason)
1757             return
1758
1759         self._num_downloads += 1
1760
1761         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1762
1763         # Forced printings
1764         self.__forced_printings(info_dict, filename, incomplete=False)
1765
1766         # Do nothing else if in simulate mode
1767         if self.params.get('simulate', False):
1768             return
1769
1770         if filename is None:
1771             return
1772
1773         def ensure_dir_exists(path):
1774             try:
1775                 dn = os.path.dirname(path)
1776                 if dn and not os.path.exists(dn):
1777                     os.makedirs(dn)
1778                 return True
1779             except (OSError, IOError) as err:
1780                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1781                 return False
1782
1783         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1784             return
1785
1786         if self.params.get('writedescription', False):
1787             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1788             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1789                 self.to_screen('[info] Video description is already present')
1790             elif info_dict.get('description') is None:
1791                 self.report_warning('There\'s no description to write.')
1792             else:
1793                 try:
1794                     self.to_screen('[info] Writing video description to: ' + descfn)
1795                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1796                         descfile.write(info_dict['description'])
1797                 except (OSError, IOError):
1798                     self.report_error('Cannot write description file ' + descfn)
1799                     return
1800
1801         if self.params.get('writeannotations', False):
1802             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1803             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1804                 self.to_screen('[info] Video annotations are already present')
1805             elif not info_dict.get('annotations'):
1806                 self.report_warning('There are no annotations to write.')
1807             else:
1808                 try:
1809                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1810                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1811                         annofile.write(info_dict['annotations'])
1812                 except (KeyError, TypeError):
1813                     self.report_warning('There are no annotations to write.')
1814                 except (OSError, IOError):
1815                     self.report_error('Cannot write annotations file: ' + annofn)
1816                     return
1817
1818         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1819                                        self.params.get('writeautomaticsub')])
1820
1821         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1822             # subtitles download errors are already managed as troubles in relevant IE
1823             # that way it will silently go on when used with unsupporting IE
1824             subtitles = info_dict['requested_subtitles']
1825             ie = self.get_info_extractor(info_dict['extractor_key'])
1826             for sub_lang, sub_info in subtitles.items():
1827                 sub_format = sub_info['ext']
1828                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1829                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1830                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1831                 else:
1832                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1833                     if sub_info.get('data') is not None:
1834                         try:
1835                             # Use newline='' to prevent conversion of newline characters
1836                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
1837                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1838                                 subfile.write(sub_info['data'])
1839                         except (OSError, IOError):
1840                             self.report_error('Cannot write subtitles file ' + sub_filename)
1841                             return
1842                     else:
1843                         try:
1844                             sub_data = ie._request_webpage(
1845                                 sub_info['url'], info_dict['id'], note=False).read()
1846                             with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1847                                 subfile.write(sub_data)
1848                         except (ExtractorError, IOError, OSError, ValueError) as err:
1849                             self.report_warning('Unable to download subtitle for "%s": %s' %
1850                                                 (sub_lang, error_to_compat_str(err)))
1851                             continue
1852
1853         if self.params.get('writeinfojson', False):
1854             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1855             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1856                 self.to_screen('[info] Video description metadata is already present')
1857             else:
1858                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1859                 try:
1860                     write_json_file(self.filter_requested_info(info_dict), infofn)
1861                 except (OSError, IOError):
1862                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1863                     return
1864
1865         self._write_thumbnails(info_dict, filename)
1866
1867         if not self.params.get('skip_download', False):
1868             try:
1869                 def dl(name, info):
1870                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1871                     for ph in self._progress_hooks:
1872                         fd.add_progress_hook(ph)
1873                     if self.params.get('verbose'):
1874                         self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
1875                     return fd.download(name, info)
1876
1877                 if info_dict.get('requested_formats') is not None:
1878                     downloaded = []
1879                     success = True
1880                     merger = FFmpegMergerPP(self)
1881                     if not merger.available:
1882                         postprocessors = []
1883                         self.report_warning('You have requested multiple '
1884                                             'formats but ffmpeg or avconv are not installed.'
1885                                             ' The formats won\'t be merged.')
1886                     else:
1887                         postprocessors = [merger]
1888
1889                     def compatible_formats(formats):
1890                         video, audio = formats
1891                         # Check extension
1892                         video_ext, audio_ext = video.get('ext'), audio.get('ext')
1893                         if video_ext and audio_ext:
1894                             COMPATIBLE_EXTS = (
1895                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1896                                 ('webm')
1897                             )
1898                             for exts in COMPATIBLE_EXTS:
1899                                 if video_ext in exts and audio_ext in exts:
1900                                     return True
1901                         # TODO: Check acodec/vcodec
1902                         return False
1903
1904                     filename_real_ext = os.path.splitext(filename)[1][1:]
1905                     filename_wo_ext = (
1906                         os.path.splitext(filename)[0]
1907                         if filename_real_ext == info_dict['ext']
1908                         else filename)
1909                     requested_formats = info_dict['requested_formats']
1910                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1911                         info_dict['ext'] = 'mkv'
1912                         self.report_warning(
1913                             'Requested formats are incompatible for merge and will be merged into mkv.')
1914                     # Ensure filename always has a correct extension for successful merge
1915                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1916                     if os.path.exists(encodeFilename(filename)):
1917                         self.to_screen(
1918                             '[download] %s has already been downloaded and '
1919                             'merged' % filename)
1920                     else:
1921                         for f in requested_formats:
1922                             new_info = dict(info_dict)
1923                             new_info.update(f)
1924                             fname = prepend_extension(
1925                                 self.prepare_filename(new_info),
1926                                 'f%s' % f['format_id'], new_info['ext'])
1927                             if not ensure_dir_exists(fname):
1928                                 return
1929                             downloaded.append(fname)
1930                             partial_success = dl(fname, new_info)
1931                             success = success and partial_success
1932                         info_dict['__postprocessors'] = postprocessors
1933                         info_dict['__files_to_merge'] = downloaded
1934                 else:
1935                     # Just a single file
1936                     success = dl(filename, info_dict)
1937             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1938                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1939                 return
1940             except (OSError, IOError) as err:
1941                 raise UnavailableVideoError(err)
1942             except (ContentTooShortError, ) as err:
1943                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1944                 return
1945
1946             if success and filename != '-':
1947                 # Fixup content
1948                 fixup_policy = self.params.get('fixup')
1949                 if fixup_policy is None:
1950                     fixup_policy = 'detect_or_warn'
1951
1952                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1953
1954                 stretched_ratio = info_dict.get('stretched_ratio')
1955                 if stretched_ratio is not None and stretched_ratio != 1:
1956                     if fixup_policy == 'warn':
1957                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1958                             info_dict['id'], stretched_ratio))
1959                     elif fixup_policy == 'detect_or_warn':
1960                         stretched_pp = FFmpegFixupStretchedPP(self)
1961                         if stretched_pp.available:
1962                             info_dict.setdefault('__postprocessors', [])
1963                             info_dict['__postprocessors'].append(stretched_pp)
1964                         else:
1965                             self.report_warning(
1966                                 '%s: Non-uniform pixel ratio (%s). %s'
1967                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1968                     else:
1969                         assert fixup_policy in ('ignore', 'never')
1970
1971                 if (info_dict.get('requested_formats') is None
1972                         and info_dict.get('container') == 'm4a_dash'):
1973                     if fixup_policy == 'warn':
1974                         self.report_warning(
1975                             '%s: writing DASH m4a. '
1976                             'Only some players support this container.'
1977                             % info_dict['id'])
1978                     elif fixup_policy == 'detect_or_warn':
1979                         fixup_pp = FFmpegFixupM4aPP(self)
1980                         if fixup_pp.available:
1981                             info_dict.setdefault('__postprocessors', [])
1982                             info_dict['__postprocessors'].append(fixup_pp)
1983                         else:
1984                             self.report_warning(
1985                                 '%s: writing DASH m4a. '
1986                                 'Only some players support this container. %s'
1987                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1988                     else:
1989                         assert fixup_policy in ('ignore', 'never')
1990
1991                 if (info_dict.get('protocol') == 'm3u8_native'
1992                         or info_dict.get('protocol') == 'm3u8'
1993                         and self.params.get('hls_prefer_native')):
1994                     if fixup_policy == 'warn':
1995                         self.report_warning('%s: malformed AAC bitstream detected.' % (
1996                             info_dict['id']))
1997                     elif fixup_policy == 'detect_or_warn':
1998                         fixup_pp = FFmpegFixupM3u8PP(self)
1999                         if fixup_pp.available:
2000                             info_dict.setdefault('__postprocessors', [])
2001                             info_dict['__postprocessors'].append(fixup_pp)
2002                         else:
2003                             self.report_warning(
2004                                 '%s: malformed AAC bitstream detected. %s'
2005                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2006                     else:
2007                         assert fixup_policy in ('ignore', 'never')
2008
2009                 try:
2010                     self.post_process(filename, info_dict)
2011                 except (PostProcessingError) as err:
2012                     self.report_error('postprocessing: %s' % str(err))
2013                     return
2014                 self.record_download_archive(info_dict)
2015
2016     def download(self, url_list):
2017         """Download a given list of URLs."""
2018         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2019         if (len(url_list) > 1
2020                 and outtmpl != '-'
2021                 and '%' not in outtmpl
2022                 and self.params.get('max_downloads') != 1):
2023             raise SameFileError(outtmpl)
2024
2025         for url in url_list:
2026             try:
2027                 # It also downloads the videos
2028                 res = self.extract_info(
2029                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2030             except UnavailableVideoError:
2031                 self.report_error('unable to download video')
2032             except MaxDownloadsReached:
2033                 self.to_screen('[info] Maximum number of downloaded files reached.')
2034                 raise
2035             else:
2036                 if self.params.get('dump_single_json', False):
2037                     self.to_stdout(json.dumps(res))
2038
2039         return self._download_retcode
2040
2041     def download_with_info_file(self, info_filename):
2042         with contextlib.closing(fileinput.FileInput(
2043                 [info_filename], mode='r',
2044                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2045             # FileInput doesn't have a read method, we can't call json.load
2046             info = self.filter_requested_info(json.loads('\n'.join(f)))
2047         try:
2048             self.process_ie_result(info, download=True)
2049         except DownloadError:
2050             webpage_url = info.get('webpage_url')
2051             if webpage_url is not None:
2052                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2053                 return self.download([webpage_url])
2054             else:
2055                 raise
2056         return self._download_retcode
2057
2058     @staticmethod
2059     def filter_requested_info(info_dict):
2060         return dict(
2061             (k, v) for k, v in info_dict.items()
2062             if k not in ['requested_formats', 'requested_subtitles'])
2063
2064     def post_process(self, filename, ie_info):
2065         """Run all the postprocessors on the given file."""
2066         info = dict(ie_info)
2067         info['filepath'] = filename
2068         pps_chain = []
2069         if ie_info.get('__postprocessors') is not None:
2070             pps_chain.extend(ie_info['__postprocessors'])
2071         pps_chain.extend(self._pps)
2072         for pp in pps_chain:
2073             files_to_delete = []
2074             try:
2075                 files_to_delete, info = pp.run(info)
2076             except PostProcessingError as e:
2077                 self.report_error(e.msg)
2078             if files_to_delete and not self.params.get('keepvideo', False):
2079                 for old_filename in files_to_delete:
2080                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2081                     try:
2082                         os.remove(encodeFilename(old_filename))
2083                     except (IOError, OSError):
2084                         self.report_warning('Unable to remove downloaded original file')
2085
2086     def _make_archive_id(self, info_dict):
2087         video_id = info_dict.get('id')
2088         if not video_id:
2089             return
2090         # Future-proof against any change in case
2091         # and backwards compatibility with prior versions
2092         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2093         if extractor is None:
2094             url = str_or_none(info_dict.get('url'))
2095             if not url:
2096                 return
2097             # Try to find matching extractor for the URL and take its ie_key
2098             for ie in self._ies:
2099                 if ie.suitable(url):
2100                     extractor = ie.ie_key()
2101                     break
2102             else:
2103                 return
2104         return extractor.lower() + ' ' + video_id
2105
2106     def in_download_archive(self, info_dict):
2107         fn = self.params.get('download_archive')
2108         if fn is None:
2109             return False
2110
2111         vid_id = self._make_archive_id(info_dict)
2112         if not vid_id:
2113             return False  # Incomplete video information
2114
2115         try:
2116             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
2117                 for line in archive_file:
2118                     if line.strip() == vid_id:
2119                         return True
2120         except IOError as ioe:
2121             if ioe.errno != errno.ENOENT:
2122                 raise
2123         return False
2124
2125     def record_download_archive(self, info_dict):
2126         fn = self.params.get('download_archive')
2127         if fn is None:
2128             return
2129         vid_id = self._make_archive_id(info_dict)
2130         assert vid_id
2131         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2132             archive_file.write(vid_id + '\n')
2133
2134     @staticmethod
2135     def format_resolution(format, default='unknown'):
2136         if format.get('vcodec') == 'none':
2137             return 'audio only'
2138         if format.get('resolution') is not None:
2139             return format['resolution']
2140         if format.get('height') is not None:
2141             if format.get('width') is not None:
2142                 res = '%sx%s' % (format['width'], format['height'])
2143             else:
2144                 res = '%sp' % format['height']
2145         elif format.get('width') is not None:
2146             res = '%dx?' % format['width']
2147         else:
2148             res = default
2149         return res
2150
2151     def _format_note(self, fdict):
2152         res = ''
2153         if fdict.get('ext') in ['f4f', 'f4m']:
2154             res += '(unsupported) '
2155         if fdict.get('language'):
2156             if res:
2157                 res += ' '
2158             res += '[%s] ' % fdict['language']
2159         if fdict.get('format_note') is not None:
2160             res += fdict['format_note'] + ' '
2161         if fdict.get('tbr') is not None:
2162             res += '%4dk ' % fdict['tbr']
2163         if fdict.get('container') is not None:
2164             if res:
2165                 res += ', '
2166             res += '%s container' % fdict['container']
2167         if (fdict.get('vcodec') is not None
2168                 and fdict.get('vcodec') != 'none'):
2169             if res:
2170                 res += ', '
2171             res += fdict['vcodec']
2172             if fdict.get('vbr') is not None:
2173                 res += '@'
2174         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2175             res += 'video@'
2176         if fdict.get('vbr') is not None:
2177             res += '%4dk' % fdict['vbr']
2178         if fdict.get('fps') is not None:
2179             if res:
2180                 res += ', '
2181             res += '%sfps' % fdict['fps']
2182         if fdict.get('acodec') is not None:
2183             if res:
2184                 res += ', '
2185             if fdict['acodec'] == 'none':
2186                 res += 'video only'
2187             else:
2188                 res += '%-5s' % fdict['acodec']
2189         elif fdict.get('abr') is not None:
2190             if res:
2191                 res += ', '
2192             res += 'audio'
2193         if fdict.get('abr') is not None:
2194             res += '@%3dk' % fdict['abr']
2195         if fdict.get('asr') is not None:
2196             res += ' (%5dHz)' % fdict['asr']
2197         if fdict.get('filesize') is not None:
2198             if res:
2199                 res += ', '
2200             res += format_bytes(fdict['filesize'])
2201         elif fdict.get('filesize_approx') is not None:
2202             if res:
2203                 res += ', '
2204             res += '~' + format_bytes(fdict['filesize_approx'])
2205         return res
2206
2207     def list_formats(self, info_dict):
2208         formats = info_dict.get('formats', [info_dict])
2209         table = [
2210             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2211             for f in formats
2212             if f.get('preference') is None or f['preference'] >= -1000]
2213         if len(formats) > 1:
2214             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2215
2216         header_line = ['format code', 'extension', 'resolution', 'note']
2217         self.to_screen(
2218             '[info] Available formats for %s:\n%s' %
2219             (info_dict['id'], render_table(header_line, table)))
2220
2221     def list_thumbnails(self, info_dict):
2222         thumbnails = info_dict.get('thumbnails')
2223         if not thumbnails:
2224             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2225             return
2226
2227         self.to_screen(
2228             '[info] Thumbnails for %s:' % info_dict['id'])
2229         self.to_screen(render_table(
2230             ['ID', 'width', 'height', 'URL'],
2231             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2232
2233     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2234         if not subtitles:
2235             self.to_screen('%s has no %s' % (video_id, name))
2236             return
2237         self.to_screen(
2238             'Available %s for %s:' % (name, video_id))
2239         self.to_screen(render_table(
2240             ['Language', 'formats'],
2241             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2242                 for lang, formats in subtitles.items()]))
2243
2244     def urlopen(self, req):
2245         """ Start an HTTP download """
2246         if isinstance(req, compat_basestring):
2247             req = sanitized_Request(req)
2248         return self._opener.open(req, timeout=self._socket_timeout)
2249
2250     def print_debug_header(self):
2251         if not self.params.get('verbose'):
2252             return
2253
2254         if type('') is not compat_str:
2255             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2256             self.report_warning(
2257                 'Your Python is broken! Update to a newer and supported version')
2258
2259         stdout_encoding = getattr(
2260             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2261         encoding_str = (
2262             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2263                 locale.getpreferredencoding(),
2264                 sys.getfilesystemencoding(),
2265                 stdout_encoding,
2266                 self.get_encoding()))
2267         write_string(encoding_str, encoding=None)
2268
2269         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2270         if _LAZY_LOADER:
2271             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2272         try:
2273             sp = subprocess.Popen(
2274                 ['git', 'rev-parse', '--short', 'HEAD'],
2275                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2276                 cwd=os.path.dirname(os.path.abspath(__file__)))
2277             out, err = sp.communicate()
2278             out = out.decode().strip()
2279             if re.match('[0-9a-f]+', out):
2280                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2281         except Exception:
2282             try:
2283                 sys.exc_clear()
2284             except Exception:
2285                 pass
2286
2287         def python_implementation():
2288             impl_name = platform.python_implementation()
2289             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2290                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2291             return impl_name
2292
2293         self._write_string('[debug] Python version %s (%s) - %s\n' % (
2294             platform.python_version(), python_implementation(),
2295             platform_name()))
2296
2297         exe_versions = FFmpegPostProcessor.get_versions(self)
2298         exe_versions['rtmpdump'] = rtmpdump_version()
2299         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2300         exe_str = ', '.join(
2301             '%s %s' % (exe, v)
2302             for exe, v in sorted(exe_versions.items())
2303             if v
2304         )
2305         if not exe_str:
2306             exe_str = 'none'
2307         self._write_string('[debug] exe versions: %s\n' % exe_str)
2308
2309         proxy_map = {}
2310         for handler in self._opener.handlers:
2311             if hasattr(handler, 'proxies'):
2312                 proxy_map.update(handler.proxies)
2313         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2314
2315         if self.params.get('call_home', False):
2316             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2317             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2318             latest_version = self.urlopen(
2319                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2320             if version_tuple(latest_version) > version_tuple(__version__):
2321                 self.report_warning(
2322                     'You are using an outdated version (newest version: %s)! '
2323                     'See https://yt-dl.org/update if you need help updating.' %
2324                     latest_version)
2325
2326     def _setup_opener(self):
2327         timeout_val = self.params.get('socket_timeout')
2328         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2329
2330         opts_cookiefile = self.params.get('cookiefile')
2331         opts_proxy = self.params.get('proxy')
2332
2333         if opts_cookiefile is None:
2334             self.cookiejar = compat_cookiejar.CookieJar()
2335         else:
2336             opts_cookiefile = expand_path(opts_cookiefile)
2337             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2338             if os.access(opts_cookiefile, os.R_OK):
2339                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2340
2341         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2342         if opts_proxy is not None:
2343             if opts_proxy == '':
2344                 proxies = {}
2345             else:
2346                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2347         else:
2348             proxies = compat_urllib_request.getproxies()
2349             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2350             if 'http' in proxies and 'https' not in proxies:
2351                 proxies['https'] = proxies['http']
2352         proxy_handler = PerRequestProxyHandler(proxies)
2353
2354         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2355         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2356         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2357         redirect_handler = YoutubeDLRedirectHandler()
2358         data_handler = compat_urllib_request_DataHandler()
2359
2360         # When passing our own FileHandler instance, build_opener won't add the
2361         # default FileHandler and allows us to disable the file protocol, which
2362         # can be used for malicious purposes (see
2363         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2364         file_handler = compat_urllib_request.FileHandler()
2365
2366         def file_open(*args, **kwargs):
2367             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2368         file_handler.file_open = file_open
2369
2370         opener = compat_urllib_request.build_opener(
2371             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2372
2373         # Delete the default user-agent header, which would otherwise apply in
2374         # cases where our custom HTTP handler doesn't come into play
2375         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2376         opener.addheaders = []
2377         self._opener = opener
2378
2379     def encode(self, s):
2380         if isinstance(s, bytes):
2381             return s  # Already encoded
2382
2383         try:
2384             return s.encode(self.get_encoding())
2385         except UnicodeEncodeError as err:
2386             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2387             raise
2388
2389     def get_encoding(self):
2390         encoding = self.params.get('encoding')
2391         if encoding is None:
2392             encoding = preferredencoding()
2393         return encoding
2394
2395     def _write_thumbnails(self, info_dict, filename):
2396         if self.params.get('writethumbnail', False):
2397             thumbnails = info_dict.get('thumbnails')
2398             if thumbnails:
2399                 thumbnails = [thumbnails[-1]]
2400         elif self.params.get('write_all_thumbnails', False):
2401             thumbnails = info_dict.get('thumbnails')
2402         else:
2403             return
2404
2405         if not thumbnails:
2406             # No thumbnails present, so return immediately
2407             return
2408
2409         for t in thumbnails:
2410             thumb_ext = determine_ext(t['url'], 'jpg')
2411             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2412             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2413             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2414
2415             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2416                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2417                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2418             else:
2419                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2420                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2421                 try:
2422                     uf = self.urlopen(t['url'])
2423                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2424                         shutil.copyfileobj(uf, thumbf)
2425                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2426                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2427                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2428                     self.report_warning('Unable to download thumbnail "%s": %s' %
2429                                         (t['url'], error_to_compat_str(err)))