youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import io
  20 import itertools
  21 import json
  22 import locale
  23 import math
  24 import operator
  25 import os
  26 import platform
  27 import random
  28 import re
  29 import socket
  30 import ssl
  31 import subprocess
  32 import sys
  33 import tempfile
  34 import time
  35 import traceback
  36 import xml.etree.ElementTree
  37 import zlib
  38
  39 from .compat import (
  40     compat_HTMLParseError,
  41     compat_HTMLParser,
  42     compat_HTTPError,
  43     compat_basestring,
  44     compat_chr,
  45     compat_cookiejar,
  46     compat_ctypes_WINFUNCTYPE,
  47     compat_etree_fromstring,
  48     compat_expanduser,
  49     compat_html_entities,
  50     compat_html_entities_html5,
  51     compat_http_client,
  52     compat_integer_types,
  53     compat_kwargs,
  54     compat_os_name,
  55     compat_parse_qs,
  56     compat_shlex_quote,
  57     compat_str,
  58     compat_struct_pack,
  59     compat_struct_unpack,
  60     compat_urllib_error,
  61     compat_urllib_parse,
  62     compat_urllib_parse_urlencode,
  63     compat_urllib_parse_urlparse,
  64     compat_urllib_parse_unquote_plus,
  65     compat_urllib_request,
  66     compat_urlparse,
  67     compat_xpath,
  68 )
  69
  70 from .socks import (
  71     ProxyType,
  72     sockssocket,
  73 )
  74
  75
  76 def register_socks_protocols():
  77     # "Register" SOCKS protocols
  78     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  79     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  80     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  81         if scheme not in compat_urlparse.uses_netloc:
  82             compat_urlparse.uses_netloc.append(scheme)
  83
  84
  85 # This is not clearly defined otherwise
  86 compiled_regex_type = type(re.compile(''))
  87
  88
  89 def random_user_agent():
  90     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  91     _CHROME_VERSIONS = (
  92         '74.0.3729.129',
  93         '76.0.3780.3',
  94         '76.0.3780.2',
  95         '74.0.3729.128',
  96         '76.0.3780.1',
  97         '76.0.3780.0',
  98         '75.0.3770.15',
  99         '74.0.3729.127',
 100         '74.0.3729.126',
 101         '76.0.3779.1',
 102         '76.0.3779.0',
 103         '75.0.3770.14',
 104         '74.0.3729.125',
 105         '76.0.3778.1',
 106         '76.0.3778.0',
 107         '75.0.3770.13',
 108         '74.0.3729.124',
 109         '74.0.3729.123',
 110         '73.0.3683.121',
 111         '76.0.3777.1',
 112         '76.0.3777.0',
 113         '75.0.3770.12',
 114         '74.0.3729.122',
 115         '76.0.3776.4',
 116         '75.0.3770.11',
 117         '74.0.3729.121',
 118         '76.0.3776.3',
 119         '76.0.3776.2',
 120         '73.0.3683.120',
 121         '74.0.3729.120',
 122         '74.0.3729.119',
 123         '74.0.3729.118',
 124         '76.0.3776.1',
 125         '76.0.3776.0',
 126         '76.0.3775.5',
 127         '75.0.3770.10',
 128         '74.0.3729.117',
 129         '76.0.3775.4',
 130         '76.0.3775.3',
 131         '74.0.3729.116',
 132         '75.0.3770.9',
 133         '76.0.3775.2',
 134         '76.0.3775.1',
 135         '76.0.3775.0',
 136         '75.0.3770.8',
 137         '74.0.3729.115',
 138         '74.0.3729.114',
 139         '76.0.3774.1',
 140         '76.0.3774.0',
 141         '75.0.3770.7',
 142         '74.0.3729.113',
 143         '74.0.3729.112',
 144         '74.0.3729.111',
 145         '76.0.3773.1',
 146         '76.0.3773.0',
 147         '75.0.3770.6',
 148         '74.0.3729.110',
 149         '74.0.3729.109',
 150         '76.0.3772.1',
 151         '76.0.3772.0',
 152         '75.0.3770.5',
 153         '74.0.3729.108',
 154         '74.0.3729.107',
 155         '76.0.3771.1',
 156         '76.0.3771.0',
 157         '75.0.3770.4',
 158         '74.0.3729.106',
 159         '74.0.3729.105',
 160         '75.0.3770.3',
 161         '74.0.3729.104',
 162         '74.0.3729.103',
 163         '74.0.3729.102',
 164         '75.0.3770.2',
 165         '74.0.3729.101',
 166         '75.0.3770.1',
 167         '75.0.3770.0',
 168         '74.0.3729.100',
 169         '75.0.3769.5',
 170         '75.0.3769.4',
 171         '74.0.3729.99',
 172         '75.0.3769.3',
 173         '75.0.3769.2',
 174         '75.0.3768.6',
 175         '74.0.3729.98',
 176         '75.0.3769.1',
 177         '75.0.3769.0',
 178         '74.0.3729.97',
 179         '73.0.3683.119',
 180         '73.0.3683.118',
 181         '74.0.3729.96',
 182         '75.0.3768.5',
 183         '75.0.3768.4',
 184         '75.0.3768.3',
 185         '75.0.3768.2',
 186         '74.0.3729.95',
 187         '74.0.3729.94',
 188         '75.0.3768.1',
 189         '75.0.3768.0',
 190         '74.0.3729.93',
 191         '74.0.3729.92',
 192         '73.0.3683.117',
 193         '74.0.3729.91',
 194         '75.0.3766.3',
 195         '74.0.3729.90',
 196         '75.0.3767.2',
 197         '75.0.3767.1',
 198         '75.0.3767.0',
 199         '74.0.3729.89',
 200         '73.0.3683.116',
 201         '75.0.3766.2',
 202         '74.0.3729.88',
 203         '75.0.3766.1',
 204         '75.0.3766.0',
 205         '74.0.3729.87',
 206         '73.0.3683.115',
 207         '74.0.3729.86',
 208         '75.0.3765.1',
 209         '75.0.3765.0',
 210         '74.0.3729.85',
 211         '73.0.3683.114',
 212         '74.0.3729.84',
 213         '75.0.3764.1',
 214         '75.0.3764.0',
 215         '74.0.3729.83',
 216         '73.0.3683.113',
 217         '75.0.3763.2',
 218         '75.0.3761.4',
 219         '74.0.3729.82',
 220         '75.0.3763.1',
 221         '75.0.3763.0',
 222         '74.0.3729.81',
 223         '73.0.3683.112',
 224         '75.0.3762.1',
 225         '75.0.3762.0',
 226         '74.0.3729.80',
 227         '75.0.3761.3',
 228         '74.0.3729.79',
 229         '73.0.3683.111',
 230         '75.0.3761.2',
 231         '74.0.3729.78',
 232         '74.0.3729.77',
 233         '75.0.3761.1',
 234         '75.0.3761.0',
 235         '73.0.3683.110',
 236         '74.0.3729.76',
 237         '74.0.3729.75',
 238         '75.0.3760.0',
 239         '74.0.3729.74',
 240         '75.0.3759.8',
 241         '75.0.3759.7',
 242         '75.0.3759.6',
 243         '74.0.3729.73',
 244         '75.0.3759.5',
 245         '74.0.3729.72',
 246         '73.0.3683.109',
 247         '75.0.3759.4',
 248         '75.0.3759.3',
 249         '74.0.3729.71',
 250         '75.0.3759.2',
 251         '74.0.3729.70',
 252         '73.0.3683.108',
 253         '74.0.3729.69',
 254         '75.0.3759.1',
 255         '75.0.3759.0',
 256         '74.0.3729.68',
 257         '73.0.3683.107',
 258         '74.0.3729.67',
 259         '75.0.3758.1',
 260         '75.0.3758.0',
 261         '74.0.3729.66',
 262         '73.0.3683.106',
 263         '74.0.3729.65',
 264         '75.0.3757.1',
 265         '75.0.3757.0',
 266         '74.0.3729.64',
 267         '73.0.3683.105',
 268         '74.0.3729.63',
 269         '75.0.3756.1',
 270         '75.0.3756.0',
 271         '74.0.3729.62',
 272         '73.0.3683.104',
 273         '75.0.3755.3',
 274         '75.0.3755.2',
 275         '73.0.3683.103',
 276         '75.0.3755.1',
 277         '75.0.3755.0',
 278         '74.0.3729.61',
 279         '73.0.3683.102',
 280         '74.0.3729.60',
 281         '75.0.3754.2',
 282         '74.0.3729.59',
 283         '75.0.3753.4',
 284         '74.0.3729.58',
 285         '75.0.3754.1',
 286         '75.0.3754.0',
 287         '74.0.3729.57',
 288         '73.0.3683.101',
 289         '75.0.3753.3',
 290         '75.0.3752.2',
 291         '75.0.3753.2',
 292         '74.0.3729.56',
 293         '75.0.3753.1',
 294         '75.0.3753.0',
 295         '74.0.3729.55',
 296         '73.0.3683.100',
 297         '74.0.3729.54',
 298         '75.0.3752.1',
 299         '75.0.3752.0',
 300         '74.0.3729.53',
 301         '73.0.3683.99',
 302         '74.0.3729.52',
 303         '75.0.3751.1',
 304         '75.0.3751.0',
 305         '74.0.3729.51',
 306         '73.0.3683.98',
 307         '74.0.3729.50',
 308         '75.0.3750.0',
 309         '74.0.3729.49',
 310         '74.0.3729.48',
 311         '74.0.3729.47',
 312         '75.0.3749.3',
 313         '74.0.3729.46',
 314         '73.0.3683.97',
 315         '75.0.3749.2',
 316         '74.0.3729.45',
 317         '75.0.3749.1',
 318         '75.0.3749.0',
 319         '74.0.3729.44',
 320         '73.0.3683.96',
 321         '74.0.3729.43',
 322         '74.0.3729.42',
 323         '75.0.3748.1',
 324         '75.0.3748.0',
 325         '74.0.3729.41',
 326         '75.0.3747.1',
 327         '73.0.3683.95',
 328         '75.0.3746.4',
 329         '74.0.3729.40',
 330         '74.0.3729.39',
 331         '75.0.3747.0',
 332         '75.0.3746.3',
 333         '75.0.3746.2',
 334         '74.0.3729.38',
 335         '75.0.3746.1',
 336         '75.0.3746.0',
 337         '74.0.3729.37',
 338         '73.0.3683.94',
 339         '75.0.3745.5',
 340         '75.0.3745.4',
 341         '75.0.3745.3',
 342         '75.0.3745.2',
 343         '74.0.3729.36',
 344         '75.0.3745.1',
 345         '75.0.3745.0',
 346         '75.0.3744.2',
 347         '74.0.3729.35',
 348         '73.0.3683.93',
 349         '74.0.3729.34',
 350         '75.0.3744.1',
 351         '75.0.3744.0',
 352         '74.0.3729.33',
 353         '73.0.3683.92',
 354         '74.0.3729.32',
 355         '74.0.3729.31',
 356         '73.0.3683.91',
 357         '75.0.3741.2',
 358         '75.0.3740.5',
 359         '74.0.3729.30',
 360         '75.0.3741.1',
 361         '75.0.3741.0',
 362         '74.0.3729.29',
 363         '75.0.3740.4',
 364         '73.0.3683.90',
 365         '74.0.3729.28',
 366         '75.0.3740.3',
 367         '73.0.3683.89',
 368         '75.0.3740.2',
 369         '74.0.3729.27',
 370         '75.0.3740.1',
 371         '75.0.3740.0',
 372         '74.0.3729.26',
 373         '73.0.3683.88',
 374         '73.0.3683.87',
 375         '74.0.3729.25',
 376         '75.0.3739.1',
 377         '75.0.3739.0',
 378         '73.0.3683.86',
 379         '74.0.3729.24',
 380         '73.0.3683.85',
 381         '75.0.3738.4',
 382         '75.0.3738.3',
 383         '75.0.3738.2',
 384         '75.0.3738.1',
 385         '75.0.3738.0',
 386         '74.0.3729.23',
 387         '73.0.3683.84',
 388         '74.0.3729.22',
 389         '74.0.3729.21',
 390         '75.0.3737.1',
 391         '75.0.3737.0',
 392         '74.0.3729.20',
 393         '73.0.3683.83',
 394         '74.0.3729.19',
 395         '75.0.3736.1',
 396         '75.0.3736.0',
 397         '74.0.3729.18',
 398         '73.0.3683.82',
 399         '74.0.3729.17',
 400         '75.0.3735.1',
 401         '75.0.3735.0',
 402         '74.0.3729.16',
 403         '73.0.3683.81',
 404         '75.0.3734.1',
 405         '75.0.3734.0',
 406         '74.0.3729.15',
 407         '73.0.3683.80',
 408         '74.0.3729.14',
 409         '75.0.3733.1',
 410         '75.0.3733.0',
 411         '75.0.3732.1',
 412         '74.0.3729.13',
 413         '74.0.3729.12',
 414         '73.0.3683.79',
 415         '74.0.3729.11',
 416         '75.0.3732.0',
 417         '74.0.3729.10',
 418         '73.0.3683.78',
 419         '74.0.3729.9',
 420         '74.0.3729.8',
 421         '74.0.3729.7',
 422         '75.0.3731.3',
 423         '75.0.3731.2',
 424         '75.0.3731.0',
 425         '74.0.3729.6',
 426         '73.0.3683.77',
 427         '73.0.3683.76',
 428         '75.0.3730.5',
 429         '75.0.3730.4',
 430         '73.0.3683.75',
 431         '74.0.3729.5',
 432         '73.0.3683.74',
 433         '75.0.3730.3',
 434         '75.0.3730.2',
 435         '74.0.3729.4',
 436         '73.0.3683.73',
 437         '73.0.3683.72',
 438         '75.0.3730.1',
 439         '75.0.3730.0',
 440         '74.0.3729.3',
 441         '73.0.3683.71',
 442         '74.0.3729.2',
 443         '73.0.3683.70',
 444         '74.0.3729.1',
 445         '74.0.3729.0',
 446         '74.0.3726.4',
 447         '73.0.3683.69',
 448         '74.0.3726.3',
 449         '74.0.3728.0',
 450         '74.0.3726.2',
 451         '73.0.3683.68',
 452         '74.0.3726.1',
 453         '74.0.3726.0',
 454         '74.0.3725.4',
 455         '73.0.3683.67',
 456         '73.0.3683.66',
 457         '74.0.3725.3',
 458         '74.0.3725.2',
 459         '74.0.3725.1',
 460         '74.0.3724.8',
 461         '74.0.3725.0',
 462         '73.0.3683.65',
 463         '74.0.3724.7',
 464         '74.0.3724.6',
 465         '74.0.3724.5',
 466         '74.0.3724.4',
 467         '74.0.3724.3',
 468         '74.0.3724.2',
 469         '74.0.3724.1',
 470         '74.0.3724.0',
 471         '73.0.3683.64',
 472         '74.0.3723.1',
 473         '74.0.3723.0',
 474         '73.0.3683.63',
 475         '74.0.3722.1',
 476         '74.0.3722.0',
 477         '73.0.3683.62',
 478         '74.0.3718.9',
 479         '74.0.3702.3',
 480         '74.0.3721.3',
 481         '74.0.3721.2',
 482         '74.0.3721.1',
 483         '74.0.3721.0',
 484         '74.0.3720.6',
 485         '73.0.3683.61',
 486         '72.0.3626.122',
 487         '73.0.3683.60',
 488         '74.0.3720.5',
 489         '72.0.3626.121',
 490         '74.0.3718.8',
 491         '74.0.3720.4',
 492         '74.0.3720.3',
 493         '74.0.3718.7',
 494         '74.0.3720.2',
 495         '74.0.3720.1',
 496         '74.0.3720.0',
 497         '74.0.3718.6',
 498         '74.0.3719.5',
 499         '73.0.3683.59',
 500         '74.0.3718.5',
 501         '74.0.3718.4',
 502         '74.0.3719.4',
 503         '74.0.3719.3',
 504         '74.0.3719.2',
 505         '74.0.3719.1',
 506         '73.0.3683.58',
 507         '74.0.3719.0',
 508         '73.0.3683.57',
 509         '73.0.3683.56',
 510         '74.0.3718.3',
 511         '73.0.3683.55',
 512         '74.0.3718.2',
 513         '74.0.3718.1',
 514         '74.0.3718.0',
 515         '73.0.3683.54',
 516         '74.0.3717.2',
 517         '73.0.3683.53',
 518         '74.0.3717.1',
 519         '74.0.3717.0',
 520         '73.0.3683.52',
 521         '74.0.3716.1',
 522         '74.0.3716.0',
 523         '73.0.3683.51',
 524         '74.0.3715.1',
 525         '74.0.3715.0',
 526         '73.0.3683.50',
 527         '74.0.3711.2',
 528         '74.0.3714.2',
 529         '74.0.3713.3',
 530         '74.0.3714.1',
 531         '74.0.3714.0',
 532         '73.0.3683.49',
 533         '74.0.3713.1',
 534         '74.0.3713.0',
 535         '72.0.3626.120',
 536         '73.0.3683.48',
 537         '74.0.3712.2',
 538         '74.0.3712.1',
 539         '74.0.3712.0',
 540         '73.0.3683.47',
 541         '72.0.3626.119',
 542         '73.0.3683.46',
 543         '74.0.3710.2',
 544         '72.0.3626.118',
 545         '74.0.3711.1',
 546         '74.0.3711.0',
 547         '73.0.3683.45',
 548         '72.0.3626.117',
 549         '74.0.3710.1',
 550         '74.0.3710.0',
 551         '73.0.3683.44',
 552         '72.0.3626.116',
 553         '74.0.3709.1',
 554         '74.0.3709.0',
 555         '74.0.3704.9',
 556         '73.0.3683.43',
 557         '72.0.3626.115',
 558         '74.0.3704.8',
 559         '74.0.3704.7',
 560         '74.0.3708.0',
 561         '74.0.3706.7',
 562         '74.0.3704.6',
 563         '73.0.3683.42',
 564         '72.0.3626.114',
 565         '74.0.3706.6',
 566         '72.0.3626.113',
 567         '74.0.3704.5',
 568         '74.0.3706.5',
 569         '74.0.3706.4',
 570         '74.0.3706.3',
 571         '74.0.3706.2',
 572         '74.0.3706.1',
 573         '74.0.3706.0',
 574         '73.0.3683.41',
 575         '72.0.3626.112',
 576         '74.0.3705.1',
 577         '74.0.3705.0',
 578         '73.0.3683.40',
 579         '72.0.3626.111',
 580         '73.0.3683.39',
 581         '74.0.3704.4',
 582         '73.0.3683.38',
 583         '74.0.3704.3',
 584         '74.0.3704.2',
 585         '74.0.3704.1',
 586         '74.0.3704.0',
 587         '73.0.3683.37',
 588         '72.0.3626.110',
 589         '72.0.3626.109',
 590         '74.0.3703.3',
 591         '74.0.3703.2',
 592         '73.0.3683.36',
 593         '74.0.3703.1',
 594         '74.0.3703.0',
 595         '73.0.3683.35',
 596         '72.0.3626.108',
 597         '74.0.3702.2',
 598         '74.0.3699.3',
 599         '74.0.3702.1',
 600         '74.0.3702.0',
 601         '73.0.3683.34',
 602         '72.0.3626.107',
 603         '73.0.3683.33',
 604         '74.0.3701.1',
 605         '74.0.3701.0',
 606         '73.0.3683.32',
 607         '73.0.3683.31',
 608         '72.0.3626.105',
 609         '74.0.3700.1',
 610         '74.0.3700.0',
 611         '73.0.3683.29',
 612         '72.0.3626.103',
 613         '74.0.3699.2',
 614         '74.0.3699.1',
 615         '74.0.3699.0',
 616         '73.0.3683.28',
 617         '72.0.3626.102',
 618         '73.0.3683.27',
 619         '73.0.3683.26',
 620         '74.0.3698.0',
 621         '74.0.3696.2',
 622         '72.0.3626.101',
 623         '73.0.3683.25',
 624         '74.0.3696.1',
 625         '74.0.3696.0',
 626         '74.0.3694.8',
 627         '72.0.3626.100',
 628         '74.0.3694.7',
 629         '74.0.3694.6',
 630         '74.0.3694.5',
 631         '74.0.3694.4',
 632         '72.0.3626.99',
 633         '72.0.3626.98',
 634         '74.0.3694.3',
 635         '73.0.3683.24',
 636         '72.0.3626.97',
 637         '72.0.3626.96',
 638         '72.0.3626.95',
 639         '73.0.3683.23',
 640         '72.0.3626.94',
 641         '73.0.3683.22',
 642         '73.0.3683.21',
 643         '72.0.3626.93',
 644         '74.0.3694.2',
 645         '72.0.3626.92',
 646         '74.0.3694.1',
 647         '74.0.3694.0',
 648         '74.0.3693.6',
 649         '73.0.3683.20',
 650         '72.0.3626.91',
 651         '74.0.3693.5',
 652         '74.0.3693.4',
 653         '74.0.3693.3',
 654         '74.0.3693.2',
 655         '73.0.3683.19',
 656         '74.0.3693.1',
 657         '74.0.3693.0',
 658         '73.0.3683.18',
 659         '72.0.3626.90',
 660         '74.0.3692.1',
 661         '74.0.3692.0',
 662         '73.0.3683.17',
 663         '72.0.3626.89',
 664         '74.0.3687.3',
 665         '74.0.3691.1',
 666         '74.0.3691.0',
 667         '73.0.3683.16',
 668         '72.0.3626.88',
 669         '72.0.3626.87',
 670         '73.0.3683.15',
 671         '74.0.3690.1',
 672         '74.0.3690.0',
 673         '73.0.3683.14',
 674         '72.0.3626.86',
 675         '73.0.3683.13',
 676         '73.0.3683.12',
 677         '74.0.3689.1',
 678         '74.0.3689.0',
 679         '73.0.3683.11',
 680         '72.0.3626.85',
 681         '73.0.3683.10',
 682         '72.0.3626.84',
 683         '73.0.3683.9',
 684         '74.0.3688.1',
 685         '74.0.3688.0',
 686         '73.0.3683.8',
 687         '72.0.3626.83',
 688         '74.0.3687.2',
 689         '74.0.3687.1',
 690         '74.0.3687.0',
 691         '73.0.3683.7',
 692         '72.0.3626.82',
 693         '74.0.3686.4',
 694         '72.0.3626.81',
 695         '74.0.3686.3',
 696         '74.0.3686.2',
 697         '74.0.3686.1',
 698         '74.0.3686.0',
 699         '73.0.3683.6',
 700         '72.0.3626.80',
 701         '74.0.3685.1',
 702         '74.0.3685.0',
 703         '73.0.3683.5',
 704         '72.0.3626.79',
 705         '74.0.3684.1',
 706         '74.0.3684.0',
 707         '73.0.3683.4',
 708         '72.0.3626.78',
 709         '72.0.3626.77',
 710         '73.0.3683.3',
 711         '73.0.3683.2',
 712         '72.0.3626.76',
 713         '73.0.3683.1',
 714         '73.0.3683.0',
 715         '72.0.3626.75',
 716         '71.0.3578.141',
 717         '73.0.3682.1',
 718         '73.0.3682.0',
 719         '72.0.3626.74',
 720         '71.0.3578.140',
 721         '73.0.3681.4',
 722         '73.0.3681.3',
 723         '73.0.3681.2',
 724         '73.0.3681.1',
 725         '73.0.3681.0',
 726         '72.0.3626.73',
 727         '71.0.3578.139',
 728         '72.0.3626.72',
 729         '72.0.3626.71',
 730         '73.0.3680.1',
 731         '73.0.3680.0',
 732         '72.0.3626.70',
 733         '71.0.3578.138',
 734         '73.0.3678.2',
 735         '73.0.3679.1',
 736         '73.0.3679.0',
 737         '72.0.3626.69',
 738         '71.0.3578.137',
 739         '73.0.3678.1',
 740         '73.0.3678.0',
 741         '71.0.3578.136',
 742         '73.0.3677.1',
 743         '73.0.3677.0',
 744         '72.0.3626.68',
 745         '72.0.3626.67',
 746         '71.0.3578.135',
 747         '73.0.3676.1',
 748         '73.0.3676.0',
 749         '73.0.3674.2',
 750         '72.0.3626.66',
 751         '71.0.3578.134',
 752         '73.0.3674.1',
 753         '73.0.3674.0',
 754         '72.0.3626.65',
 755         '71.0.3578.133',
 756         '73.0.3673.2',
 757         '73.0.3673.1',
 758         '73.0.3673.0',
 759         '72.0.3626.64',
 760         '71.0.3578.132',
 761         '72.0.3626.63',
 762         '72.0.3626.62',
 763         '72.0.3626.61',
 764         '72.0.3626.60',
 765         '73.0.3672.1',
 766         '73.0.3672.0',
 767         '72.0.3626.59',
 768         '71.0.3578.131',
 769         '73.0.3671.3',
 770         '73.0.3671.2',
 771         '73.0.3671.1',
 772         '73.0.3671.0',
 773         '72.0.3626.58',
 774         '71.0.3578.130',
 775         '73.0.3670.1',
 776         '73.0.3670.0',
 777         '72.0.3626.57',
 778         '71.0.3578.129',
 779         '73.0.3669.1',
 780         '73.0.3669.0',
 781         '72.0.3626.56',
 782         '71.0.3578.128',
 783         '73.0.3668.2',
 784         '73.0.3668.1',
 785         '73.0.3668.0',
 786         '72.0.3626.55',
 787         '71.0.3578.127',
 788         '73.0.3667.2',
 789         '73.0.3667.1',
 790         '73.0.3667.0',
 791         '72.0.3626.54',
 792         '71.0.3578.126',
 793         '73.0.3666.1',
 794         '73.0.3666.0',
 795         '72.0.3626.53',
 796         '71.0.3578.125',
 797         '73.0.3665.4',
 798         '73.0.3665.3',
 799         '72.0.3626.52',
 800         '73.0.3665.2',
 801         '73.0.3664.4',
 802         '73.0.3665.1',
 803         '73.0.3665.0',
 804         '72.0.3626.51',
 805         '71.0.3578.124',
 806         '72.0.3626.50',
 807         '73.0.3664.3',
 808         '73.0.3664.2',
 809         '73.0.3664.1',
 810         '73.0.3664.0',
 811         '73.0.3663.2',
 812         '72.0.3626.49',
 813         '71.0.3578.123',
 814         '73.0.3663.1',
 815         '73.0.3663.0',
 816         '72.0.3626.48',
 817         '71.0.3578.122',
 818         '73.0.3662.1',
 819         '73.0.3662.0',
 820         '72.0.3626.47',
 821         '71.0.3578.121',
 822         '73.0.3661.1',
 823         '72.0.3626.46',
 824         '73.0.3661.0',
 825         '72.0.3626.45',
 826         '71.0.3578.120',
 827         '73.0.3660.2',
 828         '73.0.3660.1',
 829         '73.0.3660.0',
 830         '72.0.3626.44',
 831         '71.0.3578.119',
 832         '73.0.3659.1',
 833         '73.0.3659.0',
 834         '72.0.3626.43',
 835         '71.0.3578.118',
 836         '73.0.3658.1',
 837         '73.0.3658.0',
 838         '72.0.3626.42',
 839         '71.0.3578.117',
 840         '73.0.3657.1',
 841         '73.0.3657.0',
 842         '72.0.3626.41',
 843         '71.0.3578.116',
 844         '73.0.3656.1',
 845         '73.0.3656.0',
 846         '72.0.3626.40',
 847         '71.0.3578.115',
 848         '73.0.3655.1',
 849         '73.0.3655.0',
 850         '72.0.3626.39',
 851         '71.0.3578.114',
 852         '73.0.3654.1',
 853         '73.0.3654.0',
 854         '72.0.3626.38',
 855         '71.0.3578.113',
 856         '73.0.3653.1',
 857         '73.0.3653.0',
 858         '72.0.3626.37',
 859         '71.0.3578.112',
 860         '73.0.3652.1',
 861         '73.0.3652.0',
 862         '72.0.3626.36',
 863         '71.0.3578.111',
 864         '73.0.3651.1',
 865         '73.0.3651.0',
 866         '72.0.3626.35',
 867         '71.0.3578.110',
 868         '73.0.3650.1',
 869         '73.0.3650.0',
 870         '72.0.3626.34',
 871         '71.0.3578.109',
 872         '73.0.3649.1',
 873         '73.0.3649.0',
 874         '72.0.3626.33',
 875         '71.0.3578.108',
 876         '73.0.3648.2',
 877         '73.0.3648.1',
 878         '73.0.3648.0',
 879         '72.0.3626.32',
 880         '71.0.3578.107',
 881         '73.0.3647.2',
 882         '73.0.3647.1',
 883         '73.0.3647.0',
 884         '72.0.3626.31',
 885         '71.0.3578.106',
 886         '73.0.3635.3',
 887         '73.0.3646.2',
 888         '73.0.3646.1',
 889         '73.0.3646.0',
 890         '72.0.3626.30',
 891         '71.0.3578.105',
 892         '72.0.3626.29',
 893         '73.0.3645.2',
 894         '73.0.3645.1',
 895         '73.0.3645.0',
 896         '72.0.3626.28',
 897         '71.0.3578.104',
 898         '72.0.3626.27',
 899         '72.0.3626.26',
 900         '72.0.3626.25',
 901         '72.0.3626.24',
 902         '73.0.3644.0',
 903         '73.0.3643.2',
 904         '72.0.3626.23',
 905         '71.0.3578.103',
 906         '73.0.3643.1',
 907         '73.0.3643.0',
 908         '72.0.3626.22',
 909         '71.0.3578.102',
 910         '73.0.3642.1',
 911         '73.0.3642.0',
 912         '72.0.3626.21',
 913         '71.0.3578.101',
 914         '73.0.3641.1',
 915         '73.0.3641.0',
 916         '72.0.3626.20',
 917         '71.0.3578.100',
 918         '72.0.3626.19',
 919         '73.0.3640.1',
 920         '73.0.3640.0',
 921         '72.0.3626.18',
 922         '73.0.3639.1',
 923         '71.0.3578.99',
 924         '73.0.3639.0',
 925         '72.0.3626.17',
 926         '73.0.3638.2',
 927         '72.0.3626.16',
 928         '73.0.3638.1',
 929         '73.0.3638.0',
 930         '72.0.3626.15',
 931         '71.0.3578.98',
 932         '73.0.3635.2',
 933         '71.0.3578.97',
 934         '73.0.3637.1',
 935         '73.0.3637.0',
 936         '72.0.3626.14',
 937         '71.0.3578.96',
 938         '71.0.3578.95',
 939         '72.0.3626.13',
 940         '71.0.3578.94',
 941         '73.0.3636.2',
 942         '71.0.3578.93',
 943         '73.0.3636.1',
 944         '73.0.3636.0',
 945         '72.0.3626.12',
 946         '71.0.3578.92',
 947         '73.0.3635.1',
 948         '73.0.3635.0',
 949         '72.0.3626.11',
 950         '71.0.3578.91',
 951         '73.0.3634.2',
 952         '73.0.3634.1',
 953         '73.0.3634.0',
 954         '72.0.3626.10',
 955         '71.0.3578.90',
 956         '71.0.3578.89',
 957         '73.0.3633.2',
 958         '73.0.3633.1',
 959         '73.0.3633.0',
 960         '72.0.3610.4',
 961         '72.0.3626.9',
 962         '71.0.3578.88',
 963         '73.0.3632.5',
 964         '73.0.3632.4',
 965         '73.0.3632.3',
 966         '73.0.3632.2',
 967         '73.0.3632.1',
 968         '73.0.3632.0',
 969         '72.0.3626.8',
 970         '71.0.3578.87',
 971         '73.0.3631.2',
 972         '73.0.3631.1',
 973         '73.0.3631.0',
 974         '72.0.3626.7',
 975         '71.0.3578.86',
 976         '72.0.3626.6',
 977         '73.0.3630.1',
 978         '73.0.3630.0',
 979         '72.0.3626.5',
 980         '71.0.3578.85',
 981         '72.0.3626.4',
 982         '73.0.3628.3',
 983         '73.0.3628.2',
 984         '73.0.3629.1',
 985         '73.0.3629.0',
 986         '72.0.3626.3',
 987         '71.0.3578.84',
 988         '73.0.3628.1',
 989         '73.0.3628.0',
 990         '71.0.3578.83',
 991         '73.0.3627.1',
 992         '73.0.3627.0',
 993         '72.0.3626.2',
 994         '71.0.3578.82',
 995         '71.0.3578.81',
 996         '71.0.3578.80',
 997         '72.0.3626.1',
 998         '72.0.3626.0',
 999         '71.0.3578.79',
1000         '70.0.3538.124',
1001         '71.0.3578.78',
1002         '72.0.3623.4',
1003         '72.0.3625.2',
1004         '72.0.3625.1',
1005         '72.0.3625.0',
1006         '71.0.3578.77',
1007         '70.0.3538.123',
1008         '72.0.3624.4',
1009         '72.0.3624.3',
1010         '72.0.3624.2',
1011         '71.0.3578.76',
1012         '72.0.3624.1',
1013         '72.0.3624.0',
1014         '72.0.3623.3',
1015         '71.0.3578.75',
1016         '70.0.3538.122',
1017         '71.0.3578.74',
1018         '72.0.3623.2',
1019         '72.0.3610.3',
1020         '72.0.3623.1',
1021         '72.0.3623.0',
1022         '72.0.3622.3',
1023         '72.0.3622.2',
1024         '71.0.3578.73',
1025         '70.0.3538.121',
1026         '72.0.3622.1',
1027         '72.0.3622.0',
1028         '71.0.3578.72',
1029         '70.0.3538.120',
1030         '72.0.3621.1',
1031         '72.0.3621.0',
1032         '71.0.3578.71',
1033         '70.0.3538.119',
1034         '72.0.3620.1',
1035         '72.0.3620.0',
1036         '71.0.3578.70',
1037         '70.0.3538.118',
1038         '71.0.3578.69',
1039         '72.0.3619.1',
1040         '72.0.3619.0',
1041         '71.0.3578.68',
1042         '70.0.3538.117',
1043         '71.0.3578.67',
1044         '72.0.3618.1',
1045         '72.0.3618.0',
1046         '71.0.3578.66',
1047         '70.0.3538.116',
1048         '72.0.3617.1',
1049         '72.0.3617.0',
1050         '71.0.3578.65',
1051         '70.0.3538.115',
1052         '72.0.3602.3',
1053         '71.0.3578.64',
1054         '72.0.3616.1',
1055         '72.0.3616.0',
1056         '71.0.3578.63',
1057         '70.0.3538.114',
1058         '71.0.3578.62',
1059         '72.0.3615.1',
1060         '72.0.3615.0',
1061         '71.0.3578.61',
1062         '70.0.3538.113',
1063         '72.0.3614.1',
1064         '72.0.3614.0',
1065         '71.0.3578.60',
1066         '70.0.3538.112',
1067         '72.0.3613.1',
1068         '72.0.3613.0',
1069         '71.0.3578.59',
1070         '70.0.3538.111',
1071         '72.0.3612.2',
1072         '72.0.3612.1',
1073         '72.0.3612.0',
1074         '70.0.3538.110',
1075         '71.0.3578.58',
1076         '70.0.3538.109',
1077         '72.0.3611.2',
1078         '72.0.3611.1',
1079         '72.0.3611.0',
1080         '71.0.3578.57',
1081         '70.0.3538.108',
1082         '72.0.3610.2',
1083         '71.0.3578.56',
1084         '71.0.3578.55',
1085         '72.0.3610.1',
1086         '72.0.3610.0',
1087         '71.0.3578.54',
1088         '70.0.3538.107',
1089         '71.0.3578.53',
1090         '72.0.3609.3',
1091         '71.0.3578.52',
1092         '72.0.3609.2',
1093         '71.0.3578.51',
1094         '72.0.3608.5',
1095         '72.0.3609.1',
1096         '72.0.3609.0',
1097         '71.0.3578.50',
1098         '70.0.3538.106',
1099         '72.0.3608.4',
1100         '72.0.3608.3',
1101         '72.0.3608.2',
1102         '71.0.3578.49',
1103         '72.0.3608.1',
1104         '72.0.3608.0',
1105         '70.0.3538.105',
1106         '71.0.3578.48',
1107         '72.0.3607.1',
1108         '72.0.3607.0',
1109         '71.0.3578.47',
1110         '70.0.3538.104',
1111         '72.0.3606.2',
1112         '72.0.3606.1',
1113         '72.0.3606.0',
1114         '71.0.3578.46',
1115         '70.0.3538.103',
1116         '70.0.3538.102',
1117         '72.0.3605.3',
1118         '72.0.3605.2',
1119         '72.0.3605.1',
1120         '72.0.3605.0',
1121         '71.0.3578.45',
1122         '70.0.3538.101',
1123         '71.0.3578.44',
1124         '71.0.3578.43',
1125         '70.0.3538.100',
1126         '70.0.3538.99',
1127         '71.0.3578.42',
1128         '72.0.3604.1',
1129         '72.0.3604.0',
1130         '71.0.3578.41',
1131         '70.0.3538.98',
1132         '71.0.3578.40',
1133         '72.0.3603.2',
1134         '72.0.3603.1',
1135         '72.0.3603.0',
1136         '71.0.3578.39',
1137         '70.0.3538.97',
1138         '72.0.3602.2',
1139         '71.0.3578.38',
1140         '71.0.3578.37',
1141         '72.0.3602.1',
1142         '72.0.3602.0',
1143         '71.0.3578.36',
1144         '70.0.3538.96',
1145         '72.0.3601.1',
1146         '72.0.3601.0',
1147         '71.0.3578.35',
1148         '70.0.3538.95',
1149         '72.0.3600.1',
1150         '72.0.3600.0',
1151         '71.0.3578.34',
1152         '70.0.3538.94',
1153         '72.0.3599.3',
1154         '72.0.3599.2',
1155         '72.0.3599.1',
1156         '72.0.3599.0',
1157         '71.0.3578.33',
1158         '70.0.3538.93',
1159         '72.0.3598.1',
1160         '72.0.3598.0',
1161         '71.0.3578.32',
1162         '70.0.3538.87',
1163         '72.0.3597.1',
1164         '72.0.3597.0',
1165         '72.0.3596.2',
1166         '71.0.3578.31',
1167         '70.0.3538.86',
1168         '71.0.3578.30',
1169         '71.0.3578.29',
1170         '72.0.3596.1',
1171         '72.0.3596.0',
1172         '71.0.3578.28',
1173         '70.0.3538.85',
1174         '72.0.3595.2',
1175         '72.0.3591.3',
1176         '72.0.3595.1',
1177         '72.0.3595.0',
1178         '71.0.3578.27',
1179         '70.0.3538.84',
1180         '72.0.3594.1',
1181         '72.0.3594.0',
1182         '71.0.3578.26',
1183         '70.0.3538.83',
1184         '72.0.3593.2',
1185         '72.0.3593.1',
1186         '72.0.3593.0',
1187         '71.0.3578.25',
1188         '70.0.3538.82',
1189         '72.0.3589.3',
1190         '72.0.3592.2',
1191         '72.0.3592.1',
1192         '72.0.3592.0',
1193         '71.0.3578.24',
1194         '72.0.3589.2',
1195         '70.0.3538.81',
1196         '70.0.3538.80',
1197         '72.0.3591.2',
1198         '72.0.3591.1',
1199         '72.0.3591.0',
1200         '71.0.3578.23',
1201         '70.0.3538.79',
1202         '71.0.3578.22',
1203         '72.0.3590.1',
1204         '72.0.3590.0',
1205         '71.0.3578.21',
1206         '70.0.3538.78',
1207         '70.0.3538.77',
1208         '72.0.3589.1',
1209         '72.0.3589.0',
1210         '71.0.3578.20',
1211         '70.0.3538.76',
1212         '71.0.3578.19',
1213         '70.0.3538.75',
1214         '72.0.3588.1',
1215         '72.0.3588.0',
1216         '71.0.3578.18',
1217         '70.0.3538.74',
1218         '72.0.3586.2',
1219         '72.0.3587.0',
1220         '71.0.3578.17',
1221         '70.0.3538.73',
1222         '72.0.3586.1',
1223         '72.0.3586.0',
1224         '71.0.3578.16',
1225         '70.0.3538.72',
1226         '72.0.3585.1',
1227         '72.0.3585.0',
1228         '71.0.3578.15',
1229         '70.0.3538.71',
1230         '71.0.3578.14',
1231         '72.0.3584.1',
1232         '72.0.3584.0',
1233         '71.0.3578.13',
1234         '70.0.3538.70',
1235         '72.0.3583.2',
1236         '71.0.3578.12',
1237         '72.0.3583.1',
1238         '72.0.3583.0',
1239         '71.0.3578.11',
1240         '70.0.3538.69',
1241         '71.0.3578.10',
1242         '72.0.3582.0',
1243         '72.0.3581.4',
1244         '71.0.3578.9',
1245         '70.0.3538.67',
1246         '72.0.3581.3',
1247         '72.0.3581.2',
1248         '72.0.3581.1',
1249         '72.0.3581.0',
1250         '71.0.3578.8',
1251         '70.0.3538.66',
1252         '72.0.3580.1',
1253         '72.0.3580.0',
1254         '71.0.3578.7',
1255         '70.0.3538.65',
1256         '71.0.3578.6',
1257         '72.0.3579.1',
1258         '72.0.3579.0',
1259         '71.0.3578.5',
1260         '70.0.3538.64',
1261         '71.0.3578.4',
1262         '71.0.3578.3',
1263         '71.0.3578.2',
1264         '71.0.3578.1',
1265         '71.0.3578.0',
1266         '70.0.3538.63',
1267         '69.0.3497.128',
1268         '70.0.3538.62',
1269         '70.0.3538.61',
1270         '70.0.3538.60',
1271         '70.0.3538.59',
1272         '71.0.3577.1',
1273         '71.0.3577.0',
1274         '70.0.3538.58',
1275         '69.0.3497.127',
1276         '71.0.3576.2',
1277         '71.0.3576.1',
1278         '71.0.3576.0',
1279         '70.0.3538.57',
1280         '70.0.3538.56',
1281         '71.0.3575.2',
1282         '70.0.3538.55',
1283         '69.0.3497.126',
1284         '70.0.3538.54',
1285         '71.0.3575.1',
1286         '71.0.3575.0',
1287         '71.0.3574.1',
1288         '71.0.3574.0',
1289         '70.0.3538.53',
1290         '69.0.3497.125',
1291         '70.0.3538.52',
1292         '71.0.3573.1',
1293         '71.0.3573.0',
1294         '70.0.3538.51',
1295         '69.0.3497.124',
1296         '71.0.3572.1',
1297         '71.0.3572.0',
1298         '70.0.3538.50',
1299         '69.0.3497.123',
1300         '71.0.3571.2',
1301         '70.0.3538.49',
1302         '69.0.3497.122',
1303         '71.0.3571.1',
1304         '71.0.3571.0',
1305         '70.0.3538.48',
1306         '69.0.3497.121',
1307         '71.0.3570.1',
1308         '71.0.3570.0',
1309         '70.0.3538.47',
1310         '69.0.3497.120',
1311         '71.0.3568.2',
1312         '71.0.3569.1',
1313         '71.0.3569.0',
1314         '70.0.3538.46',
1315         '69.0.3497.119',
1316         '70.0.3538.45',
1317         '71.0.3568.1',
1318         '71.0.3568.0',
1319         '70.0.3538.44',
1320         '69.0.3497.118',
1321         '70.0.3538.43',
1322         '70.0.3538.42',
1323         '71.0.3567.1',
1324         '71.0.3567.0',
1325         '70.0.3538.41',
1326         '69.0.3497.117',
1327         '71.0.3566.1',
1328         '71.0.3566.0',
1329         '70.0.3538.40',
1330         '69.0.3497.116',
1331         '71.0.3565.1',
1332         '71.0.3565.0',
1333         '70.0.3538.39',
1334         '69.0.3497.115',
1335         '71.0.3564.1',
1336         '71.0.3564.0',
1337         '70.0.3538.38',
1338         '69.0.3497.114',
1339         '71.0.3563.0',
1340         '71.0.3562.2',
1341         '70.0.3538.37',
1342         '69.0.3497.113',
1343         '70.0.3538.36',
1344         '70.0.3538.35',
1345         '71.0.3562.1',
1346         '71.0.3562.0',
1347         '70.0.3538.34',
1348         '69.0.3497.112',
1349         '70.0.3538.33',
1350         '71.0.3561.1',
1351         '71.0.3561.0',
1352         '70.0.3538.32',
1353         '69.0.3497.111',
1354         '71.0.3559.6',
1355         '71.0.3560.1',
1356         '71.0.3560.0',
1357         '71.0.3559.5',
1358         '71.0.3559.4',
1359         '70.0.3538.31',
1360         '69.0.3497.110',
1361         '71.0.3559.3',
1362         '70.0.3538.30',
1363         '69.0.3497.109',
1364         '71.0.3559.2',
1365         '71.0.3559.1',
1366         '71.0.3559.0',
1367         '70.0.3538.29',
1368         '69.0.3497.108',
1369         '71.0.3558.2',
1370         '71.0.3558.1',
1371         '71.0.3558.0',
1372         '70.0.3538.28',
1373         '69.0.3497.107',
1374         '71.0.3557.2',
1375         '71.0.3557.1',
1376         '71.0.3557.0',
1377         '70.0.3538.27',
1378         '69.0.3497.106',
1379         '71.0.3554.4',
1380         '70.0.3538.26',
1381         '71.0.3556.1',
1382         '71.0.3556.0',
1383         '70.0.3538.25',
1384         '71.0.3554.3',
1385         '69.0.3497.105',
1386         '71.0.3554.2',
1387         '70.0.3538.24',
1388         '69.0.3497.104',
1389         '71.0.3555.2',
1390         '70.0.3538.23',
1391         '71.0.3555.1',
1392         '71.0.3555.0',
1393         '70.0.3538.22',
1394         '69.0.3497.103',
1395         '71.0.3554.1',
1396         '71.0.3554.0',
1397         '70.0.3538.21',
1398         '69.0.3497.102',
1399         '71.0.3553.3',
1400         '70.0.3538.20',
1401         '69.0.3497.101',
1402         '71.0.3553.2',
1403         '69.0.3497.100',
1404         '71.0.3553.1',
1405         '71.0.3553.0',
1406         '70.0.3538.19',
1407         '69.0.3497.99',
1408         '69.0.3497.98',
1409         '69.0.3497.97',
1410         '71.0.3552.6',
1411         '71.0.3552.5',
1412         '71.0.3552.4',
1413         '71.0.3552.3',
1414         '71.0.3552.2',
1415         '71.0.3552.1',
1416         '71.0.3552.0',
1417         '70.0.3538.18',
1418         '69.0.3497.96',
1419         '71.0.3551.3',
1420         '71.0.3551.2',
1421         '71.0.3551.1',
1422         '71.0.3551.0',
1423         '70.0.3538.17',
1424         '69.0.3497.95',
1425         '71.0.3550.3',
1426         '71.0.3550.2',
1427         '71.0.3550.1',
1428         '71.0.3550.0',
1429         '70.0.3538.16',
1430         '69.0.3497.94',
1431         '71.0.3549.1',
1432         '71.0.3549.0',
1433         '70.0.3538.15',
1434         '69.0.3497.93',
1435         '69.0.3497.92',
1436         '71.0.3548.1',
1437         '71.0.3548.0',
1438         '70.0.3538.14',
1439         '69.0.3497.91',
1440         '71.0.3547.1',
1441         '71.0.3547.0',
1442         '70.0.3538.13',
1443         '69.0.3497.90',
1444         '71.0.3546.2',
1445         '69.0.3497.89',
1446         '71.0.3546.1',
1447         '71.0.3546.0',
1448         '70.0.3538.12',
1449         '69.0.3497.88',
1450         '71.0.3545.4',
1451         '71.0.3545.3',
1452         '71.0.3545.2',
1453         '71.0.3545.1',
1454         '71.0.3545.0',
1455         '70.0.3538.11',
1456         '69.0.3497.87',
1457         '71.0.3544.5',
1458         '71.0.3544.4',
1459         '71.0.3544.3',
1460         '71.0.3544.2',
1461         '71.0.3544.1',
1462         '71.0.3544.0',
1463         '69.0.3497.86',
1464         '70.0.3538.10',
1465         '69.0.3497.85',
1466         '70.0.3538.9',
1467         '69.0.3497.84',
1468         '71.0.3543.4',
1469         '70.0.3538.8',
1470         '71.0.3543.3',
1471         '71.0.3543.2',
1472         '71.0.3543.1',
1473         '71.0.3543.0',
1474         '70.0.3538.7',
1475         '69.0.3497.83',
1476         '71.0.3542.2',
1477         '71.0.3542.1',
1478         '71.0.3542.0',
1479         '70.0.3538.6',
1480         '69.0.3497.82',
1481         '69.0.3497.81',
1482         '71.0.3541.1',
1483         '71.0.3541.0',
1484         '70.0.3538.5',
1485         '69.0.3497.80',
1486         '71.0.3540.1',
1487         '71.0.3540.0',
1488         '70.0.3538.4',
1489         '69.0.3497.79',
1490         '70.0.3538.3',
1491         '71.0.3539.1',
1492         '71.0.3539.0',
1493         '69.0.3497.78',
1494         '68.0.3440.134',
1495         '69.0.3497.77',
1496         '70.0.3538.2',
1497         '70.0.3538.1',
1498         '70.0.3538.0',
1499         '69.0.3497.76',
1500         '68.0.3440.133',
1501         '69.0.3497.75',
1502         '70.0.3537.2',
1503         '70.0.3537.1',
1504         '70.0.3537.0',
1505         '69.0.3497.74',
1506         '68.0.3440.132',
1507         '70.0.3536.0',
1508         '70.0.3535.5',
1509         '70.0.3535.4',
1510         '70.0.3535.3',
1511         '69.0.3497.73',
1512         '68.0.3440.131',
1513         '70.0.3532.8',
1514         '70.0.3532.7',
1515         '69.0.3497.72',
1516         '69.0.3497.71',
1517         '70.0.3535.2',
1518         '70.0.3535.1',
1519         '70.0.3535.0',
1520         '69.0.3497.70',
1521         '68.0.3440.130',
1522         '69.0.3497.69',
1523         '68.0.3440.129',
1524         '70.0.3534.4',
1525         '70.0.3534.3',
1526         '70.0.3534.2',
1527         '70.0.3534.1',
1528         '70.0.3534.0',
1529         '69.0.3497.68',
1530         '68.0.3440.128',
1531         '70.0.3533.2',
1532         '70.0.3533.1',
1533         '70.0.3533.0',
1534         '69.0.3497.67',
1535         '68.0.3440.127',
1536         '70.0.3532.6',
1537         '70.0.3532.5',
1538         '70.0.3532.4',
1539         '69.0.3497.66',
1540         '68.0.3440.126',
1541         '70.0.3532.3',
1542         '70.0.3532.2',
1543         '70.0.3532.1',
1544         '69.0.3497.60',
1545         '69.0.3497.65',
1546         '69.0.3497.64',
1547         '70.0.3532.0',
1548         '70.0.3531.0',
1549         '70.0.3530.4',
1550         '70.0.3530.3',
1551         '70.0.3530.2',
1552         '69.0.3497.58',
1553         '68.0.3440.125',
1554         '69.0.3497.57',
1555         '69.0.3497.56',
1556         '69.0.3497.55',
1557         '69.0.3497.54',
1558         '70.0.3530.1',
1559         '70.0.3530.0',
1560         '69.0.3497.53',
1561         '68.0.3440.124',
1562         '69.0.3497.52',
1563         '70.0.3529.3',
1564         '70.0.3529.2',
1565         '70.0.3529.1',
1566         '70.0.3529.0',
1567         '69.0.3497.51',
1568         '70.0.3528.4',
1569         '68.0.3440.123',
1570         '70.0.3528.3',
1571         '70.0.3528.2',
1572         '70.0.3528.1',
1573         '70.0.3528.0',
1574         '69.0.3497.50',
1575         '68.0.3440.122',
1576         '70.0.3527.1',
1577         '70.0.3527.0',
1578         '69.0.3497.49',
1579         '68.0.3440.121',
1580         '70.0.3526.1',
1581         '70.0.3526.0',
1582         '68.0.3440.120',
1583         '69.0.3497.48',
1584         '69.0.3497.47',
1585         '68.0.3440.119',
1586         '68.0.3440.118',
1587         '70.0.3525.5',
1588         '70.0.3525.4',
1589         '70.0.3525.3',
1590         '68.0.3440.117',
1591         '69.0.3497.46',
1592         '70.0.3525.2',
1593         '70.0.3525.1',
1594         '70.0.3525.0',
1595         '69.0.3497.45',
1596         '68.0.3440.116',
1597         '70.0.3524.4',
1598         '70.0.3524.3',
1599         '69.0.3497.44',
1600         '70.0.3524.2',
1601         '70.0.3524.1',
1602         '70.0.3524.0',
1603         '70.0.3523.2',
1604         '69.0.3497.43',
1605         '68.0.3440.115',
1606         '70.0.3505.9',
1607         '69.0.3497.42',
1608         '70.0.3505.8',
1609         '70.0.3523.1',
1610         '70.0.3523.0',
1611         '69.0.3497.41',
1612         '68.0.3440.114',
1613         '70.0.3505.7',
1614         '69.0.3497.40',
1615         '70.0.3522.1',
1616         '70.0.3522.0',
1617         '70.0.3521.2',
1618         '69.0.3497.39',
1619         '68.0.3440.113',
1620         '70.0.3505.6',
1621         '70.0.3521.1',
1622         '70.0.3521.0',
1623         '69.0.3497.38',
1624         '68.0.3440.112',
1625         '70.0.3520.1',
1626         '70.0.3520.0',
1627         '69.0.3497.37',
1628         '68.0.3440.111',
1629         '70.0.3519.3',
1630         '70.0.3519.2',
1631         '70.0.3519.1',
1632         '70.0.3519.0',
1633         '69.0.3497.36',
1634         '68.0.3440.110',
1635         '70.0.3518.1',
1636         '70.0.3518.0',
1637         '69.0.3497.35',
1638         '69.0.3497.34',
1639         '68.0.3440.109',
1640         '70.0.3517.1',
1641         '70.0.3517.0',
1642         '69.0.3497.33',
1643         '68.0.3440.108',
1644         '69.0.3497.32',
1645         '70.0.3516.3',
1646         '70.0.3516.2',
1647         '70.0.3516.1',
1648         '70.0.3516.0',
1649         '69.0.3497.31',
1650         '68.0.3440.107',
1651         '70.0.3515.4',
1652         '68.0.3440.106',
1653         '70.0.3515.3',
1654         '70.0.3515.2',
1655         '70.0.3515.1',
1656         '70.0.3515.0',
1657         '69.0.3497.30',
1658         '68.0.3440.105',
1659         '68.0.3440.104',
1660         '70.0.3514.2',
1661         '70.0.3514.1',
1662         '70.0.3514.0',
1663         '69.0.3497.29',
1664         '68.0.3440.103',
1665         '70.0.3513.1',
1666         '70.0.3513.0',
1667         '69.0.3497.28',
1668     )
1669     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1670
1671
1672 std_headers = {
1673     'User-Agent': random_user_agent(),
1674     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1675     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1676     'Accept-Encoding': 'gzip, deflate',
1677     'Accept-Language': 'en-us,en;q=0.5',
1678 }
1679
1680
1681 USER_AGENTS = {
1682     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1683 }
1684
1685
1686 NO_DEFAULT = object()
1687
1688 ENGLISH_MONTH_NAMES = [
1689     'January', 'February', 'March', 'April', 'May', 'June',
1690     'July', 'August', 'September', 'October', 'November', 'December']
1691
1692 MONTH_NAMES = {
1693     'en': ENGLISH_MONTH_NAMES,
1694     'fr': [
1695         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1696         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1697 }
1698
1699 KNOWN_EXTENSIONS = (
1700     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1701     'flv', 'f4v', 'f4a', 'f4b',
1702     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1703     'mkv', 'mka', 'mk3d',
1704     'avi', 'divx',
1705     'mov',
1706     'asf', 'wmv', 'wma',
1707     '3gp', '3g2',
1708     'mp3',
1709     'flac',
1710     'ape',
1711     'wav',
1712     'f4f', 'f4m', 'm3u8', 'smil')
1713
1714 # needed for sanitizing filenames in restricted mode
1715 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1716                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1717                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1718
1719 DATE_FORMATS = (
1720     '%d %B %Y',
1721     '%d %b %Y',
1722     '%B %d %Y',
1723     '%B %dst %Y',
1724     '%B %dnd %Y',
1725     '%B %drd %Y',
1726     '%B %dth %Y',
1727     '%b %d %Y',
1728     '%b %dst %Y',
1729     '%b %dnd %Y',
1730     '%b %drd %Y',
1731     '%b %dth %Y',
1732     '%b %dst %Y %I:%M',
1733     '%b %dnd %Y %I:%M',
1734     '%b %drd %Y %I:%M',
1735     '%b %dth %Y %I:%M',
1736     '%Y %m %d',
1737     '%Y-%m-%d',
1738     '%Y/%m/%d',
1739     '%Y/%m/%d %H:%M',
1740     '%Y/%m/%d %H:%M:%S',
1741     '%Y-%m-%d %H:%M',
1742     '%Y-%m-%d %H:%M:%S',
1743     '%Y-%m-%d %H:%M:%S.%f',
1744     '%d.%m.%Y %H:%M',
1745     '%d.%m.%Y %H.%M',
1746     '%Y-%m-%dT%H:%M:%SZ',
1747     '%Y-%m-%dT%H:%M:%S.%fZ',
1748     '%Y-%m-%dT%H:%M:%S.%f0Z',
1749     '%Y-%m-%dT%H:%M:%S',
1750     '%Y-%m-%dT%H:%M:%S.%f',
1751     '%Y-%m-%dT%H:%M',
1752     '%b %d %Y at %H:%M',
1753     '%b %d %Y at %H:%M:%S',
1754     '%B %d %Y at %H:%M',
1755     '%B %d %Y at %H:%M:%S',
1756 )
1757
1758 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1759 DATE_FORMATS_DAY_FIRST.extend([
1760     '%d-%m-%Y',
1761     '%d.%m.%Y',
1762     '%d.%m.%y',
1763     '%d/%m/%Y',
1764     '%d/%m/%y',
1765     '%d/%m/%Y %H:%M:%S',
1766 ])
1767
1768 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1769 DATE_FORMATS_MONTH_FIRST.extend([
1770     '%m-%d-%Y',
1771     '%m.%d.%Y',
1772     '%m/%d/%Y',
1773     '%m/%d/%y',
1774     '%m/%d/%Y %H:%M:%S',
1775 ])
1776
1777 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1778 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1779
1780
1781 def preferredencoding():
1782     """Get preferred encoding.
1783
1784     Returns the best encoding scheme for the system, based on
1785     locale.getpreferredencoding() and some further tweaks.
1786     """
1787     try:
1788         pref = locale.getpreferredencoding()
1789         'TEST'.encode(pref)
1790     except Exception:
1791         pref = 'UTF-8'
1792
1793     return pref
1794
1795
1796 def write_json_file(obj, fn):
1797     """ Encode obj as JSON and write it to fn, atomically if possible """
1798
1799     fn = encodeFilename(fn)
1800     if sys.version_info < (3, 0) and sys.platform != 'win32':
1801         encoding = get_filesystem_encoding()
1802         # os.path.basename returns a bytes object, but NamedTemporaryFile
1803         # will fail if the filename contains non ascii characters unless we
1804         # use a unicode object
1805         path_basename = lambda f: os.path.basename(fn).decode(encoding)
1806         # the same for os.path.dirname
1807         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1808     else:
1809         path_basename = os.path.basename
1810         path_dirname = os.path.dirname
1811
1812     args = {
1813         'suffix': '.tmp',
1814         'prefix': path_basename(fn) + '.',
1815         'dir': path_dirname(fn),
1816         'delete': False,
1817     }
1818
1819     # In Python 2.x, json.dump expects a bytestream.
1820     # In Python 3.x, it writes to a character stream
1821     if sys.version_info < (3, 0):
1822         args['mode'] = 'wb'
1823     else:
1824         args.update({
1825             'mode': 'w',
1826             'encoding': 'utf-8',
1827         })
1828
1829     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1830
1831     try:
1832         with tf:
1833             json.dump(obj, tf)
1834         if sys.platform == 'win32':
1835             # Need to remove existing file on Windows, else os.rename raises
1836             # WindowsError or FileExistsError.
1837             try:
1838                 os.unlink(fn)
1839             except OSError:
1840                 pass
1841         try:
1842             mask = os.umask(0)
1843             os.umask(mask)
1844             os.chmod(tf.name, 0o666 & ~mask)
1845         except OSError:
1846             pass
1847         os.rename(tf.name, fn)
1848     except Exception:
1849         try:
1850             os.remove(tf.name)
1851         except OSError:
1852             pass
1853         raise
1854
1855
1856 if sys.version_info >= (2, 7):
1857     def find_xpath_attr(node, xpath, key, val=None):
1858         """ Find the xpath xpath[@key=val] """
1859         assert re.match(r'^[a-zA-Z_-]+$', key)
1860         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1861         return node.find(expr)
1862 else:
1863     def find_xpath_attr(node, xpath, key, val=None):
1864         for f in node.findall(compat_xpath(xpath)):
1865             if key not in f.attrib:
1866                 continue
1867             if val is None or f.attrib.get(key) == val:
1868                 return f
1869         return None
1870
1871 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1872 # the namespace parameter
1873
1874
1875 def xpath_with_ns(path, ns_map):
1876     components = [c.split(':') for c in path.split('/')]
1877     replaced = []
1878     for c in components:
1879         if len(c) == 1:
1880             replaced.append(c[0])
1881         else:
1882             ns, tag = c
1883             replaced.append('{%s}%s' % (ns_map[ns], tag))
1884     return '/'.join(replaced)
1885
1886
1887 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1888     def _find_xpath(xpath):
1889         return node.find(compat_xpath(xpath))
1890
1891     if isinstance(xpath, (str, compat_str)):
1892         n = _find_xpath(xpath)
1893     else:
1894         for xp in xpath:
1895             n = _find_xpath(xp)
1896             if n is not None:
1897                 break
1898
1899     if n is None:
1900         if default is not NO_DEFAULT:
1901             return default
1902         elif fatal:
1903             name = xpath if name is None else name
1904             raise ExtractorError('Could not find XML element %s' % name)
1905         else:
1906             return None
1907     return n
1908
1909
1910 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1911     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1912     if n is None or n == default:
1913         return n
1914     if n.text is None:
1915         if default is not NO_DEFAULT:
1916             return default
1917         elif fatal:
1918             name = xpath if name is None else name
1919             raise ExtractorError('Could not find XML element\'s text %s' % name)
1920         else:
1921             return None
1922     return n.text
1923
1924
1925 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1926     n = find_xpath_attr(node, xpath, key)
1927     if n is None:
1928         if default is not NO_DEFAULT:
1929             return default
1930         elif fatal:
1931             name = '%s[@%s]' % (xpath, key) if name is None else name
1932             raise ExtractorError('Could not find XML attribute %s' % name)
1933         else:
1934             return None
1935     return n.attrib[key]
1936
1937
1938 def get_element_by_id(id, html):
1939     """Return the content of the tag with the specified ID in the passed HTML document"""
1940     return get_element_by_attribute('id', id, html)
1941
1942
1943 def get_element_by_class(class_name, html):
1944     """Return the content of the first tag with the specified class in the passed HTML document"""
1945     retval = get_elements_by_class(class_name, html)
1946     return retval[0] if retval else None
1947
1948
1949 def get_element_by_attribute(attribute, value, html, escape_value=True):
1950     retval = get_elements_by_attribute(attribute, value, html, escape_value)
1951     return retval[0] if retval else None
1952
1953
1954 def get_elements_by_class(class_name, html):
1955     """Return the content of all tags with the specified class in the passed HTML document as a list"""
1956     return get_elements_by_attribute(
1957         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1958         html, escape_value=False)
1959
1960
1961 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1962     """Return the content of the tag with the specified attribute in the passed HTML document"""
1963
1964     value = re.escape(value) if escape_value else value
1965
1966     retlist = []
1967     for m in re.finditer(r'''(?xs)
1968         <([a-zA-Z0-9:._-]+)
1969          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1970          \s+%s=['"]?%s['"]?
1971          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1972         \s*>
1973         (?P<content>.*?)
1974         </\1>
1975     ''' % (re.escape(attribute), value), html):
1976         res = m.group('content')
1977
1978         if res.startswith('"') or res.startswith("'"):
1979             res = res[1:-1]
1980
1981         retlist.append(unescapeHTML(res))
1982
1983     return retlist
1984
1985
1986 class HTMLAttributeParser(compat_HTMLParser):
1987     """Trivial HTML parser to gather the attributes for a single element"""
1988     def __init__(self):
1989         self.attrs = {}
1990         compat_HTMLParser.__init__(self)
1991
1992     def handle_starttag(self, tag, attrs):
1993         self.attrs = dict(attrs)
1994
1995
1996 def extract_attributes(html_element):
1997     """Given a string for an HTML element such as
1998     <el
1999          a="foo" B="bar" c="&98;az" d=boz
2000          empty= noval entity="&amp;"
2001          sq='"' dq="'"
2002     >
2003     Decode and return a dictionary of attributes.
2004     {
2005         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2006         'empty': '', 'noval': None, 'entity': '&',
2007         'sq': '"', 'dq': '\''
2008     }.
2009     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2010     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2011     """
2012     parser = HTMLAttributeParser()
2013     try:
2014         parser.feed(html_element)
2015         parser.close()
2016     # Older Python may throw HTMLParseError in case of malformed HTML
2017     except compat_HTMLParseError:
2018         pass
2019     return parser.attrs
2020
2021
2022 def clean_html(html):
2023     """Clean an HTML snippet into a readable string"""
2024
2025     if html is None:  # Convenience for sanitizing descriptions etc.
2026         return html
2027
2028     # Newline vs <br />
2029     html = html.replace('\n', ' ')
2030     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2031     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2032     # Strip html tags
2033     html = re.sub('<.*?>', '', html)
2034     # Replace html entities
2035     html = unescapeHTML(html)
2036     return html.strip()
2037
2038
2039 def sanitize_open(filename, open_mode):
2040     """Try to open the given filename, and slightly tweak it if this fails.
2041
2042     Attempts to open the given filename. If this fails, it tries to change
2043     the filename slightly, step by step, until it's either able to open it
2044     or it fails and raises a final exception, like the standard open()
2045     function.
2046
2047     It returns the tuple (stream, definitive_file_name).
2048     """
2049     try:
2050         if filename == '-':
2051             if sys.platform == 'win32':
2052                 import msvcrt
2053                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2054             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2055         stream = open(encodeFilename(filename), open_mode)
2056         return (stream, filename)
2057     except (IOError, OSError) as err:
2058         if err.errno in (errno.EACCES,):
2059             raise
2060
2061         # In case of error, try to remove win32 forbidden chars
2062         alt_filename = sanitize_path(filename)
2063         if alt_filename == filename:
2064             raise
2065         else:
2066             # An exception here should be caught in the caller
2067             stream = open(encodeFilename(alt_filename), open_mode)
2068             return (stream, alt_filename)
2069
2070
2071 def timeconvert(timestr):
2072     """Convert RFC 2822 defined time string into system timestamp"""
2073     timestamp = None
2074     timetuple = email.utils.parsedate_tz(timestr)
2075     if timetuple is not None:
2076         timestamp = email.utils.mktime_tz(timetuple)
2077     return timestamp
2078
2079
2080 def sanitize_filename(s, restricted=False, is_id=False):
2081     """Sanitizes a string so it could be used as part of a filename.
2082     If restricted is set, use a stricter subset of allowed characters.
2083     Set is_id if this is not an arbitrary string, but an ID that should be kept
2084     if possible.
2085     """
2086     def replace_insane(char):
2087         if restricted and char in ACCENT_CHARS:
2088             return ACCENT_CHARS[char]
2089         if char == '?' or ord(char) < 32 or ord(char) == 127:
2090             return ''
2091         elif char == '"':
2092             return '' if restricted else '\''
2093         elif char == ':':
2094             return '_-' if restricted else ' -'
2095         elif char in '\\/|*<>':
2096             return '_'
2097         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2098             return '_'
2099         if restricted and ord(char) > 127:
2100             return '_'
2101         return char
2102
2103     # Handle timestamps
2104     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
2105     result = ''.join(map(replace_insane, s))
2106     if not is_id:
2107         while '__' in result:
2108             result = result.replace('__', '_')
2109         result = result.strip('_')
2110         # Common case of "Foreign band name - English song title"
2111         if restricted and result.startswith('-_'):
2112             result = result[2:]
2113         if result.startswith('-'):
2114             result = '_' + result[len('-'):]
2115         result = result.lstrip('.')
2116         if not result:
2117             result = '_'
2118     return result
2119
2120
2121 def sanitize_path(s):
2122     """Sanitizes and normalizes path on Windows"""
2123     if sys.platform != 'win32':
2124         return s
2125     drive_or_unc, _ = os.path.splitdrive(s)
2126     if sys.version_info < (2, 7) and not drive_or_unc:
2127         drive_or_unc, _ = os.path.splitunc(s)
2128     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2129     if drive_or_unc:
2130         norm_path.pop(0)
2131     sanitized_path = [
2132         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
2133         for path_part in norm_path]
2134     if drive_or_unc:
2135         sanitized_path.insert(0, drive_or_unc + os.path.sep)
2136     return os.path.join(*sanitized_path)
2137
2138
2139 def sanitize_url(url):
2140     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2141     # the number of unwanted failures due to missing protocol
2142     if url.startswith('//'):
2143         return 'http:%s' % url
2144     # Fix some common typos seen so far
2145     COMMON_TYPOS = (
2146         # https://github.com/ytdl-org/youtube-dl/issues/15649
2147         (r'^httpss://', r'https://'),
2148         # https://bx1.be/lives/direct-tv/
2149         (r'^rmtp([es]?)://', r'rtmp\1://'),
2150     )
2151     for mistake, fixup in COMMON_TYPOS:
2152         if re.match(mistake, url):
2153             return re.sub(mistake, fixup, url)
2154     return url
2155
2156
2157 def sanitized_Request(url, *args, **kwargs):
2158     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2159
2160
2161 def expand_path(s):
2162     """Expand shell variables and ~"""
2163     return os.path.expandvars(compat_expanduser(s))
2164
2165
2166 def orderedSet(iterable):
2167     """ Remove all duplicates from the input iterable """
2168     res = []
2169     for el in iterable:
2170         if el not in res:
2171             res.append(el)
2172     return res
2173
2174
2175 def _htmlentity_transform(entity_with_semicolon):
2176     """Transforms an HTML entity to a character."""
2177     entity = entity_with_semicolon[:-1]
2178
2179     # Known non-numeric HTML entity
2180     if entity in compat_html_entities.name2codepoint:
2181         return compat_chr(compat_html_entities.name2codepoint[entity])
2182
2183     # TODO: HTML5 allows entities without a semicolon. For example,
2184     # '&Eacuteric' should be decoded as 'Éric'.
2185     if entity_with_semicolon in compat_html_entities_html5:
2186         return compat_html_entities_html5[entity_with_semicolon]
2187
2188     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2189     if mobj is not None:
2190         numstr = mobj.group(1)
2191         if numstr.startswith('x'):
2192             base = 16
2193             numstr = '0%s' % numstr
2194         else:
2195             base = 10
2196         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2197         try:
2198             return compat_chr(int(numstr, base))
2199         except ValueError:
2200             pass
2201
2202     # Unknown entity in name, return its literal representation
2203     return '&%s;' % entity
2204
2205
2206 def unescapeHTML(s):
2207     if s is None:
2208         return None
2209     assert type(s) == compat_str
2210
2211     return re.sub(
2212         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2213
2214
2215 def get_subprocess_encoding():
2216     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2217         # For subprocess calls, encode with locale encoding
2218         # Refer to http://stackoverflow.com/a/9951851/35070
2219         encoding = preferredencoding()
2220     else:
2221         encoding = sys.getfilesystemencoding()
2222     if encoding is None:
2223         encoding = 'utf-8'
2224     return encoding
2225
2226
2227 def encodeFilename(s, for_subprocess=False):
2228     """
2229     @param s The name of the file
2230     """
2231
2232     assert type(s) == compat_str
2233
2234     # Python 3 has a Unicode API
2235     if sys.version_info >= (3, 0):
2236         return s
2237
2238     # Pass '' directly to use Unicode APIs on Windows 2000 and up
2239     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2240     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2241     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2242         return s
2243
2244     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2245     if sys.platform.startswith('java'):
2246         return s
2247
2248     return s.encode(get_subprocess_encoding(), 'ignore')
2249
2250
2251 def decodeFilename(b, for_subprocess=False):
2252
2253     if sys.version_info >= (3, 0):
2254         return b
2255
2256     if not isinstance(b, bytes):
2257         return b
2258
2259     return b.decode(get_subprocess_encoding(), 'ignore')
2260
2261
2262 def encodeArgument(s):
2263     if not isinstance(s, compat_str):
2264         # Legacy code that uses byte strings
2265         # Uncomment the following line after fixing all post processors
2266         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2267         s = s.decode('ascii')
2268     return encodeFilename(s, True)
2269
2270
2271 def decodeArgument(b):
2272     return decodeFilename(b, True)
2273
2274
2275 def decodeOption(optval):
2276     if optval is None:
2277         return optval
2278     if isinstance(optval, bytes):
2279         optval = optval.decode(preferredencoding())
2280
2281     assert isinstance(optval, compat_str)
2282     return optval
2283
2284
2285 def formatSeconds(secs):
2286     if secs > 3600:
2287         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
2288     elif secs > 60:
2289         return '%d:%02d' % (secs // 60, secs % 60)
2290     else:
2291         return '%d' % secs
2292
2293
2294 def make_HTTPS_handler(params, **kwargs):
2295     opts_no_check_certificate = params.get('nocheckcertificate', False)
2296     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
2297         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
2298         if opts_no_check_certificate:
2299             context.check_hostname = False
2300             context.verify_mode = ssl.CERT_NONE
2301         try:
2302             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2303         except TypeError:
2304             # Python 2.7.8
2305             # (create_default_context present but HTTPSHandler has no context=)
2306             pass
2307
2308     if sys.version_info < (3, 2):
2309         return YoutubeDLHTTPSHandler(params, **kwargs)
2310     else:  # Python < 3.4
2311         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
2312         context.verify_mode = (ssl.CERT_NONE
2313                                if opts_no_check_certificate
2314                                else ssl.CERT_REQUIRED)
2315         context.set_default_verify_paths()
2316         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2317
2318
2319 def bug_reports_message():
2320     if ytdl_is_updateable():
2321         update_cmd = 'type  youtube-dl -U  to update'
2322     else:
2323         update_cmd = 'see  https://yt-dl.org/update  on how to update'
2324     msg = '; please report this issue on https://yt-dl.org/bug .'
2325     msg += ' Make sure you are using the latest version; %s.' % update_cmd
2326     msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
2327     return msg
2328
2329
2330 class YoutubeDLError(Exception):
2331     """Base exception for YoutubeDL errors."""
2332     pass
2333
2334
2335 class ExtractorError(YoutubeDLError):
2336     """Error during info extraction."""
2337
2338     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
2339         """ tb, if given, is the original traceback (so that it can be printed out).
2340         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
2341         """
2342
2343         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
2344             expected = True
2345         if video_id is not None:
2346             msg = video_id + ': ' + msg
2347         if cause:
2348             msg += ' (caused by %r)' % cause
2349         if not expected:
2350             msg += bug_reports_message()
2351         super(ExtractorError, self).__init__(msg)
2352
2353         self.traceback = tb
2354         self.exc_info = sys.exc_info()  # preserve original exception
2355         self.cause = cause
2356         self.video_id = video_id
2357
2358     def format_traceback(self):
2359         if self.traceback is None:
2360             return None
2361         return ''.join(traceback.format_tb(self.traceback))
2362
2363
2364 class UnsupportedError(ExtractorError):
2365     def __init__(self, url):
2366         super(UnsupportedError, self).__init__(
2367             'Unsupported URL: %s' % url, expected=True)
2368         self.url = url
2369
2370
2371 class RegexNotFoundError(ExtractorError):
2372     """Error when a regex didn't match"""
2373     pass
2374
2375
2376 class GeoRestrictedError(ExtractorError):
2377     """Geographic restriction Error exception.
2378
2379     This exception may be thrown when a video is not available from your
2380     geographic location due to geographic restrictions imposed by a website.
2381     """
2382     def __init__(self, msg, countries=None):
2383         super(GeoRestrictedError, self).__init__(msg, expected=True)
2384         self.msg = msg
2385         self.countries = countries
2386
2387
2388 class DownloadError(YoutubeDLError):
2389     """Download Error exception.
2390
2391     This exception may be thrown by FileDownloader objects if they are not
2392     configured to continue on errors. They will contain the appropriate
2393     error message.
2394     """
2395
2396     def __init__(self, msg, exc_info=None):
2397         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2398         super(DownloadError, self).__init__(msg)
2399         self.exc_info = exc_info
2400
2401
2402 class SameFileError(YoutubeDLError):
2403     """Same File exception.
2404
2405     This exception will be thrown by FileDownloader objects if they detect
2406     multiple files would have to be downloaded to the same file on disk.
2407     """
2408     pass
2409
2410
2411 class PostProcessingError(YoutubeDLError):
2412     """Post Processing exception.
2413
2414     This exception may be raised by PostProcessor's .run() method to
2415     indicate an error in the postprocessing task.
2416     """
2417
2418     def __init__(self, msg):
2419         super(PostProcessingError, self).__init__(msg)
2420         self.msg = msg
2421
2422
2423 class MaxDownloadsReached(YoutubeDLError):
2424     """ --max-downloads limit has been reached. """
2425     pass
2426
2427
2428 class UnavailableVideoError(YoutubeDLError):
2429     """Unavailable Format exception.
2430
2431     This exception will be thrown when a video is requested
2432     in a format that is not available for that video.
2433     """
2434     pass
2435
2436
2437 class ContentTooShortError(YoutubeDLError):
2438     """Content Too Short exception.
2439
2440     This exception may be raised by FileDownloader objects when a file they
2441     download is too small for what the server announced first, indicating
2442     the connection was probably interrupted.
2443     """
2444
2445     def __init__(self, downloaded, expected):
2446         super(ContentTooShortError, self).__init__(
2447             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2448         )
2449         # Both in bytes
2450         self.downloaded = downloaded
2451         self.expected = expected
2452
2453
2454 class XAttrMetadataError(YoutubeDLError):
2455     def __init__(self, code=None, msg='Unknown error'):
2456         super(XAttrMetadataError, self).__init__(msg)
2457         self.code = code
2458         self.msg = msg
2459
2460         # Parsing code and msg
2461         if (self.code in (errno.ENOSPC, errno.EDQUOT)
2462                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
2463             self.reason = 'NO_SPACE'
2464         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2465             self.reason = 'VALUE_TOO_LONG'
2466         else:
2467             self.reason = 'NOT_SUPPORTED'
2468
2469
2470 class XAttrUnavailableError(YoutubeDLError):
2471     pass
2472
2473
2474 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
2475     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2476     # expected HTTP responses to meet HTTP/1.0 or later (see also
2477     # https://github.com/ytdl-org/youtube-dl/issues/6727)
2478     if sys.version_info < (3, 0):
2479         kwargs['strict'] = True
2480     hc = http_class(*args, **compat_kwargs(kwargs))
2481     source_address = ydl_handler._params.get('source_address')
2482
2483     if source_address is not None:
2484         # This is to workaround _create_connection() from socket where it will try all
2485         # address data from getaddrinfo() including IPv6. This filters the result from
2486         # getaddrinfo() based on the source_address value.
2487         # This is based on the cpython socket.create_connection() function.
2488         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2489         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2490             host, port = address
2491             err = None
2492             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
2493             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2494             ip_addrs = [addr for addr in addrs if addr[0] == af]
2495             if addrs and not ip_addrs:
2496                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2497                 raise socket.error(
2498                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
2499                     % (ip_version, source_address[0]))
2500             for res in ip_addrs:
2501                 af, socktype, proto, canonname, sa = res
2502                 sock = None
2503                 try:
2504                     sock = socket.socket(af, socktype, proto)
2505                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2506                         sock.settimeout(timeout)
2507                     sock.bind(source_address)
2508                     sock.connect(sa)
2509                     err = None  # Explicitly break reference cycle
2510                     return sock
2511                 except socket.error as _:
2512                     err = _
2513                     if sock is not None:
2514                         sock.close()
2515             if err is not None:
2516                 raise err
2517             else:
2518                 raise socket.error('getaddrinfo returns an empty list')
2519         if hasattr(hc, '_create_connection'):
2520             hc._create_connection = _create_connection
2521         sa = (source_address, 0)
2522         if hasattr(hc, 'source_address'):  # Python 2.7+
2523             hc.source_address = sa
2524         else:  # Python 2.6
2525             def _hc_connect(self, *args, **kwargs):
2526                 sock = _create_connection(
2527                     (self.host, self.port), self.timeout, sa)
2528                 if is_https:
2529                     self.sock = ssl.wrap_socket(
2530                         sock, self.key_file, self.cert_file,
2531                         ssl_version=ssl.PROTOCOL_TLSv1)
2532                 else:
2533                     self.sock = sock
2534             hc.connect = functools.partial(_hc_connect, hc)
2535
2536     return hc
2537
2538
2539 def handle_youtubedl_headers(headers):
2540     filtered_headers = headers
2541
2542     if 'Youtubedl-no-compression' in filtered_headers:
2543         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
2544         del filtered_headers['Youtubedl-no-compression']
2545
2546     return filtered_headers
2547
2548
2549 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2550     """Handler for HTTP requests and responses.
2551
2552     This class, when installed with an OpenerDirector, automatically adds
2553     the standard headers to every HTTP request and handles gzipped and
2554     deflated responses from web servers. If compression is to be avoided in
2555     a particular request, the original request in the program code only has
2556     to include the HTTP header "Youtubedl-no-compression", which will be
2557     removed before making the real request.
2558
2559     Part of this code was copied from:
2560
2561     http://techknack.net/python-urllib2-handlers/
2562
2563     Andrew Rowls, the author of that code, agreed to release it to the
2564     public domain.
2565     """
2566
2567     def __init__(self, params, *args, **kwargs):
2568         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2569         self._params = params
2570
2571     def http_open(self, req):
2572         conn_class = compat_http_client.HTTPConnection
2573
2574         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2575         if socks_proxy:
2576             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2577             del req.headers['Ytdl-socks-proxy']
2578
2579         return self.do_open(functools.partial(
2580             _create_http_connection, self, conn_class, False),
2581             req)
2582
2583     @staticmethod
2584     def deflate(data):
2585         try:
2586             return zlib.decompress(data, -zlib.MAX_WBITS)
2587         except zlib.error:
2588             return zlib.decompress(data)
2589
2590     def http_request(self, req):
2591         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2592         # always respected by websites, some tend to give out URLs with non percent-encoded
2593         # non-ASCII characters (see telemb.py, ard.py [#3412])
2594         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2595         # To work around aforementioned issue we will replace request's original URL with
2596         # percent-encoded one
2597         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2598         # the code of this workaround has been moved here from YoutubeDL.urlopen()
2599         url = req.get_full_url()
2600         url_escaped = escape_url(url)
2601
2602         # Substitute URL if any change after escaping
2603         if url != url_escaped:
2604             req = update_Request(req, url=url_escaped)
2605
2606         for h, v in std_headers.items():
2607             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2608             # The dict keys are capitalized because of this bug by urllib
2609             if h.capitalize() not in req.headers:
2610                 req.add_header(h, v)
2611
2612         req.headers = handle_youtubedl_headers(req.headers)
2613
2614         if sys.version_info < (2, 7) and '#' in req.get_full_url():
2615             # Python 2.6 is brain-dead when it comes to fragments
2616             req._Request__original = req._Request__original.partition('#')[0]
2617             req._Request__r_type = req._Request__r_type.partition('#')[0]
2618
2619         return req
2620
2621     def http_response(self, req, resp):
2622         old_resp = resp
2623         # gzip
2624         if resp.headers.get('Content-encoding', '') == 'gzip':
2625             content = resp.read()
2626             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2627             try:
2628                 uncompressed = io.BytesIO(gz.read())
2629             except IOError as original_ioerror:
2630                 # There may be junk add the end of the file
2631                 # See http://stackoverflow.com/q/4928560/35070 for details
2632                 for i in range(1, 1024):
2633                     try:
2634                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2635                         uncompressed = io.BytesIO(gz.read())
2636                     except IOError:
2637                         continue
2638                     break
2639                 else:
2640                     raise original_ioerror
2641             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2642             resp.msg = old_resp.msg
2643             del resp.headers['Content-encoding']
2644         # deflate
2645         if resp.headers.get('Content-encoding', '') == 'deflate':
2646             gz = io.BytesIO(self.deflate(resp.read()))
2647             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2648             resp.msg = old_resp.msg
2649             del resp.headers['Content-encoding']
2650         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2651         # https://github.com/ytdl-org/youtube-dl/issues/6457).
2652         if 300 <= resp.code < 400:
2653             location = resp.headers.get('Location')
2654             if location:
2655                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2656                 if sys.version_info >= (3, 0):
2657                     location = location.encode('iso-8859-1').decode('utf-8')
2658                 else:
2659                     location = location.decode('utf-8')
2660                 location_escaped = escape_url(location)
2661                 if location != location_escaped:
2662                     del resp.headers['Location']
2663                     if sys.version_info < (3, 0):
2664                         location_escaped = location_escaped.encode('utf-8')
2665                     resp.headers['Location'] = location_escaped
2666         return resp
2667
2668     https_request = http_request
2669     https_response = http_response
2670
2671
2672 def make_socks_conn_class(base_class, socks_proxy):
2673     assert issubclass(base_class, (
2674         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2675
2676     url_components = compat_urlparse.urlparse(socks_proxy)
2677     if url_components.scheme.lower() == 'socks5':
2678         socks_type = ProxyType.SOCKS5
2679     elif url_components.scheme.lower() in ('socks', 'socks4'):
2680         socks_type = ProxyType.SOCKS4
2681     elif url_components.scheme.lower() == 'socks4a':
2682         socks_type = ProxyType.SOCKS4A
2683
2684     def unquote_if_non_empty(s):
2685         if not s:
2686             return s
2687         return compat_urllib_parse_unquote_plus(s)
2688
2689     proxy_args = (
2690         socks_type,
2691         url_components.hostname, url_components.port or 1080,
2692         True,  # Remote DNS
2693         unquote_if_non_empty(url_components.username),
2694         unquote_if_non_empty(url_components.password),
2695     )
2696
2697     class SocksConnection(base_class):
2698         def connect(self):
2699             self.sock = sockssocket()
2700             self.sock.setproxy(*proxy_args)
2701             if type(self.timeout) in (int, float):
2702                 self.sock.settimeout(self.timeout)
2703             self.sock.connect((self.host, self.port))
2704
2705             if isinstance(self, compat_http_client.HTTPSConnection):
2706                 if hasattr(self, '_context'):  # Python > 2.6
2707                     self.sock = self._context.wrap_socket(
2708                         self.sock, server_hostname=self.host)
2709                 else:
2710                     self.sock = ssl.wrap_socket(self.sock)
2711
2712     return SocksConnection
2713
2714
2715 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2716     def __init__(self, params, https_conn_class=None, *args, **kwargs):
2717         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2718         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2719         self._params = params
2720
2721     def https_open(self, req):
2722         kwargs = {}
2723         conn_class = self._https_conn_class
2724
2725         if hasattr(self, '_context'):  # python > 2.6
2726             kwargs['context'] = self._context
2727         if hasattr(self, '_check_hostname'):  # python 3.x
2728             kwargs['check_hostname'] = self._check_hostname
2729
2730         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2731         if socks_proxy:
2732             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2733             del req.headers['Ytdl-socks-proxy']
2734
2735         return self.do_open(functools.partial(
2736             _create_http_connection, self, conn_class, True),
2737             req, **kwargs)
2738
2739
2740 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
2741     """
2742     See [1] for cookie file format.
2743
2744     1. https://curl.haxx.se/docs/http-cookies.html
2745     """
2746     _HTTPONLY_PREFIX = '#HttpOnly_'
2747     _ENTRY_LEN = 7
2748     _HEADER = '''# Netscape HTTP Cookie File
2749 # This file is generated by youtube-dl.  Do not edit.
2750
2751 '''
2752     _CookieFileEntry = collections.namedtuple(
2753         'CookieFileEntry',
2754         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2755
2756     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
2757         """
2758         Save cookies to a file.
2759
2760         Most of the code is taken from CPython 3.8 and slightly adapted
2761         to support cookie files with UTF-8 in both python 2 and 3.
2762         """
2763         if filename is None:
2764             if self.filename is not None:
2765                 filename = self.filename
2766             else:
2767                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2768
2769         # Store session cookies with `expires` set to 0 instead of an empty
2770         # string
2771         for cookie in self:
2772             if cookie.expires is None:
2773                 cookie.expires = 0
2774
2775         with io.open(filename, 'w', encoding='utf-8') as f:
2776             f.write(self._HEADER)
2777             now = time.time()
2778             for cookie in self:
2779                 if not ignore_discard and cookie.discard:
2780                     continue
2781                 if not ignore_expires and cookie.is_expired(now):
2782                     continue
2783                 if cookie.secure:
2784                     secure = 'TRUE'
2785                 else:
2786                     secure = 'FALSE'
2787                 if cookie.domain.startswith('.'):
2788                     initial_dot = 'TRUE'
2789                 else:
2790                     initial_dot = 'FALSE'
2791                 if cookie.expires is not None:
2792                     expires = compat_str(cookie.expires)
2793                 else:
2794                     expires = ''
2795                 if cookie.value is None:
2796                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
2797                     # with no name, whereas http.cookiejar regards it as a
2798                     # cookie with no value.
2799                     name = ''
2800                     value = cookie.name
2801                 else:
2802                     name = cookie.name
2803                     value = cookie.value
2804                 f.write(
2805                     '\t'.join([cookie.domain, initial_dot, cookie.path,
2806                                secure, expires, name, value]) + '\n')
2807
2808     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
2809         """Load cookies from a file."""
2810         if filename is None:
2811             if self.filename is not None:
2812                 filename = self.filename
2813             else:
2814                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2815
2816         def prepare_line(line):
2817             if line.startswith(self._HTTPONLY_PREFIX):
2818                 line = line[len(self._HTTPONLY_PREFIX):]
2819             # comments and empty lines are fine
2820             if line.startswith('#') or not line.strip():
2821                 return line
2822             cookie_list = line.split('\t')
2823             if len(cookie_list) != self._ENTRY_LEN:
2824                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2825             cookie = self._CookieFileEntry(*cookie_list)
2826             if cookie.expires_at and not cookie.expires_at.isdigit():
2827                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2828             return line
2829
2830         cf = io.StringIO()
2831         with io.open(filename, encoding='utf-8') as f:
2832             for line in f:
2833                 try:
2834                     cf.write(prepare_line(line))
2835                 except compat_cookiejar.LoadError as e:
2836                     write_string(
2837                         'WARNING: skipping cookie file entry due to %s: %r\n'
2838                         % (e, line), sys.stderr)
2839                     continue
2840         cf.seek(0)
2841         self._really_load(cf, filename, ignore_discard, ignore_expires)
2842         # Session cookies are denoted by either `expires` field set to
2843         # an empty string or 0. MozillaCookieJar only recognizes the former
2844         # (see [1]). So we need force the latter to be recognized as session
2845         # cookies on our own.
2846         # Session cookies may be important for cookies-based authentication,
2847         # e.g. usually, when user does not check 'Remember me' check box while
2848         # logging in on a site, some important cookies are stored as session
2849         # cookies so that not recognizing them will result in failed login.
2850         # 1. https://bugs.python.org/issue17164
2851         for cookie in self:
2852             # Treat `expires=0` cookies as session cookies
2853             if cookie.expires == 0:
2854                 cookie.expires = None
2855                 cookie.discard = True
2856
2857
2858 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2859     def __init__(self, cookiejar=None):
2860         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2861
2862     def http_response(self, request, response):
2863         # Python 2 will choke on next HTTP request in row if there are non-ASCII
2864         # characters in Set-Cookie HTTP header of last response (see
2865         # https://github.com/ytdl-org/youtube-dl/issues/6769).
2866         # In order to at least prevent crashing we will percent encode Set-Cookie
2867         # header before HTTPCookieProcessor starts processing it.
2868         # if sys.version_info < (3, 0) and response.headers:
2869         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2870         #         set_cookie = response.headers.get(set_cookie_header)
2871         #         if set_cookie:
2872         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2873         #             if set_cookie != set_cookie_escaped:
2874         #                 del response.headers[set_cookie_header]
2875         #                 response.headers[set_cookie_header] = set_cookie_escaped
2876         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2877
2878     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
2879     https_response = http_response
2880
2881
2882 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
2883     """YoutubeDL redirect handler
2884
2885     The code is based on HTTPRedirectHandler implementation from CPython [1].
2886
2887     This redirect handler solves two issues:
2888      - ensures redirect URL is always unicode under python 2
2889      - introduces support for experimental HTTP response status code
2890        308 Permanent Redirect [2] used by some sites [3]
2891
2892     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2893     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2894     3. https://github.com/ytdl-org/youtube-dl/issues/28768
2895     """
2896
2897     http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2898
2899     def redirect_request(self, req, fp, code, msg, headers, newurl):
2900         """Return a Request or None in response to a redirect.
2901
2902         This is called by the http_error_30x methods when a
2903         redirection response is received.  If a redirection should
2904         take place, return a new Request to allow http_error_30x to
2905         perform the redirect.  Otherwise, raise HTTPError if no-one
2906         else should try to handle this url.  Return None if you can't
2907         but another Handler might.
2908         """
2909         m = req.get_method()
2910         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
2911                  or code in (301, 302, 303) and m == "POST")):
2912             raise compat_HTTPError(req.full_url, code, msg, headers, fp)
2913         # Strictly (according to RFC 2616), 301 or 302 in response to
2914         # a POST MUST NOT cause a redirection without confirmation
2915         # from the user (of urllib.request, in this case).  In practice,
2916         # essentially all clients do redirect in this case, so we do
2917         # the same.
2918
2919         # On python 2 urlh.geturl() may sometimes return redirect URL
2920         # as byte string instead of unicode. This workaround allows
2921         # to force it always return unicode.
2922         if sys.version_info[0] < 3:
2923             newurl = compat_str(newurl)
2924
2925         # Be conciliant with URIs containing a space.  This is mainly
2926         # redundant with the more complete encoding done in http_error_302(),
2927         # but it is kept for compatibility with other callers.
2928         newurl = newurl.replace(' ', '%20')
2929
2930         CONTENT_HEADERS = ("content-length", "content-type")
2931         # NB: don't use dict comprehension for python 2.6 compatibility
2932         newheaders = dict((k, v) for k, v in req.headers.items()
2933                           if k.lower() not in CONTENT_HEADERS)
2934         return compat_urllib_request.Request(
2935             newurl, headers=newheaders, origin_req_host=req.origin_req_host,
2936             unverifiable=True)
2937
2938
2939 def extract_timezone(date_str):
2940     m = re.search(
2941         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2942         date_str)
2943     if not m:
2944         timezone = datetime.timedelta()
2945     else:
2946         date_str = date_str[:-len(m.group('tz'))]
2947         if not m.group('sign'):
2948             timezone = datetime.timedelta()
2949         else:
2950             sign = 1 if m.group('sign') == '+' else -1
2951             timezone = datetime.timedelta(
2952                 hours=sign * int(m.group('hours')),
2953                 minutes=sign * int(m.group('minutes')))
2954     return timezone, date_str
2955
2956
2957 def parse_iso8601(date_str, delimiter='T', timezone=None):
2958     """ Return a UNIX timestamp from the given date """
2959
2960     if date_str is None:
2961         return None
2962
2963     date_str = re.sub(r'\.[0-9]+', '', date_str)
2964
2965     if timezone is None:
2966         timezone, date_str = extract_timezone(date_str)
2967
2968     try:
2969         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
2970         dt = datetime.datetime.strptime(date_str, date_format) - timezone
2971         return calendar.timegm(dt.timetuple())
2972     except ValueError:
2973         pass
2974
2975
2976 def date_formats(day_first=True):
2977     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
2978
2979
2980 def unified_strdate(date_str, day_first=True):
2981     """Return a string with the date in the format YYYYMMDD"""
2982
2983     if date_str is None:
2984         return None
2985     upload_date = None
2986     # Replace commas
2987     date_str = date_str.replace(',', ' ')
2988     # Remove AM/PM + timezone
2989     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2990     _, date_str = extract_timezone(date_str)
2991
2992     for expression in date_formats(day_first):
2993         try:
2994             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
2995         except ValueError:
2996             pass
2997     if upload_date is None:
2998         timetuple = email.utils.parsedate_tz(date_str)
2999         if timetuple:
3000             try:
3001                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3002             except ValueError:
3003                 pass
3004     if upload_date is not None:
3005         return compat_str(upload_date)
3006
3007
3008 def unified_timestamp(date_str, day_first=True):
3009     if date_str is None:
3010         return None
3011
3012     date_str = re.sub(r'[,|]', '', date_str)
3013
3014     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
3015     timezone, date_str = extract_timezone(date_str)
3016
3017     # Remove AM/PM + timezone
3018     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3019
3020     # Remove unrecognized timezones from ISO 8601 alike timestamps
3021     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3022     if m:
3023         date_str = date_str[:-len(m.group('tz'))]
3024
3025     # Python only supports microseconds, so remove nanoseconds
3026     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3027     if m:
3028         date_str = m.group(1)
3029
3030     for expression in date_formats(day_first):
3031         try:
3032             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
3033             return calendar.timegm(dt.timetuple())
3034         except ValueError:
3035             pass
3036     timetuple = email.utils.parsedate_tz(date_str)
3037     if timetuple:
3038         return calendar.timegm(timetuple) + pm_delta * 3600
3039
3040
3041 def determine_ext(url, default_ext='unknown_video'):
3042     if url is None or '.' not in url:
3043         return default_ext
3044     guess = url.partition('?')[0].rpartition('.')[2]
3045     if re.match(r'^[A-Za-z0-9]+$', guess):
3046         return guess
3047     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3048     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
3049         return guess.rstrip('/')
3050     else:
3051         return default_ext
3052
3053
3054 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3055     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3056
3057
3058 def date_from_str(date_str):
3059     """
3060     Return a datetime object from a string in the format YYYYMMDD or
3061     (now|today)[+-][0-9](day|week|month|year)(s)?"""
3062     today = datetime.date.today()
3063     if date_str in ('now', 'today'):
3064         return today
3065     if date_str == 'yesterday':
3066         return today - datetime.timedelta(days=1)
3067     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
3068     if match is not None:
3069         sign = match.group('sign')
3070         time = int(match.group('time'))
3071         if sign == '-':
3072             time = -time
3073         unit = match.group('unit')
3074         # A bad approximation?
3075         if unit == 'month':
3076             unit = 'day'
3077             time *= 30
3078         elif unit == 'year':
3079             unit = 'day'
3080             time *= 365
3081         unit += 's'
3082         delta = datetime.timedelta(**{unit: time})
3083         return today + delta
3084     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
3085
3086
3087 def hyphenate_date(date_str):
3088     """
3089     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3090     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3091     if match is not None:
3092         return '-'.join(match.groups())
3093     else:
3094         return date_str
3095
3096
3097 class DateRange(object):
3098     """Represents a time interval between two dates"""
3099
3100     def __init__(self, start=None, end=None):
3101         """start and end must be strings in the format accepted by date"""
3102         if start is not None:
3103             self.start = date_from_str(start)
3104         else:
3105             self.start = datetime.datetime.min.date()
3106         if end is not None:
3107             self.end = date_from_str(end)
3108         else:
3109             self.end = datetime.datetime.max.date()
3110         if self.start > self.end:
3111             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
3112
3113     @classmethod
3114     def day(cls, day):
3115         """Returns a range that only contains the given day"""
3116         return cls(day, day)
3117
3118     def __contains__(self, date):
3119         """Check if the date is in the range"""
3120         if not isinstance(date, datetime.date):
3121             date = date_from_str(date)
3122         return self.start <= date <= self.end
3123
3124     def __str__(self):
3125         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3126
3127
3128 def platform_name():
3129     """ Returns the platform name as a compat_str """
3130     res = platform.platform()
3131     if isinstance(res, bytes):
3132         res = res.decode(preferredencoding())
3133
3134     assert isinstance(res, compat_str)
3135     return res
3136
3137
3138 def _windows_write_string(s, out):
3139     """ Returns True if the string was written using special methods,
3140     False if it has yet to be written out."""
3141     # Adapted from http://stackoverflow.com/a/3259271/35070
3142
3143     import ctypes
3144     import ctypes.wintypes
3145
3146     WIN_OUTPUT_IDS = {
3147         1: -11,
3148         2: -12,
3149     }
3150
3151     try:
3152         fileno = out.fileno()
3153     except AttributeError:
3154         # If the output stream doesn't have a fileno, it's virtual
3155         return False
3156     except io.UnsupportedOperation:
3157         # Some strange Windows pseudo files?
3158         return False
3159     if fileno not in WIN_OUTPUT_IDS:
3160         return False
3161
3162     GetStdHandle = compat_ctypes_WINFUNCTYPE(
3163         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
3164         ('GetStdHandle', ctypes.windll.kernel32))
3165     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3166
3167     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
3168         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3169         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
3170         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
3171     written = ctypes.wintypes.DWORD(0)
3172
3173     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
3174     FILE_TYPE_CHAR = 0x0002
3175     FILE_TYPE_REMOTE = 0x8000
3176     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
3177         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3178         ctypes.POINTER(ctypes.wintypes.DWORD))(
3179         ('GetConsoleMode', ctypes.windll.kernel32))
3180     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3181
3182     def not_a_console(handle):
3183         if handle == INVALID_HANDLE_VALUE or handle is None:
3184             return True
3185         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3186                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
3187
3188     if not_a_console(h):
3189         return False
3190
3191     def next_nonbmp_pos(s):
3192         try:
3193             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3194         except StopIteration:
3195             return len(s)
3196
3197     while s:
3198         count = min(next_nonbmp_pos(s), 1024)
3199
3200         ret = WriteConsoleW(
3201             h, s, count if count else 2, ctypes.byref(written), None)
3202         if ret == 0:
3203             raise OSError('Failed to write string')
3204         if not count:  # We just wrote a non-BMP character
3205             assert written.value == 2
3206             s = s[1:]
3207         else:
3208             assert written.value > 0
3209             s = s[written.value:]
3210     return True
3211
3212
3213 def write_string(s, out=None, encoding=None):
3214     if out is None:
3215         out = sys.stderr
3216     assert type(s) == compat_str
3217
3218     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3219         if _windows_write_string(s, out):
3220             return
3221
3222     if ('b' in getattr(out, 'mode', '')
3223             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
3224         byt = s.encode(encoding or preferredencoding(), 'ignore')
3225         out.write(byt)
3226     elif hasattr(out, 'buffer'):
3227         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3228         byt = s.encode(enc, 'ignore')
3229         out.buffer.write(byt)
3230     else:
3231         out.write(s)
3232     out.flush()
3233
3234
3235 def bytes_to_intlist(bs):
3236     if not bs:
3237         return []
3238     if isinstance(bs[0], int):  # Python 3
3239         return list(bs)
3240     else:
3241         return [ord(c) for c in bs]
3242
3243
3244 def intlist_to_bytes(xs):
3245     if not xs:
3246         return b''
3247     return compat_struct_pack('%dB' % len(xs), *xs)
3248
3249
3250 # Cross-platform file locking
3251 if sys.platform == 'win32':
3252     import ctypes.wintypes
3253     import msvcrt
3254
3255     class OVERLAPPED(ctypes.Structure):
3256         _fields_ = [
3257             ('Internal', ctypes.wintypes.LPVOID),
3258             ('InternalHigh', ctypes.wintypes.LPVOID),
3259             ('Offset', ctypes.wintypes.DWORD),
3260             ('OffsetHigh', ctypes.wintypes.DWORD),
3261             ('hEvent', ctypes.wintypes.HANDLE),
3262         ]
3263
3264     kernel32 = ctypes.windll.kernel32
3265     LockFileEx = kernel32.LockFileEx
3266     LockFileEx.argtypes = [
3267         ctypes.wintypes.HANDLE,     # hFile
3268         ctypes.wintypes.DWORD,      # dwFlags
3269         ctypes.wintypes.DWORD,      # dwReserved
3270         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3271         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3272         ctypes.POINTER(OVERLAPPED)  # Overlapped
3273     ]
3274     LockFileEx.restype = ctypes.wintypes.BOOL
3275     UnlockFileEx = kernel32.UnlockFileEx
3276     UnlockFileEx.argtypes = [
3277         ctypes.wintypes.HANDLE,     # hFile
3278         ctypes.wintypes.DWORD,      # dwReserved
3279         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3280         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3281         ctypes.POINTER(OVERLAPPED)  # Overlapped
3282     ]
3283     UnlockFileEx.restype = ctypes.wintypes.BOOL
3284     whole_low = 0xffffffff
3285     whole_high = 0x7fffffff
3286
3287     def _lock_file(f, exclusive):
3288         overlapped = OVERLAPPED()
3289         overlapped.Offset = 0
3290         overlapped.OffsetHigh = 0
3291         overlapped.hEvent = 0
3292         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3293         handle = msvcrt.get_osfhandle(f.fileno())
3294         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3295                           whole_low, whole_high, f._lock_file_overlapped_p):
3296             raise OSError('Locking file failed: %r' % ctypes.FormatError())
3297
3298     def _unlock_file(f):
3299         assert f._lock_file_overlapped_p
3300         handle = msvcrt.get_osfhandle(f.fileno())
3301         if not UnlockFileEx(handle, 0,
3302                             whole_low, whole_high, f._lock_file_overlapped_p):
3303             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3304
3305 else:
3306     # Some platforms, such as Jython, is missing fcntl
3307     try:
3308         import fcntl
3309
3310         def _lock_file(f, exclusive):
3311             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
3312
3313         def _unlock_file(f):
3314             fcntl.flock(f, fcntl.LOCK_UN)
3315     except ImportError:
3316         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3317
3318         def _lock_file(f, exclusive):
3319             raise IOError(UNSUPPORTED_MSG)
3320
3321         def _unlock_file(f):
3322             raise IOError(UNSUPPORTED_MSG)
3323
3324
3325 class locked_file(object):
3326     def __init__(self, filename, mode, encoding=None):
3327         assert mode in ['r', 'a', 'w']
3328         self.f = io.open(filename, mode, encoding=encoding)
3329         self.mode = mode
3330
3331     def __enter__(self):
3332         exclusive = self.mode != 'r'
3333         try:
3334             _lock_file(self.f, exclusive)
3335         except IOError:
3336             self.f.close()
3337             raise
3338         return self
3339
3340     def __exit__(self, etype, value, traceback):
3341         try:
3342             _unlock_file(self.f)
3343         finally:
3344             self.f.close()
3345
3346     def __iter__(self):
3347         return iter(self.f)
3348
3349     def write(self, *args):
3350         return self.f.write(*args)
3351
3352     def read(self, *args):
3353         return self.f.read(*args)
3354
3355
3356 def get_filesystem_encoding():
3357     encoding = sys.getfilesystemencoding()
3358     return encoding if encoding is not None else 'utf-8'
3359
3360
3361 def shell_quote(args):
3362     quoted_args = []
3363     encoding = get_filesystem_encoding()
3364     for a in args:
3365         if isinstance(a, bytes):
3366             # We may get a filename encoded with 'encodeFilename'
3367             a = a.decode(encoding)
3368         quoted_args.append(compat_shlex_quote(a))
3369     return ' '.join(quoted_args)
3370
3371
3372 def smuggle_url(url, data):
3373     """ Pass additional data in a URL for internal use. """
3374
3375     url, idata = unsmuggle_url(url, {})
3376     data.update(idata)
3377     sdata = compat_urllib_parse_urlencode(
3378         {'__youtubedl_smuggle': json.dumps(data)})
3379     return url + '#' + sdata
3380
3381
3382 def unsmuggle_url(smug_url, default=None):
3383     if '#__youtubedl_smuggle' not in smug_url:
3384         return smug_url, default
3385     url, _, sdata = smug_url.rpartition('#')
3386     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
3387     data = json.loads(jsond)
3388     return url, data
3389
3390
3391 def format_bytes(bytes):
3392     if bytes is None:
3393         return 'N/A'
3394     if type(bytes) is str:
3395         bytes = float(bytes)
3396     if bytes == 0.0:
3397         exponent = 0
3398     else:
3399         exponent = int(math.log(bytes, 1024.0))
3400     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
3401     converted = float(bytes) / float(1024 ** exponent)
3402     return '%.2f%s' % (converted, suffix)
3403
3404
3405 def lookup_unit_table(unit_table, s):
3406     units_re = '|'.join(re.escape(u) for u in unit_table)
3407     m = re.match(
3408         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3409     if not m:
3410         return None
3411     num_str = m.group('num').replace(',', '.')
3412     mult = unit_table[m.group('unit')]
3413     return int(float(num_str) * mult)
3414
3415
3416 def parse_filesize(s):
3417     if s is None:
3418         return None
3419
3420     # The lower-case forms are of course incorrect and unofficial,
3421     # but we support those too
3422     _UNIT_TABLE = {
3423         'B': 1,
3424         'b': 1,
3425         'bytes': 1,
3426         'KiB': 1024,
3427         'KB': 1000,
3428         'kB': 1024,
3429         'Kb': 1000,
3430         'kb': 1000,
3431         'kilobytes': 1000,
3432         'kibibytes': 1024,
3433         'MiB': 1024 ** 2,
3434         'MB': 1000 ** 2,
3435         'mB': 1024 ** 2,
3436         'Mb': 1000 ** 2,
3437         'mb': 1000 ** 2,
3438         'megabytes': 1000 ** 2,
3439         'mebibytes': 1024 ** 2,
3440         'GiB': 1024 ** 3,
3441         'GB': 1000 ** 3,
3442         'gB': 1024 ** 3,
3443         'Gb': 1000 ** 3,
3444         'gb': 1000 ** 3,
3445         'gigabytes': 1000 ** 3,
3446         'gibibytes': 1024 ** 3,
3447         'TiB': 1024 ** 4,
3448         'TB': 1000 ** 4,
3449         'tB': 1024 ** 4,
3450         'Tb': 1000 ** 4,
3451         'tb': 1000 ** 4,
3452         'terabytes': 1000 ** 4,
3453         'tebibytes': 1024 ** 4,
3454         'PiB': 1024 ** 5,
3455         'PB': 1000 ** 5,
3456         'pB': 1024 ** 5,
3457         'Pb': 1000 ** 5,
3458         'pb': 1000 ** 5,
3459         'petabytes': 1000 ** 5,
3460         'pebibytes': 1024 ** 5,
3461         'EiB': 1024 ** 6,
3462         'EB': 1000 ** 6,
3463         'eB': 1024 ** 6,
3464         'Eb': 1000 ** 6,
3465         'eb': 1000 ** 6,
3466         'exabytes': 1000 ** 6,
3467         'exbibytes': 1024 ** 6,
3468         'ZiB': 1024 ** 7,
3469         'ZB': 1000 ** 7,
3470         'zB': 1024 ** 7,
3471         'Zb': 1000 ** 7,
3472         'zb': 1000 ** 7,
3473         'zettabytes': 1000 ** 7,
3474         'zebibytes': 1024 ** 7,
3475         'YiB': 1024 ** 8,
3476         'YB': 1000 ** 8,
3477         'yB': 1024 ** 8,
3478         'Yb': 1000 ** 8,
3479         'yb': 1000 ** 8,
3480         'yottabytes': 1000 ** 8,
3481         'yobibytes': 1024 ** 8,
3482     }
3483
3484     return lookup_unit_table(_UNIT_TABLE, s)
3485
3486
3487 def parse_count(s):
3488     if s is None:
3489         return None
3490
3491     s = s.strip()
3492
3493     if re.match(r'^[\d,.]+$', s):
3494         return str_to_int(s)
3495
3496     _UNIT_TABLE = {
3497         'k': 1000,
3498         'K': 1000,
3499         'm': 1000 ** 2,
3500         'M': 1000 ** 2,
3501         'kk': 1000 ** 2,
3502         'KK': 1000 ** 2,
3503     }
3504
3505     return lookup_unit_table(_UNIT_TABLE, s)
3506
3507
3508 def parse_resolution(s):
3509     if s is None:
3510         return {}
3511
3512     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3513     if mobj:
3514         return {
3515             'width': int(mobj.group('w')),
3516             'height': int(mobj.group('h')),
3517         }
3518
3519     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3520     if mobj:
3521         return {'height': int(mobj.group(1))}
3522
3523     mobj = re.search(r'\b([48])[kK]\b', s)
3524     if mobj:
3525         return {'height': int(mobj.group(1)) * 540}
3526
3527     return {}
3528
3529
3530 def parse_bitrate(s):
3531     if not isinstance(s, compat_str):
3532         return
3533     mobj = re.search(r'\b(\d+)\s*kbps', s)
3534     if mobj:
3535         return int(mobj.group(1))
3536
3537
3538 def month_by_name(name, lang='en'):
3539     """ Return the number of a month by (locale-independently) English name """
3540
3541     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3542
3543     try:
3544         return month_names.index(name) + 1
3545     except ValueError:
3546         return None
3547
3548
3549 def month_by_abbreviation(abbrev):
3550     """ Return the number of a month by (locale-independently) English
3551         abbreviations """
3552
3553     try:
3554         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3555     except ValueError:
3556         return None
3557
3558
3559 def fix_xml_ampersands(xml_str):
3560     """Replace all the '&' by '&amp;' in XML"""
3561     return re.sub(
3562         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3563         '&amp;',
3564         xml_str)
3565
3566
3567 def setproctitle(title):
3568     assert isinstance(title, compat_str)
3569
3570     # ctypes in Jython is not complete
3571     # http://bugs.jython.org/issue2148
3572     if sys.platform.startswith('java'):
3573         return
3574
3575     try:
3576         libc = ctypes.cdll.LoadLibrary('libc.so.6')
3577     except OSError:
3578         return
3579     except TypeError:
3580         # LoadLibrary in Windows Python 2.7.13 only expects
3581         # a bytestring, but since unicode_literals turns
3582         # every string into a unicode string, it fails.
3583         return
3584     title_bytes = title.encode('utf-8')
3585     buf = ctypes.create_string_buffer(len(title_bytes))
3586     buf.value = title_bytes
3587     try:
3588         libc.prctl(15, buf, 0, 0, 0)
3589     except AttributeError:
3590         return  # Strange libc, just skip this
3591
3592
3593 def remove_start(s, start):
3594     return s[len(start):] if s is not None and s.startswith(start) else s
3595
3596
3597 def remove_end(s, end):
3598     return s[:-len(end)] if s is not None and s.endswith(end) else s
3599
3600
3601 def remove_quotes(s):
3602     if s is None or len(s) < 2:
3603         return s
3604     for quote in ('"', "'", ):
3605         if s[0] == quote and s[-1] == quote:
3606             return s[1:-1]
3607     return s
3608
3609
3610 def url_basename(url):
3611     path = compat_urlparse.urlparse(url).path
3612     return path.strip('/').split('/')[-1]
3613
3614
3615 def base_url(url):
3616     return re.match(r'https?://[^?#&]+/', url).group()
3617
3618
3619 def urljoin(base, path):
3620     if isinstance(path, bytes):
3621         path = path.decode('utf-8')
3622     if not isinstance(path, compat_str) or not path:
3623         return None
3624     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3625         return path
3626     if isinstance(base, bytes):
3627         base = base.decode('utf-8')
3628     if not isinstance(base, compat_str) or not re.match(
3629             r'^(?:https?:)?//', base):
3630         return None
3631     return compat_urlparse.urljoin(base, path)
3632
3633
3634 class HEADRequest(compat_urllib_request.Request):
3635     def get_method(self):
3636         return 'HEAD'
3637
3638
3639 class PUTRequest(compat_urllib_request.Request):
3640     def get_method(self):
3641         return 'PUT'
3642
3643
3644 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3645     if get_attr:
3646         if v is not None:
3647             v = getattr(v, get_attr, None)
3648     if v == '':
3649         v = None
3650     if v is None:
3651         return default
3652     try:
3653         return int(v) * invscale // scale
3654     except (ValueError, TypeError):
3655         return default
3656
3657
3658 def str_or_none(v, default=None):
3659     return default if v is None else compat_str(v)
3660
3661
3662 def str_to_int(int_str):
3663     """ A more relaxed version of int_or_none """
3664     if isinstance(int_str, compat_integer_types):
3665         return int_str
3666     elif isinstance(int_str, compat_str):
3667         int_str = re.sub(r'[,\.\+]', '', int_str)
3668         return int_or_none(int_str)
3669
3670
3671 def float_or_none(v, scale=1, invscale=1, default=None):
3672     if v is None:
3673         return default
3674     try:
3675         return float(v) * invscale / scale
3676     except (ValueError, TypeError):
3677         return default
3678
3679
3680 def bool_or_none(v, default=None):
3681     return v if isinstance(v, bool) else default
3682
3683
3684 def strip_or_none(v, default=None):
3685     return v.strip() if isinstance(v, compat_str) else default
3686
3687
3688 def url_or_none(url):
3689     if not url or not isinstance(url, compat_str):
3690         return None
3691     url = url.strip()
3692     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
3693
3694
3695 def parse_duration(s):
3696     if not isinstance(s, compat_basestring):
3697         return None
3698
3699     s = s.strip()
3700
3701     days, hours, mins, secs, ms = [None] * 5
3702     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3703     if m:
3704         days, hours, mins, secs, ms = m.groups()
3705     else:
3706         m = re.match(
3707             r'''(?ix)(?:P?
3708                 (?:
3709                     [0-9]+\s*y(?:ears?)?\s*
3710                 )?
3711                 (?:
3712                     [0-9]+\s*m(?:onths?)?\s*
3713                 )?
3714                 (?:
3715                     [0-9]+\s*w(?:eeks?)?\s*
3716                 )?
3717                 (?:
3718                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3719                 )?
3720                 T)?
3721                 (?:
3722                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3723                 )?
3724                 (?:
3725                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3726                 )?
3727                 (?:
3728                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3729                 )?Z?$''', s)
3730         if m:
3731             days, hours, mins, secs, ms = m.groups()
3732         else:
3733             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3734             if m:
3735                 hours, mins = m.groups()
3736             else:
3737                 return None
3738
3739     duration = 0
3740     if secs:
3741         duration += float(secs)
3742     if mins:
3743         duration += float(mins) * 60
3744     if hours:
3745         duration += float(hours) * 60 * 60
3746     if days:
3747         duration += float(days) * 24 * 60 * 60
3748     if ms:
3749         duration += float(ms)
3750     return duration
3751
3752
3753 def prepend_extension(filename, ext, expected_real_ext=None):
3754     name, real_ext = os.path.splitext(filename)
3755     return (
3756         '{0}.{1}{2}'.format(name, ext, real_ext)
3757         if not expected_real_ext or real_ext[1:] == expected_real_ext
3758         else '{0}.{1}'.format(filename, ext))
3759
3760
3761 def replace_extension(filename, ext, expected_real_ext=None):
3762     name, real_ext = os.path.splitext(filename)
3763     return '{0}.{1}'.format(
3764         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3765         ext)
3766
3767
3768 def check_executable(exe, args=[]):
3769     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3770     args can be a list of arguments for a short output (like -version) """
3771     try:
3772         subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
3773     except OSError:
3774         return False
3775     return exe
3776
3777
3778 def get_exe_version(exe, args=['--version'],
3779                     version_re=None, unrecognized='present'):
3780     """ Returns the version of the specified executable,
3781     or False if the executable is not present """
3782     try:
3783         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3784         # SIGTTOU if youtube-dl is run in the background.
3785         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3786         out, _ = subprocess.Popen(
3787             [encodeArgument(exe)] + args,
3788             stdin=subprocess.PIPE,
3789             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
3790     except OSError:
3791         return False
3792     if isinstance(out, bytes):  # Python 2.x
3793         out = out.decode('ascii', 'ignore')
3794     return detect_exe_version(out, version_re, unrecognized)
3795
3796
3797 def detect_exe_version(output, version_re=None, unrecognized='present'):
3798     assert isinstance(output, compat_str)
3799     if version_re is None:
3800         version_re = r'version\s+([-0-9._a-zA-Z]+)'
3801     m = re.search(version_re, output)
3802     if m:
3803         return m.group(1)
3804     else:
3805         return unrecognized
3806
3807
3808 class PagedList(object):
3809     def __len__(self):
3810         # This is only useful for tests
3811         return len(self.getslice())
3812
3813
3814 class OnDemandPagedList(PagedList):
3815     def __init__(self, pagefunc, pagesize, use_cache=True):
3816         self._pagefunc = pagefunc
3817         self._pagesize = pagesize
3818         self._use_cache = use_cache
3819         if use_cache:
3820             self._cache = {}
3821
3822     def getslice(self, start=0, end=None):
3823         res = []
3824         for pagenum in itertools.count(start // self._pagesize):
3825             firstid = pagenum * self._pagesize
3826             nextfirstid = pagenum * self._pagesize + self._pagesize
3827             if start >= nextfirstid:
3828                 continue
3829
3830             page_results = None
3831             if self._use_cache:
3832                 page_results = self._cache.get(pagenum)
3833             if page_results is None:
3834                 page_results = list(self._pagefunc(pagenum))
3835             if self._use_cache:
3836                 self._cache[pagenum] = page_results
3837
3838             startv = (
3839                 start % self._pagesize
3840                 if firstid <= start < nextfirstid
3841                 else 0)
3842
3843             endv = (
3844                 ((end - 1) % self._pagesize) + 1
3845                 if (end is not None and firstid <= end <= nextfirstid)
3846                 else None)
3847
3848             if startv != 0 or endv is not None:
3849                 page_results = page_results[startv:endv]
3850             res.extend(page_results)
3851
3852             # A little optimization - if current page is not "full", ie. does
3853             # not contain page_size videos then we can assume that this page
3854             # is the last one - there are no more ids on further pages -
3855             # i.e. no need to query again.
3856             if len(page_results) + startv < self._pagesize:
3857                 break
3858
3859             # If we got the whole page, but the next page is not interesting,
3860             # break out early as well
3861             if end == nextfirstid:
3862                 break
3863         return res
3864
3865
3866 class InAdvancePagedList(PagedList):
3867     def __init__(self, pagefunc, pagecount, pagesize):
3868         self._pagefunc = pagefunc
3869         self._pagecount = pagecount
3870         self._pagesize = pagesize
3871
3872     def getslice(self, start=0, end=None):
3873         res = []
3874         start_page = start // self._pagesize
3875         end_page = (
3876             self._pagecount if end is None else (end // self._pagesize + 1))
3877         skip_elems = start - start_page * self._pagesize
3878         only_more = None if end is None else end - start
3879         for pagenum in range(start_page, end_page):
3880             page = list(self._pagefunc(pagenum))
3881             if skip_elems:
3882                 page = page[skip_elems:]
3883                 skip_elems = None
3884             if only_more is not None:
3885                 if len(page) < only_more:
3886                     only_more -= len(page)
3887                 else:
3888                     page = page[:only_more]
3889                     res.extend(page)
3890                     break
3891             res.extend(page)
3892         return res
3893
3894
3895 def uppercase_escape(s):
3896     unicode_escape = codecs.getdecoder('unicode_escape')
3897     return re.sub(
3898         r'\\U[0-9a-fA-F]{8}',
3899         lambda m: unicode_escape(m.group(0))[0],
3900         s)
3901
3902
3903 def lowercase_escape(s):
3904     unicode_escape = codecs.getdecoder('unicode_escape')
3905     return re.sub(
3906         r'\\u[0-9a-fA-F]{4}',
3907         lambda m: unicode_escape(m.group(0))[0],
3908         s)
3909
3910
3911 def escape_rfc3986(s):
3912     """Escape non-ASCII characters as suggested by RFC 3986"""
3913     if sys.version_info < (3, 0) and isinstance(s, compat_str):
3914         s = s.encode('utf-8')
3915     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3916
3917
3918 def escape_url(url):
3919     """Escape URL as suggested by RFC 3986"""
3920     url_parsed = compat_urllib_parse_urlparse(url)
3921     return url_parsed._replace(
3922         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3923         path=escape_rfc3986(url_parsed.path),
3924         params=escape_rfc3986(url_parsed.params),
3925         query=escape_rfc3986(url_parsed.query),
3926         fragment=escape_rfc3986(url_parsed.fragment)
3927     ).geturl()
3928
3929
3930 def read_batch_urls(batch_fd):
3931     def fixup(url):
3932         if not isinstance(url, compat_str):
3933             url = url.decode('utf-8', 'replace')
3934         BOM_UTF8 = '\xef\xbb\xbf'
3935         if url.startswith(BOM_UTF8):
3936             url = url[len(BOM_UTF8):]
3937         url = url.strip()
3938         if url.startswith(('#', ';', ']')):
3939             return False
3940         return url
3941
3942     with contextlib.closing(batch_fd) as fd:
3943         return [url for url in map(fixup, fd) if url]
3944
3945
3946 def urlencode_postdata(*args, **kargs):
3947     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3948
3949
3950 def update_url_query(url, query):
3951     if not query:
3952         return url
3953     parsed_url = compat_urlparse.urlparse(url)
3954     qs = compat_parse_qs(parsed_url.query)
3955     qs.update(query)
3956     return compat_urlparse.urlunparse(parsed_url._replace(
3957         query=compat_urllib_parse_urlencode(qs, True)))
3958
3959
3960 def update_Request(req, url=None, data=None, headers={}, query={}):
3961     req_headers = req.headers.copy()
3962     req_headers.update(headers)
3963     req_data = data or req.data
3964     req_url = update_url_query(url or req.get_full_url(), query)
3965     req_get_method = req.get_method()
3966     if req_get_method == 'HEAD':
3967         req_type = HEADRequest
3968     elif req_get_method == 'PUT':
3969         req_type = PUTRequest
3970     else:
3971         req_type = compat_urllib_request.Request
3972     new_req = req_type(
3973         req_url, data=req_data, headers=req_headers,
3974         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3975     if hasattr(req, 'timeout'):
3976         new_req.timeout = req.timeout
3977     return new_req
3978
3979
3980 def _multipart_encode_impl(data, boundary):
3981     content_type = 'multipart/form-data; boundary=%s' % boundary
3982
3983     out = b''
3984     for k, v in data.items():
3985         out += b'--' + boundary.encode('ascii') + b'\r\n'
3986         if isinstance(k, compat_str):
3987             k = k.encode('utf-8')
3988         if isinstance(v, compat_str):
3989             v = v.encode('utf-8')
3990         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3991         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3992         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3993         if boundary.encode('ascii') in content:
3994             raise ValueError('Boundary overlaps with data')
3995         out += content
3996
3997     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3998
3999     return out, content_type
4000
4001
4002 def multipart_encode(data, boundary=None):
4003     '''
4004     Encode a dict to RFC 7578-compliant form-data
4005
4006     data:
4007         A dict where keys and values can be either Unicode or bytes-like
4008         objects.
4009     boundary:
4010         If specified a Unicode object, it's used as the boundary. Otherwise
4011         a random boundary is generated.
4012
4013     Reference: https://tools.ietf.org/html/rfc7578
4014     '''
4015     has_specified_boundary = boundary is not None
4016
4017     while True:
4018         if boundary is None:
4019             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4020
4021         try:
4022             out, content_type = _multipart_encode_impl(data, boundary)
4023             break
4024         except ValueError:
4025             if has_specified_boundary:
4026                 raise
4027             boundary = None
4028
4029     return out, content_type
4030
4031
4032 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
4033     if isinstance(key_or_keys, (list, tuple)):
4034         for key in key_or_keys:
4035             if key not in d or d[key] is None or skip_false_values and not d[key]:
4036                 continue
4037             return d[key]
4038         return default
4039     return d.get(key_or_keys, default)
4040
4041
4042 def try_get(src, getter, expected_type=None):
4043     if not isinstance(getter, (list, tuple)):
4044         getter = [getter]
4045     for get in getter:
4046         try:
4047             v = get(src)
4048         except (AttributeError, KeyError, TypeError, IndexError):
4049             pass
4050         else:
4051             if expected_type is None or isinstance(v, expected_type):
4052                 return v
4053
4054
4055 def merge_dicts(*dicts):
4056     merged = {}
4057     for a_dict in dicts:
4058         for k, v in a_dict.items():
4059             if v is None:
4060                 continue
4061             if (k not in merged
4062                     or (isinstance(v, compat_str) and v
4063                         and isinstance(merged[k], compat_str)
4064                         and not merged[k])):
4065                 merged[k] = v
4066     return merged
4067
4068
4069 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4070     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4071
4072
4073 US_RATINGS = {
4074     'G': 0,
4075     'PG': 10,
4076     'PG-13': 13,
4077     'R': 16,
4078     'NC': 18,
4079 }
4080
4081
4082 TV_PARENTAL_GUIDELINES = {
4083     'TV-Y': 0,
4084     'TV-Y7': 7,
4085     'TV-G': 0,
4086     'TV-PG': 0,
4087     'TV-14': 14,
4088     'TV-MA': 17,
4089 }
4090
4091
4092 def parse_age_limit(s):
4093     if type(s) == int:
4094         return s if 0 <= s <= 21 else None
4095     if not isinstance(s, compat_basestring):
4096         return None
4097     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4098     if m:
4099         return int(m.group('age'))
4100     if s in US_RATINGS:
4101         return US_RATINGS[s]
4102     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
4103     if m:
4104         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
4105     return None
4106
4107
4108 def strip_jsonp(code):
4109     return re.sub(
4110         r'''(?sx)^
4111             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4112             (?:\s*&&\s*(?P=func_name))?
4113             \s*\(\s*(?P<callback_data>.*)\);?
4114             \s*?(?://[^\n]*)*$''',
4115         r'\g<callback_data>', code)
4116
4117
4118 def js_to_json(code):
4119     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4120     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4121     INTEGER_TABLE = (
4122         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4123         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4124     )
4125
4126     def fix_kv(m):
4127         v = m.group(0)
4128         if v in ('true', 'false', 'null'):
4129             return v
4130         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
4131             return ""
4132
4133         if v[0] in ("'", '"'):
4134             v = re.sub(r'(?s)\\.|"', lambda m: {
4135                 '"': '\\"',
4136                 "\\'": "'",
4137                 '\\\n': '',
4138                 '\\x': '\\u00',
4139             }.get(m.group(0), m.group(0)), v[1:-1])
4140         else:
4141             for regex, base in INTEGER_TABLE:
4142                 im = re.match(regex, v)
4143                 if im:
4144                     i = int(im.group(1), base)
4145                     return '"%d":' % i if v.endswith(':') else '%d' % i
4146
4147         return '"%s"' % v
4148
4149     return re.sub(r'''(?sx)
4150         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4151         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4152         {comment}|,(?={skip}[\]}}])|
4153         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4154         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4155         [0-9]+(?={skip}:)|
4156         !+
4157         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4158
4159
4160 def qualities(quality_ids):
4161     """ Get a numeric quality value out of a list of possible values """
4162     def q(qid):
4163         try:
4164             return quality_ids.index(qid)
4165         except ValueError:
4166             return -1
4167     return q
4168
4169
4170 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4171
4172
4173 def limit_length(s, length):
4174     """ Add ellipses to overly long strings """
4175     if s is None:
4176         return None
4177     ELLIPSES = '...'
4178     if len(s) > length:
4179         return s[:length - len(ELLIPSES)] + ELLIPSES
4180     return s
4181
4182
4183 def version_tuple(v):
4184     return tuple(int(e) for e in re.split(r'[-.]', v))
4185
4186
4187 def is_outdated_version(version, limit, assume_new=True):
4188     if not version:
4189         return not assume_new
4190     try:
4191         return version_tuple(version) < version_tuple(limit)
4192     except ValueError:
4193         return not assume_new
4194
4195
4196 def ytdl_is_updateable():
4197     """ Returns if youtube-dl can be updated with -U """
4198     from zipimport import zipimporter
4199
4200     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4201
4202
4203 def args_to_str(args):
4204     # Get a short string representation for a subprocess command
4205     return ' '.join(compat_shlex_quote(a) for a in args)
4206
4207
4208 def error_to_compat_str(err):
4209     err_str = str(err)
4210     # On python 2 error byte string must be decoded with proper
4211     # encoding rather than ascii
4212     if sys.version_info[0] < 3:
4213         err_str = err_str.decode(preferredencoding())
4214     return err_str
4215
4216
4217 def mimetype2ext(mt):
4218     if mt is None:
4219         return None
4220
4221     ext = {
4222         'audio/mp4': 'm4a',
4223         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4224         # it's the most popular one
4225         'audio/mpeg': 'mp3',
4226     }.get(mt)
4227     if ext is not None:
4228         return ext
4229
4230     _, _, res = mt.rpartition('/')
4231     res = res.split(';')[0].strip().lower()
4232
4233     return {
4234         '3gpp': '3gp',
4235         'smptett+xml': 'tt',
4236         'ttaf+xml': 'dfxp',
4237         'ttml+xml': 'ttml',
4238         'x-flv': 'flv',
4239         'x-mp4-fragmented': 'mp4',
4240         'x-ms-sami': 'sami',
4241         'x-ms-wmv': 'wmv',
4242         'mpegurl': 'm3u8',
4243         'x-mpegurl': 'm3u8',
4244         'vnd.apple.mpegurl': 'm3u8',
4245         'dash+xml': 'mpd',
4246         'f4m+xml': 'f4m',
4247         'hds+xml': 'f4m',
4248         'vnd.ms-sstr+xml': 'ism',
4249         'quicktime': 'mov',
4250         'mp2t': 'ts',
4251         'x-wav': 'wav',
4252     }.get(res, res)
4253
4254
4255 def parse_codecs(codecs_str):
4256     # http://tools.ietf.org/html/rfc6381
4257     if not codecs_str:
4258         return {}
4259     split_codecs = list(filter(None, map(
4260         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4261     vcodec, acodec = None, None
4262     for full_codec in split_codecs:
4263         codec = full_codec.split('.')[0]
4264         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4265             if not vcodec:
4266                 vcodec = full_codec
4267         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4268             if not acodec:
4269                 acodec = full_codec
4270         else:
4271             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4272     if not vcodec and not acodec:
4273         if len(split_codecs) == 2:
4274             return {
4275                 'vcodec': split_codecs[0],
4276                 'acodec': split_codecs[1],
4277             }
4278     else:
4279         return {
4280             'vcodec': vcodec or 'none',
4281             'acodec': acodec or 'none',
4282         }
4283     return {}
4284
4285
4286 def urlhandle_detect_ext(url_handle):
4287     getheader = url_handle.headers.get
4288
4289     cd = getheader('Content-Disposition')
4290     if cd:
4291         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4292         if m:
4293             e = determine_ext(m.group('filename'), default_ext=None)
4294             if e:
4295                 return e
4296
4297     return mimetype2ext(getheader('Content-Type'))
4298
4299
4300 def encode_data_uri(data, mime_type):
4301     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4302
4303
4304 def age_restricted(content_limit, age_limit):
4305     """ Returns True iff the content should be blocked """
4306
4307     if age_limit is None:  # No limit set
4308         return False
4309     if content_limit is None:
4310         return False  # Content available for everyone
4311     return age_limit < content_limit
4312
4313
4314 def is_html(first_bytes):
4315     """ Detect whether a file contains HTML by examining its first bytes. """
4316
4317     BOMS = [
4318         (b'\xef\xbb\xbf', 'utf-8'),
4319         (b'\x00\x00\xfe\xff', 'utf-32-be'),
4320         (b'\xff\xfe\x00\x00', 'utf-32-le'),
4321         (b'\xff\xfe', 'utf-16-le'),
4322         (b'\xfe\xff', 'utf-16-be'),
4323     ]
4324     for bom, enc in BOMS:
4325         if first_bytes.startswith(bom):
4326             s = first_bytes[len(bom):].decode(enc, 'replace')
4327             break
4328     else:
4329         s = first_bytes.decode('utf-8', 'replace')
4330
4331     return re.match(r'^\s*<', s)
4332
4333
4334 def determine_protocol(info_dict):
4335     protocol = info_dict.get('protocol')
4336     if protocol is not None:
4337         return protocol
4338
4339     url = info_dict['url']
4340     if url.startswith('rtmp'):
4341         return 'rtmp'
4342     elif url.startswith('mms'):
4343         return 'mms'
4344     elif url.startswith('rtsp'):
4345         return 'rtsp'
4346
4347     ext = determine_ext(url)
4348     if ext == 'm3u8':
4349         return 'm3u8'
4350     elif ext == 'f4m':
4351         return 'f4m'
4352
4353     return compat_urllib_parse_urlparse(url).scheme
4354
4355
4356 def render_table(header_row, data):
4357     """ Render a list of rows, each as a list of values """
4358     table = [header_row] + data
4359     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4360     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
4361     return '\n'.join(format_str % tuple(row) for row in table)
4362
4363
4364 def _match_one(filter_part, dct):
4365     COMPARISON_OPERATORS = {
4366         '<': operator.lt,
4367         '<=': operator.le,
4368         '>': operator.gt,
4369         '>=': operator.ge,
4370         '=': operator.eq,
4371         '!=': operator.ne,
4372     }
4373     operator_rex = re.compile(r'''(?x)\s*
4374         (?P<key>[a-z_]+)
4375         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4376         (?:
4377             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4378             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4379             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4380         )
4381         \s*$
4382         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4383     m = operator_rex.search(filter_part)
4384     if m:
4385         op = COMPARISON_OPERATORS[m.group('op')]
4386         actual_value = dct.get(m.group('key'))
4387         if (m.group('quotedstrval') is not None
4388             or m.group('strval') is not None
4389             # If the original field is a string and matching comparisonvalue is
4390             # a number we should respect the origin of the original field
4391             # and process comparison value as a string (see
4392             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4393             or actual_value is not None and m.group('intval') is not None
4394                 and isinstance(actual_value, compat_str)):
4395             if m.group('op') not in ('=', '!='):
4396                 raise ValueError(
4397                     'Operator %s does not support string values!' % m.group('op'))
4398             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4399             quote = m.group('quote')
4400             if quote is not None:
4401                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4402         else:
4403             try:
4404                 comparison_value = int(m.group('intval'))
4405             except ValueError:
4406                 comparison_value = parse_filesize(m.group('intval'))
4407                 if comparison_value is None:
4408                     comparison_value = parse_filesize(m.group('intval') + 'B')
4409                 if comparison_value is None:
4410                     raise ValueError(
4411                         'Invalid integer value %r in filter part %r' % (
4412                             m.group('intval'), filter_part))
4413         if actual_value is None:
4414             return m.group('none_inclusive')
4415         return op(actual_value, comparison_value)
4416
4417     UNARY_OPERATORS = {
4418         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4419         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4420     }
4421     operator_rex = re.compile(r'''(?x)\s*
4422         (?P<op>%s)\s*(?P<key>[a-z_]+)
4423         \s*$
4424         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4425     m = operator_rex.search(filter_part)
4426     if m:
4427         op = UNARY_OPERATORS[m.group('op')]
4428         actual_value = dct.get(m.group('key'))
4429         return op(actual_value)
4430
4431     raise ValueError('Invalid filter part %r' % filter_part)
4432
4433
4434 def match_str(filter_str, dct):
4435     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4436
4437     return all(
4438         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4439
4440
4441 def match_filter_func(filter_str):
4442     def _match_func(info_dict):
4443         if match_str(filter_str, info_dict):
4444             return None
4445         else:
4446             video_title = info_dict.get('title', info_dict.get('id', 'video'))
4447             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4448     return _match_func
4449
4450
4451 def parse_dfxp_time_expr(time_expr):
4452     if not time_expr:
4453         return
4454
4455     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4456     if mobj:
4457         return float(mobj.group('time_offset'))
4458
4459     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4460     if mobj:
4461         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
4462
4463
4464 def srt_subtitles_timecode(seconds):
4465     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4466
4467
4468 def dfxp2srt(dfxp_data):
4469     '''
4470     @param dfxp_data A bytes-like object containing DFXP data
4471     @returns A unicode object containing converted SRT data
4472     '''
4473     LEGACY_NAMESPACES = (
4474         (b'http://www.w3.org/ns/ttml', [
4475             b'http://www.w3.org/2004/11/ttaf1',
4476             b'http://www.w3.org/2006/04/ttaf1',
4477             b'http://www.w3.org/2006/10/ttaf1',
4478         ]),
4479         (b'http://www.w3.org/ns/ttml#styling', [
4480             b'http://www.w3.org/ns/ttml#style',
4481         ]),
4482     )
4483
4484     SUPPORTED_STYLING = [
4485         'color',
4486         'fontFamily',
4487         'fontSize',
4488         'fontStyle',
4489         'fontWeight',
4490         'textDecoration'
4491     ]
4492
4493     _x = functools.partial(xpath_with_ns, ns_map={
4494         'xml': 'http://www.w3.org/XML/1998/namespace',
4495         'ttml': 'http://www.w3.org/ns/ttml',
4496         'tts': 'http://www.w3.org/ns/ttml#styling',
4497     })
4498
4499     styles = {}
4500     default_style = {}
4501
4502     class TTMLPElementParser(object):
4503         _out = ''
4504         _unclosed_elements = []
4505         _applied_styles = []
4506
4507         def start(self, tag, attrib):
4508             if tag in (_x('ttml:br'), 'br'):
4509                 self._out += '\n'
4510             else:
4511                 unclosed_elements = []
4512                 style = {}
4513                 element_style_id = attrib.get('style')
4514                 if default_style:
4515                     style.update(default_style)
4516                 if element_style_id:
4517                     style.update(styles.get(element_style_id, {}))
4518                 for prop in SUPPORTED_STYLING:
4519                     prop_val = attrib.get(_x('tts:' + prop))
4520                     if prop_val:
4521                         style[prop] = prop_val
4522                 if style:
4523                     font = ''
4524                     for k, v in sorted(style.items()):
4525                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
4526                             continue
4527                         if k == 'color':
4528                             font += ' color="%s"' % v
4529                         elif k == 'fontSize':
4530                             font += ' size="%s"' % v
4531                         elif k == 'fontFamily':
4532                             font += ' face="%s"' % v
4533                         elif k == 'fontWeight' and v == 'bold':
4534                             self._out += '<b>'
4535                             unclosed_elements.append('b')
4536                         elif k == 'fontStyle' and v == 'italic':
4537                             self._out += '<i>'
4538                             unclosed_elements.append('i')
4539                         elif k == 'textDecoration' and v == 'underline':
4540                             self._out += '<u>'
4541                             unclosed_elements.append('u')
4542                     if font:
4543                         self._out += '<font' + font + '>'
4544                         unclosed_elements.append('font')
4545                     applied_style = {}
4546                     if self._applied_styles:
4547                         applied_style.update(self._applied_styles[-1])
4548                     applied_style.update(style)
4549                     self._applied_styles.append(applied_style)
4550                 self._unclosed_elements.append(unclosed_elements)
4551
4552         def end(self, tag):
4553             if tag not in (_x('ttml:br'), 'br'):
4554                 unclosed_elements = self._unclosed_elements.pop()
4555                 for element in reversed(unclosed_elements):
4556                     self._out += '</%s>' % element
4557                 if unclosed_elements and self._applied_styles:
4558                     self._applied_styles.pop()
4559
4560         def data(self, data):
4561             self._out += data
4562
4563         def close(self):
4564             return self._out.strip()
4565
4566     def parse_node(node):
4567         target = TTMLPElementParser()
4568         parser = xml.etree.ElementTree.XMLParser(target=target)
4569         parser.feed(xml.etree.ElementTree.tostring(node))
4570         return parser.close()
4571
4572     for k, v in LEGACY_NAMESPACES:
4573         for ns in v:
4574             dfxp_data = dfxp_data.replace(ns, k)
4575
4576     dfxp = compat_etree_fromstring(dfxp_data)
4577     out = []
4578     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4579
4580     if not paras:
4581         raise ValueError('Invalid dfxp/TTML subtitle')
4582
4583     repeat = False
4584     while True:
4585         for style in dfxp.findall(_x('.//ttml:style')):
4586             style_id = style.get('id') or style.get(_x('xml:id'))
4587             if not style_id:
4588                 continue
4589             parent_style_id = style.get('style')
4590             if parent_style_id:
4591                 if parent_style_id not in styles:
4592                     repeat = True
4593                     continue
4594                 styles[style_id] = styles[parent_style_id].copy()
4595             for prop in SUPPORTED_STYLING:
4596                 prop_val = style.get(_x('tts:' + prop))
4597                 if prop_val:
4598                     styles.setdefault(style_id, {})[prop] = prop_val
4599         if repeat:
4600             repeat = False
4601         else:
4602             break
4603
4604     for p in ('body', 'div'):
4605         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4606         if ele is None:
4607             continue
4608         style = styles.get(ele.get('style'))
4609         if not style:
4610             continue
4611         default_style.update(style)
4612
4613     for para, index in zip(paras, itertools.count(1)):
4614         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4615         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4616         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4617         if begin_time is None:
4618             continue
4619         if not end_time:
4620             if not dur:
4621                 continue
4622             end_time = begin_time + dur
4623         out.append('%d\n%s --> %s\n%s\n\n' % (
4624             index,
4625             srt_subtitles_timecode(begin_time),
4626             srt_subtitles_timecode(end_time),
4627             parse_node(para)))
4628
4629     return ''.join(out)
4630
4631
4632 def cli_option(params, command_option, param):
4633     param = params.get(param)
4634     if param:
4635         param = compat_str(param)
4636     return [command_option, param] if param is not None else []
4637
4638
4639 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4640     param = params.get(param)
4641     if param is None:
4642         return []
4643     assert isinstance(param, bool)
4644     if separator:
4645         return [command_option + separator + (true_value if param else false_value)]
4646     return [command_option, true_value if param else false_value]
4647
4648
4649 def cli_valueless_option(params, command_option, param, expected_value=True):
4650     param = params.get(param)
4651     return [command_option] if param == expected_value else []
4652
4653
4654 def cli_configuration_args(params, param, default=[]):
4655     ex_args = params.get(param)
4656     if ex_args is None:
4657         return default
4658     assert isinstance(ex_args, list)
4659     return ex_args
4660
4661
4662 class ISO639Utils(object):
4663     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4664     _lang_map = {
4665         'aa': 'aar',
4666         'ab': 'abk',
4667         'ae': 'ave',
4668         'af': 'afr',
4669         'ak': 'aka',
4670         'am': 'amh',
4671         'an': 'arg',
4672         'ar': 'ara',
4673         'as': 'asm',
4674         'av': 'ava',
4675         'ay': 'aym',
4676         'az': 'aze',
4677         'ba': 'bak',
4678         'be': 'bel',
4679         'bg': 'bul',
4680         'bh': 'bih',
4681         'bi': 'bis',
4682         'bm': 'bam',
4683         'bn': 'ben',
4684         'bo': 'bod',
4685         'br': 'bre',
4686         'bs': 'bos',
4687         'ca': 'cat',
4688         'ce': 'che',
4689         'ch': 'cha',
4690         'co': 'cos',
4691         'cr': 'cre',
4692         'cs': 'ces',
4693         'cu': 'chu',
4694         'cv': 'chv',
4695         'cy': 'cym',
4696         'da': 'dan',
4697         'de': 'deu',
4698         'dv': 'div',
4699         'dz': 'dzo',
4700         'ee': 'ewe',
4701         'el': 'ell',
4702         'en': 'eng',
4703         'eo': 'epo',
4704         'es': 'spa',
4705         'et': 'est',
4706         'eu': 'eus',
4707         'fa': 'fas',
4708         'ff': 'ful',
4709         'fi': 'fin',
4710         'fj': 'fij',
4711         'fo': 'fao',
4712         'fr': 'fra',
4713         'fy': 'fry',
4714         'ga': 'gle',
4715         'gd': 'gla',
4716         'gl': 'glg',
4717         'gn': 'grn',
4718         'gu': 'guj',
4719         'gv': 'glv',
4720         'ha': 'hau',
4721         'he': 'heb',
4722         'iw': 'heb',  # Replaced by he in 1989 revision
4723         'hi': 'hin',
4724         'ho': 'hmo',
4725         'hr': 'hrv',
4726         'ht': 'hat',
4727         'hu': 'hun',
4728         'hy': 'hye',
4729         'hz': 'her',
4730         'ia': 'ina',
4731         'id': 'ind',
4732         'in': 'ind',  # Replaced by id in 1989 revision
4733         'ie': 'ile',
4734         'ig': 'ibo',
4735         'ii': 'iii',
4736         'ik': 'ipk',
4737         'io': 'ido',
4738         'is': 'isl',
4739         'it': 'ita',
4740         'iu': 'iku',
4741         'ja': 'jpn',
4742         'jv': 'jav',
4743         'ka': 'kat',
4744         'kg': 'kon',
4745         'ki': 'kik',
4746         'kj': 'kua',
4747         'kk': 'kaz',
4748         'kl': 'kal',
4749         'km': 'khm',
4750         'kn': 'kan',
4751         'ko': 'kor',
4752         'kr': 'kau',
4753         'ks': 'kas',
4754         'ku': 'kur',
4755         'kv': 'kom',
4756         'kw': 'cor',
4757         'ky': 'kir',
4758         'la': 'lat',
4759         'lb': 'ltz',
4760         'lg': 'lug',
4761         'li': 'lim',
4762         'ln': 'lin',
4763         'lo': 'lao',
4764         'lt': 'lit',
4765         'lu': 'lub',
4766         'lv': 'lav',
4767         'mg': 'mlg',
4768         'mh': 'mah',
4769         'mi': 'mri',
4770         'mk': 'mkd',
4771         'ml': 'mal',
4772         'mn': 'mon',
4773         'mr': 'mar',
4774         'ms': 'msa',
4775         'mt': 'mlt',
4776         'my': 'mya',
4777         'na': 'nau',
4778         'nb': 'nob',
4779         'nd': 'nde',
4780         'ne': 'nep',
4781         'ng': 'ndo',
4782         'nl': 'nld',
4783         'nn': 'nno',
4784         'no': 'nor',
4785         'nr': 'nbl',
4786         'nv': 'nav',
4787         'ny': 'nya',
4788         'oc': 'oci',
4789         'oj': 'oji',
4790         'om': 'orm',
4791         'or': 'ori',
4792         'os': 'oss',
4793         'pa': 'pan',
4794         'pi': 'pli',
4795         'pl': 'pol',
4796         'ps': 'pus',
4797         'pt': 'por',
4798         'qu': 'que',
4799         'rm': 'roh',
4800         'rn': 'run',
4801         'ro': 'ron',
4802         'ru': 'rus',
4803         'rw': 'kin',
4804         'sa': 'san',
4805         'sc': 'srd',
4806         'sd': 'snd',
4807         'se': 'sme',
4808         'sg': 'sag',
4809         'si': 'sin',
4810         'sk': 'slk',
4811         'sl': 'slv',
4812         'sm': 'smo',
4813         'sn': 'sna',
4814         'so': 'som',
4815         'sq': 'sqi',
4816         'sr': 'srp',
4817         'ss': 'ssw',
4818         'st': 'sot',
4819         'su': 'sun',
4820         'sv': 'swe',
4821         'sw': 'swa',
4822         'ta': 'tam',
4823         'te': 'tel',
4824         'tg': 'tgk',
4825         'th': 'tha',
4826         'ti': 'tir',
4827         'tk': 'tuk',
4828         'tl': 'tgl',
4829         'tn': 'tsn',
4830         'to': 'ton',
4831         'tr': 'tur',
4832         'ts': 'tso',
4833         'tt': 'tat',
4834         'tw': 'twi',
4835         'ty': 'tah',
4836         'ug': 'uig',
4837         'uk': 'ukr',
4838         'ur': 'urd',
4839         'uz': 'uzb',
4840         've': 'ven',
4841         'vi': 'vie',
4842         'vo': 'vol',
4843         'wa': 'wln',
4844         'wo': 'wol',
4845         'xh': 'xho',
4846         'yi': 'yid',
4847         'ji': 'yid',  # Replaced by yi in 1989 revision
4848         'yo': 'yor',
4849         'za': 'zha',
4850         'zh': 'zho',
4851         'zu': 'zul',
4852     }
4853
4854     @classmethod
4855     def short2long(cls, code):
4856         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4857         return cls._lang_map.get(code[:2])
4858
4859     @classmethod
4860     def long2short(cls, code):
4861         """Convert language code from ISO 639-2/T to ISO 639-1"""
4862         for short_name, long_name in cls._lang_map.items():
4863             if long_name == code:
4864                 return short_name
4865
4866
4867 class ISO3166Utils(object):
4868     # From http://data.okfn.org/data/core/country-list
4869     _country_map = {
4870         'AF': 'Afghanistan',
4871         'AX': 'Åland Islands',
4872         'AL': 'Albania',
4873         'DZ': 'Algeria',
4874         'AS': 'American Samoa',
4875         'AD': 'Andorra',
4876         'AO': 'Angola',
4877         'AI': 'Anguilla',
4878         'AQ': 'Antarctica',
4879         'AG': 'Antigua and Barbuda',
4880         'AR': 'Argentina',
4881         'AM': 'Armenia',
4882         'AW': 'Aruba',
4883         'AU': 'Australia',
4884         'AT': 'Austria',
4885         'AZ': 'Azerbaijan',
4886         'BS': 'Bahamas',
4887         'BH': 'Bahrain',
4888         'BD': 'Bangladesh',
4889         'BB': 'Barbados',
4890         'BY': 'Belarus',
4891         'BE': 'Belgium',
4892         'BZ': 'Belize',
4893         'BJ': 'Benin',
4894         'BM': 'Bermuda',
4895         'BT': 'Bhutan',
4896         'BO': 'Bolivia, Plurinational State of',
4897         'BQ': 'Bonaire, Sint Eustatius and Saba',
4898         'BA': 'Bosnia and Herzegovina',
4899         'BW': 'Botswana',
4900         'BV': 'Bouvet Island',
4901         'BR': 'Brazil',
4902         'IO': 'British Indian Ocean Territory',
4903         'BN': 'Brunei Darussalam',
4904         'BG': 'Bulgaria',
4905         'BF': 'Burkina Faso',
4906         'BI': 'Burundi',
4907         'KH': 'Cambodia',
4908         'CM': 'Cameroon',
4909         'CA': 'Canada',
4910         'CV': 'Cape Verde',
4911         'KY': 'Cayman Islands',
4912         'CF': 'Central African Republic',
4913         'TD': 'Chad',
4914         'CL': 'Chile',
4915         'CN': 'China',
4916         'CX': 'Christmas Island',
4917         'CC': 'Cocos (Keeling) Islands',
4918         'CO': 'Colombia',
4919         'KM': 'Comoros',
4920         'CG': 'Congo',
4921         'CD': 'Congo, the Democratic Republic of the',
4922         'CK': 'Cook Islands',
4923         'CR': 'Costa Rica',
4924         'CI': 'Côte d\'Ivoire',
4925         'HR': 'Croatia',
4926         'CU': 'Cuba',
4927         'CW': 'Curaçao',
4928         'CY': 'Cyprus',
4929         'CZ': 'Czech Republic',
4930         'DK': 'Denmark',
4931         'DJ': 'Djibouti',
4932         'DM': 'Dominica',
4933         'DO': 'Dominican Republic',
4934         'EC': 'Ecuador',
4935         'EG': 'Egypt',
4936         'SV': 'El Salvador',
4937         'GQ': 'Equatorial Guinea',
4938         'ER': 'Eritrea',
4939         'EE': 'Estonia',
4940         'ET': 'Ethiopia',
4941         'FK': 'Falkland Islands (Malvinas)',
4942         'FO': 'Faroe Islands',
4943         'FJ': 'Fiji',
4944         'FI': 'Finland',
4945         'FR': 'France',
4946         'GF': 'French Guiana',
4947         'PF': 'French Polynesia',
4948         'TF': 'French Southern Territories',
4949         'GA': 'Gabon',
4950         'GM': 'Gambia',
4951         'GE': 'Georgia',
4952         'DE': 'Germany',
4953         'GH': 'Ghana',
4954         'GI': 'Gibraltar',
4955         'GR': 'Greece',
4956         'GL': 'Greenland',
4957         'GD': 'Grenada',
4958         'GP': 'Guadeloupe',
4959         'GU': 'Guam',
4960         'GT': 'Guatemala',
4961         'GG': 'Guernsey',
4962         'GN': 'Guinea',
4963         'GW': 'Guinea-Bissau',
4964         'GY': 'Guyana',
4965         'HT': 'Haiti',
4966         'HM': 'Heard Island and McDonald Islands',
4967         'VA': 'Holy See (Vatican City State)',
4968         'HN': 'Honduras',
4969         'HK': 'Hong Kong',
4970         'HU': 'Hungary',
4971         'IS': 'Iceland',
4972         'IN': 'India',
4973         'ID': 'Indonesia',
4974         'IR': 'Iran, Islamic Republic of',
4975         'IQ': 'Iraq',
4976         'IE': 'Ireland',
4977         'IM': 'Isle of Man',
4978         'IL': 'Israel',
4979         'IT': 'Italy',
4980         'JM': 'Jamaica',
4981         'JP': 'Japan',
4982         'JE': 'Jersey',
4983         'JO': 'Jordan',
4984         'KZ': 'Kazakhstan',
4985         'KE': 'Kenya',
4986         'KI': 'Kiribati',
4987         'KP': 'Korea, Democratic People\'s Republic of',
4988         'KR': 'Korea, Republic of',
4989         'KW': 'Kuwait',
4990         'KG': 'Kyrgyzstan',
4991         'LA': 'Lao People\'s Democratic Republic',
4992         'LV': 'Latvia',
4993         'LB': 'Lebanon',
4994         'LS': 'Lesotho',
4995         'LR': 'Liberia',
4996         'LY': 'Libya',
4997         'LI': 'Liechtenstein',
4998         'LT': 'Lithuania',
4999         'LU': 'Luxembourg',
5000         'MO': 'Macao',
5001         'MK': 'Macedonia, the Former Yugoslav Republic of',
5002         'MG': 'Madagascar',
5003         'MW': 'Malawi',
5004         'MY': 'Malaysia',
5005         'MV': 'Maldives',
5006         'ML': 'Mali',
5007         'MT': 'Malta',
5008         'MH': 'Marshall Islands',
5009         'MQ': 'Martinique',
5010         'MR': 'Mauritania',
5011         'MU': 'Mauritius',
5012         'YT': 'Mayotte',
5013         'MX': 'Mexico',
5014         'FM': 'Micronesia, Federated States of',
5015         'MD': 'Moldova, Republic of',
5016         'MC': 'Monaco',
5017         'MN': 'Mongolia',
5018         'ME': 'Montenegro',
5019         'MS': 'Montserrat',
5020         'MA': 'Morocco',
5021         'MZ': 'Mozambique',
5022         'MM': 'Myanmar',
5023         'NA': 'Namibia',
5024         'NR': 'Nauru',
5025         'NP': 'Nepal',
5026         'NL': 'Netherlands',
5027         'NC': 'New Caledonia',
5028         'NZ': 'New Zealand',
5029         'NI': 'Nicaragua',
5030         'NE': 'Niger',
5031         'NG': 'Nigeria',
5032         'NU': 'Niue',
5033         'NF': 'Norfolk Island',
5034         'MP': 'Northern Mariana Islands',
5035         'NO': 'Norway',
5036         'OM': 'Oman',
5037         'PK': 'Pakistan',
5038         'PW': 'Palau',
5039         'PS': 'Palestine, State of',
5040         'PA': 'Panama',
5041         'PG': 'Papua New Guinea',
5042         'PY': 'Paraguay',
5043         'PE': 'Peru',
5044         'PH': 'Philippines',
5045         'PN': 'Pitcairn',
5046         'PL': 'Poland',
5047         'PT': 'Portugal',
5048         'PR': 'Puerto Rico',
5049         'QA': 'Qatar',
5050         'RE': 'Réunion',
5051         'RO': 'Romania',
5052         'RU': 'Russian Federation',
5053         'RW': 'Rwanda',
5054         'BL': 'Saint Barthélemy',
5055         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5056         'KN': 'Saint Kitts and Nevis',
5057         'LC': 'Saint Lucia',
5058         'MF': 'Saint Martin (French part)',
5059         'PM': 'Saint Pierre and Miquelon',
5060         'VC': 'Saint Vincent and the Grenadines',
5061         'WS': 'Samoa',
5062         'SM': 'San Marino',
5063         'ST': 'Sao Tome and Principe',
5064         'SA': 'Saudi Arabia',
5065         'SN': 'Senegal',
5066         'RS': 'Serbia',
5067         'SC': 'Seychelles',
5068         'SL': 'Sierra Leone',
5069         'SG': 'Singapore',
5070         'SX': 'Sint Maarten (Dutch part)',
5071         'SK': 'Slovakia',
5072         'SI': 'Slovenia',
5073         'SB': 'Solomon Islands',
5074         'SO': 'Somalia',
5075         'ZA': 'South Africa',
5076         'GS': 'South Georgia and the South Sandwich Islands',
5077         'SS': 'South Sudan',
5078         'ES': 'Spain',
5079         'LK': 'Sri Lanka',
5080         'SD': 'Sudan',
5081         'SR': 'Suriname',
5082         'SJ': 'Svalbard and Jan Mayen',
5083         'SZ': 'Swaziland',
5084         'SE': 'Sweden',
5085         'CH': 'Switzerland',
5086         'SY': 'Syrian Arab Republic',
5087         'TW': 'Taiwan, Province of China',
5088         'TJ': 'Tajikistan',
5089         'TZ': 'Tanzania, United Republic of',
5090         'TH': 'Thailand',
5091         'TL': 'Timor-Leste',
5092         'TG': 'Togo',
5093         'TK': 'Tokelau',
5094         'TO': 'Tonga',
5095         'TT': 'Trinidad and Tobago',
5096         'TN': 'Tunisia',
5097         'TR': 'Turkey',
5098         'TM': 'Turkmenistan',
5099         'TC': 'Turks and Caicos Islands',
5100         'TV': 'Tuvalu',
5101         'UG': 'Uganda',
5102         'UA': 'Ukraine',
5103         'AE': 'United Arab Emirates',
5104         'GB': 'United Kingdom',
5105         'US': 'United States',
5106         'UM': 'United States Minor Outlying Islands',
5107         'UY': 'Uruguay',
5108         'UZ': 'Uzbekistan',
5109         'VU': 'Vanuatu',
5110         'VE': 'Venezuela, Bolivarian Republic of',
5111         'VN': 'Viet Nam',
5112         'VG': 'Virgin Islands, British',
5113         'VI': 'Virgin Islands, U.S.',
5114         'WF': 'Wallis and Futuna',
5115         'EH': 'Western Sahara',
5116         'YE': 'Yemen',
5117         'ZM': 'Zambia',
5118         'ZW': 'Zimbabwe',
5119     }
5120
5121     @classmethod
5122     def short2full(cls, code):
5123         """Convert an ISO 3166-2 country code to the corresponding full name"""
5124         return cls._country_map.get(code.upper())
5125
5126
5127 class GeoUtils(object):
5128     # Major IPv4 address blocks per country
5129     _country_ip_map = {
5130         'AD': '46.172.224.0/19',
5131         'AE': '94.200.0.0/13',
5132         'AF': '149.54.0.0/17',
5133         'AG': '209.59.64.0/18',
5134         'AI': '204.14.248.0/21',
5135         'AL': '46.99.0.0/16',
5136         'AM': '46.70.0.0/15',
5137         'AO': '105.168.0.0/13',
5138         'AP': '182.50.184.0/21',
5139         'AQ': '23.154.160.0/24',
5140         'AR': '181.0.0.0/12',
5141         'AS': '202.70.112.0/20',
5142         'AT': '77.116.0.0/14',
5143         'AU': '1.128.0.0/11',
5144         'AW': '181.41.0.0/18',
5145         'AX': '185.217.4.0/22',
5146         'AZ': '5.197.0.0/16',
5147         'BA': '31.176.128.0/17',
5148         'BB': '65.48.128.0/17',
5149         'BD': '114.130.0.0/16',
5150         'BE': '57.0.0.0/8',
5151         'BF': '102.178.0.0/15',
5152         'BG': '95.42.0.0/15',
5153         'BH': '37.131.0.0/17',
5154         'BI': '154.117.192.0/18',
5155         'BJ': '137.255.0.0/16',
5156         'BL': '185.212.72.0/23',
5157         'BM': '196.12.64.0/18',
5158         'BN': '156.31.0.0/16',
5159         'BO': '161.56.0.0/16',
5160         'BQ': '161.0.80.0/20',
5161         'BR': '191.128.0.0/12',
5162         'BS': '24.51.64.0/18',
5163         'BT': '119.2.96.0/19',
5164         'BW': '168.167.0.0/16',
5165         'BY': '178.120.0.0/13',
5166         'BZ': '179.42.192.0/18',
5167         'CA': '99.224.0.0/11',
5168         'CD': '41.243.0.0/16',
5169         'CF': '197.242.176.0/21',
5170         'CG': '160.113.0.0/16',
5171         'CH': '85.0.0.0/13',
5172         'CI': '102.136.0.0/14',
5173         'CK': '202.65.32.0/19',
5174         'CL': '152.172.0.0/14',
5175         'CM': '102.244.0.0/14',
5176         'CN': '36.128.0.0/10',
5177         'CO': '181.240.0.0/12',
5178         'CR': '201.192.0.0/12',
5179         'CU': '152.206.0.0/15',
5180         'CV': '165.90.96.0/19',
5181         'CW': '190.88.128.0/17',
5182         'CY': '31.153.0.0/16',
5183         'CZ': '88.100.0.0/14',
5184         'DE': '53.0.0.0/8',
5185         'DJ': '197.241.0.0/17',
5186         'DK': '87.48.0.0/12',
5187         'DM': '192.243.48.0/20',
5188         'DO': '152.166.0.0/15',
5189         'DZ': '41.96.0.0/12',
5190         'EC': '186.68.0.0/15',
5191         'EE': '90.190.0.0/15',
5192         'EG': '156.160.0.0/11',
5193         'ER': '196.200.96.0/20',
5194         'ES': '88.0.0.0/11',
5195         'ET': '196.188.0.0/14',
5196         'EU': '2.16.0.0/13',
5197         'FI': '91.152.0.0/13',
5198         'FJ': '144.120.0.0/16',
5199         'FK': '80.73.208.0/21',
5200         'FM': '119.252.112.0/20',
5201         'FO': '88.85.32.0/19',
5202         'FR': '90.0.0.0/9',
5203         'GA': '41.158.0.0/15',
5204         'GB': '25.0.0.0/8',
5205         'GD': '74.122.88.0/21',
5206         'GE': '31.146.0.0/16',
5207         'GF': '161.22.64.0/18',
5208         'GG': '62.68.160.0/19',
5209         'GH': '154.160.0.0/12',
5210         'GI': '95.164.0.0/16',
5211         'GL': '88.83.0.0/19',
5212         'GM': '160.182.0.0/15',
5213         'GN': '197.149.192.0/18',
5214         'GP': '104.250.0.0/19',
5215         'GQ': '105.235.224.0/20',
5216         'GR': '94.64.0.0/13',
5217         'GT': '168.234.0.0/16',
5218         'GU': '168.123.0.0/16',
5219         'GW': '197.214.80.0/20',
5220         'GY': '181.41.64.0/18',
5221         'HK': '113.252.0.0/14',
5222         'HN': '181.210.0.0/16',
5223         'HR': '93.136.0.0/13',
5224         'HT': '148.102.128.0/17',
5225         'HU': '84.0.0.0/14',
5226         'ID': '39.192.0.0/10',
5227         'IE': '87.32.0.0/12',
5228         'IL': '79.176.0.0/13',
5229         'IM': '5.62.80.0/20',
5230         'IN': '117.192.0.0/10',
5231         'IO': '203.83.48.0/21',
5232         'IQ': '37.236.0.0/14',
5233         'IR': '2.176.0.0/12',
5234         'IS': '82.221.0.0/16',
5235         'IT': '79.0.0.0/10',
5236         'JE': '87.244.64.0/18',
5237         'JM': '72.27.0.0/17',
5238         'JO': '176.29.0.0/16',
5239         'JP': '133.0.0.0/8',
5240         'KE': '105.48.0.0/12',
5241         'KG': '158.181.128.0/17',
5242         'KH': '36.37.128.0/17',
5243         'KI': '103.25.140.0/22',
5244         'KM': '197.255.224.0/20',
5245         'KN': '198.167.192.0/19',
5246         'KP': '175.45.176.0/22',
5247         'KR': '175.192.0.0/10',
5248         'KW': '37.36.0.0/14',
5249         'KY': '64.96.0.0/15',
5250         'KZ': '2.72.0.0/13',
5251         'LA': '115.84.64.0/18',
5252         'LB': '178.135.0.0/16',
5253         'LC': '24.92.144.0/20',
5254         'LI': '82.117.0.0/19',
5255         'LK': '112.134.0.0/15',
5256         'LR': '102.183.0.0/16',
5257         'LS': '129.232.0.0/17',
5258         'LT': '78.56.0.0/13',
5259         'LU': '188.42.0.0/16',
5260         'LV': '46.109.0.0/16',
5261         'LY': '41.252.0.0/14',
5262         'MA': '105.128.0.0/11',
5263         'MC': '88.209.64.0/18',
5264         'MD': '37.246.0.0/16',
5265         'ME': '178.175.0.0/17',
5266         'MF': '74.112.232.0/21',
5267         'MG': '154.126.0.0/17',
5268         'MH': '117.103.88.0/21',
5269         'MK': '77.28.0.0/15',
5270         'ML': '154.118.128.0/18',
5271         'MM': '37.111.0.0/17',
5272         'MN': '49.0.128.0/17',
5273         'MO': '60.246.0.0/16',
5274         'MP': '202.88.64.0/20',
5275         'MQ': '109.203.224.0/19',
5276         'MR': '41.188.64.0/18',
5277         'MS': '208.90.112.0/22',
5278         'MT': '46.11.0.0/16',
5279         'MU': '105.16.0.0/12',
5280         'MV': '27.114.128.0/18',
5281         'MW': '102.70.0.0/15',
5282         'MX': '187.192.0.0/11',
5283         'MY': '175.136.0.0/13',
5284         'MZ': '197.218.0.0/15',
5285         'NA': '41.182.0.0/16',
5286         'NC': '101.101.0.0/18',
5287         'NE': '197.214.0.0/18',
5288         'NF': '203.17.240.0/22',
5289         'NG': '105.112.0.0/12',
5290         'NI': '186.76.0.0/15',
5291         'NL': '145.96.0.0/11',
5292         'NO': '84.208.0.0/13',
5293         'NP': '36.252.0.0/15',
5294         'NR': '203.98.224.0/19',
5295         'NU': '49.156.48.0/22',
5296         'NZ': '49.224.0.0/14',
5297         'OM': '5.36.0.0/15',
5298         'PA': '186.72.0.0/15',
5299         'PE': '186.160.0.0/14',
5300         'PF': '123.50.64.0/18',
5301         'PG': '124.240.192.0/19',
5302         'PH': '49.144.0.0/13',
5303         'PK': '39.32.0.0/11',
5304         'PL': '83.0.0.0/11',
5305         'PM': '70.36.0.0/20',
5306         'PR': '66.50.0.0/16',
5307         'PS': '188.161.0.0/16',
5308         'PT': '85.240.0.0/13',
5309         'PW': '202.124.224.0/20',
5310         'PY': '181.120.0.0/14',
5311         'QA': '37.210.0.0/15',
5312         'RE': '102.35.0.0/16',
5313         'RO': '79.112.0.0/13',
5314         'RS': '93.86.0.0/15',
5315         'RU': '5.136.0.0/13',
5316         'RW': '41.186.0.0/16',
5317         'SA': '188.48.0.0/13',
5318         'SB': '202.1.160.0/19',
5319         'SC': '154.192.0.0/11',
5320         'SD': '102.120.0.0/13',
5321         'SE': '78.64.0.0/12',
5322         'SG': '8.128.0.0/10',
5323         'SI': '188.196.0.0/14',
5324         'SK': '78.98.0.0/15',
5325         'SL': '102.143.0.0/17',
5326         'SM': '89.186.32.0/19',
5327         'SN': '41.82.0.0/15',
5328         'SO': '154.115.192.0/18',
5329         'SR': '186.179.128.0/17',
5330         'SS': '105.235.208.0/21',
5331         'ST': '197.159.160.0/19',
5332         'SV': '168.243.0.0/16',
5333         'SX': '190.102.0.0/20',
5334         'SY': '5.0.0.0/16',
5335         'SZ': '41.84.224.0/19',
5336         'TC': '65.255.48.0/20',
5337         'TD': '154.68.128.0/19',
5338         'TG': '196.168.0.0/14',
5339         'TH': '171.96.0.0/13',
5340         'TJ': '85.9.128.0/18',
5341         'TK': '27.96.24.0/21',
5342         'TL': '180.189.160.0/20',
5343         'TM': '95.85.96.0/19',
5344         'TN': '197.0.0.0/11',
5345         'TO': '175.176.144.0/21',
5346         'TR': '78.160.0.0/11',
5347         'TT': '186.44.0.0/15',
5348         'TV': '202.2.96.0/19',
5349         'TW': '120.96.0.0/11',
5350         'TZ': '156.156.0.0/14',
5351         'UA': '37.52.0.0/14',
5352         'UG': '102.80.0.0/13',
5353         'US': '6.0.0.0/8',
5354         'UY': '167.56.0.0/13',
5355         'UZ': '84.54.64.0/18',
5356         'VA': '212.77.0.0/19',
5357         'VC': '207.191.240.0/21',
5358         'VE': '186.88.0.0/13',
5359         'VG': '66.81.192.0/20',
5360         'VI': '146.226.0.0/16',
5361         'VN': '14.160.0.0/11',
5362         'VU': '202.80.32.0/20',
5363         'WF': '117.20.32.0/21',
5364         'WS': '202.4.32.0/19',
5365         'YE': '134.35.0.0/16',
5366         'YT': '41.242.116.0/22',
5367         'ZA': '41.0.0.0/11',
5368         'ZM': '102.144.0.0/13',
5369         'ZW': '102.177.192.0/18',
5370     }
5371
5372     @classmethod
5373     def random_ipv4(cls, code_or_block):
5374         if len(code_or_block) == 2:
5375             block = cls._country_ip_map.get(code_or_block.upper())
5376             if not block:
5377                 return None
5378         else:
5379             block = code_or_block
5380         addr, preflen = block.split('/')
5381         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5382         addr_max = addr_min | (0xffffffff >> int(preflen))
5383         return compat_str(socket.inet_ntoa(
5384             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5385
5386
5387 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5388     def __init__(self, proxies=None):
5389         # Set default handlers
5390         for type in ('http', 'https'):
5391             setattr(self, '%s_open' % type,
5392                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5393                         meth(r, proxy, type))
5394         compat_urllib_request.ProxyHandler.__init__(self, proxies)
5395
5396     def proxy_open(self, req, proxy, type):
5397         req_proxy = req.headers.get('Ytdl-request-proxy')
5398         if req_proxy is not None:
5399             proxy = req_proxy
5400             del req.headers['Ytdl-request-proxy']
5401
5402         if proxy == '__noproxy__':
5403             return None  # No Proxy
5404         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5405             req.add_header('Ytdl-socks-proxy', proxy)
5406             # youtube-dl's http/https handlers do wrapping the socket with socks
5407             return None
5408         return compat_urllib_request.ProxyHandler.proxy_open(
5409             self, req, proxy, type)
5410
5411
5412 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5413 # released into Public Domain
5414 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5415
5416 def long_to_bytes(n, blocksize=0):
5417     """long_to_bytes(n:long, blocksize:int) : string
5418     Convert a long integer to a byte string.
5419
5420     If optional blocksize is given and greater than zero, pad the front of the
5421     byte string with binary zeros so that the length is a multiple of
5422     blocksize.
5423     """
5424     # after much testing, this algorithm was deemed to be the fastest
5425     s = b''
5426     n = int(n)
5427     while n > 0:
5428         s = compat_struct_pack('>I', n & 0xffffffff) + s
5429         n = n >> 32
5430     # strip off leading zeros
5431     for i in range(len(s)):
5432         if s[i] != b'\000'[0]:
5433             break
5434     else:
5435         # only happens when n == 0
5436         s = b'\000'
5437         i = 0
5438     s = s[i:]
5439     # add back some pad bytes.  this could be done more efficiently w.r.t. the
5440     # de-padding being done above, but sigh...
5441     if blocksize > 0 and len(s) % blocksize:
5442         s = (blocksize - len(s) % blocksize) * b'\000' + s
5443     return s
5444
5445
5446 def bytes_to_long(s):
5447     """bytes_to_long(string) : long
5448     Convert a byte string to a long integer.
5449
5450     This is (essentially) the inverse of long_to_bytes().
5451     """
5452     acc = 0
5453     length = len(s)
5454     if length % 4:
5455         extra = (4 - length % 4)
5456         s = b'\000' * extra + s
5457         length = length + extra
5458     for i in range(0, length, 4):
5459         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5460     return acc
5461
5462
5463 def ohdave_rsa_encrypt(data, exponent, modulus):
5464     '''
5465     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5466
5467     Input:
5468         data: data to encrypt, bytes-like object
5469         exponent, modulus: parameter e and N of RSA algorithm, both integer
5470     Output: hex string of encrypted data
5471
5472     Limitation: supports one block encryption only
5473     '''
5474
5475     payload = int(binascii.hexlify(data[::-1]), 16)
5476     encrypted = pow(payload, exponent, modulus)
5477     return '%x' % encrypted
5478
5479
5480 def pkcs1pad(data, length):
5481     """
5482     Padding input data with PKCS#1 scheme
5483
5484     @param {int[]} data        input data
5485     @param {int}   length      target length
5486     @returns {int[]}           padded data
5487     """
5488     if len(data) > length - 11:
5489         raise ValueError('Input data too long for PKCS#1 padding')
5490
5491     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5492     return [0, 2] + pseudo_random + [0] + data
5493
5494
5495 def encode_base_n(num, n, table=None):
5496     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5497     if not table:
5498         table = FULL_TABLE[:n]
5499
5500     if n > len(table):
5501         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5502
5503     if num == 0:
5504         return table[0]
5505
5506     ret = ''
5507     while num:
5508         ret = table[num % n] + ret
5509         num = num // n
5510     return ret
5511
5512
5513 def decode_packed_codes(code):
5514     mobj = re.search(PACKED_CODES_RE, code)
5515     obfuscated_code, base, count, symbols = mobj.groups()
5516     base = int(base)
5517     count = int(count)
5518     symbols = symbols.split('|')
5519     symbol_table = {}
5520
5521     while count:
5522         count -= 1
5523         base_n_count = encode_base_n(count, base)
5524         symbol_table[base_n_count] = symbols[count] or base_n_count
5525
5526     return re.sub(
5527         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5528         obfuscated_code)
5529
5530
5531 def caesar(s, alphabet, shift):
5532     if shift == 0:
5533         return s
5534     l = len(alphabet)
5535     return ''.join(
5536         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5537         for c in s)
5538
5539
5540 def rot47(s):
5541     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5542
5543
5544 def parse_m3u8_attributes(attrib):
5545     info = {}
5546     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5547         if val.startswith('"'):
5548             val = val[1:-1]
5549         info[key] = val
5550     return info
5551
5552
5553 def urshift(val, n):
5554     return val >> n if val >= 0 else (val + 0x100000000) >> n
5555
5556
5557 # Based on png2str() written by @gdkchan and improved by @yokrysty
5558 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5559 def decode_png(png_data):
5560     # Reference: https://www.w3.org/TR/PNG/
5561     header = png_data[8:]
5562
5563     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5564         raise IOError('Not a valid PNG file.')
5565
5566     int_map = {1: '>B', 2: '>H', 4: '>I'}
5567     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5568
5569     chunks = []
5570
5571     while header:
5572         length = unpack_integer(header[:4])
5573         header = header[4:]
5574
5575         chunk_type = header[:4]
5576         header = header[4:]
5577
5578         chunk_data = header[:length]
5579         header = header[length:]
5580
5581         header = header[4:]  # Skip CRC
5582
5583         chunks.append({
5584             'type': chunk_type,
5585             'length': length,
5586             'data': chunk_data
5587         })
5588
5589     ihdr = chunks[0]['data']
5590
5591     width = unpack_integer(ihdr[:4])
5592     height = unpack_integer(ihdr[4:8])
5593
5594     idat = b''
5595
5596     for chunk in chunks:
5597         if chunk['type'] == b'IDAT':
5598             idat += chunk['data']
5599
5600     if not idat:
5601         raise IOError('Unable to read PNG data.')
5602
5603     decompressed_data = bytearray(zlib.decompress(idat))
5604
5605     stride = width * 3
5606     pixels = []
5607
5608     def _get_pixel(idx):
5609         x = idx % stride
5610         y = idx // stride
5611         return pixels[y][x]
5612
5613     for y in range(height):
5614         basePos = y * (1 + stride)
5615         filter_type = decompressed_data[basePos]
5616
5617         current_row = []
5618
5619         pixels.append(current_row)
5620
5621         for x in range(stride):
5622             color = decompressed_data[1 + basePos + x]
5623             basex = y * stride + x
5624             left = 0
5625             up = 0
5626
5627             if x > 2:
5628                 left = _get_pixel(basex - 3)
5629             if y > 0:
5630                 up = _get_pixel(basex - stride)
5631
5632             if filter_type == 1:  # Sub
5633                 color = (color + left) & 0xff
5634             elif filter_type == 2:  # Up
5635                 color = (color + up) & 0xff
5636             elif filter_type == 3:  # Average
5637                 color = (color + ((left + up) >> 1)) & 0xff
5638             elif filter_type == 4:  # Paeth
5639                 a = left
5640                 b = up
5641                 c = 0
5642
5643                 if x > 2 and y > 0:
5644                     c = _get_pixel(basex - stride - 3)
5645
5646                 p = a + b - c
5647
5648                 pa = abs(p - a)
5649                 pb = abs(p - b)
5650                 pc = abs(p - c)
5651
5652                 if pa <= pb and pa <= pc:
5653                     color = (color + a) & 0xff
5654                 elif pb <= pc:
5655                     color = (color + b) & 0xff
5656                 else:
5657                     color = (color + c) & 0xff
5658
5659             current_row.append(color)
5660
5661     return width, height, pixels
5662
5663
5664 def write_xattr(path, key, value):
5665     # This mess below finds the best xattr tool for the job
5666     try:
5667         # try the pyxattr module...
5668         import xattr
5669
5670         if hasattr(xattr, 'set'):  # pyxattr
5671             # Unicode arguments are not supported in python-pyxattr until
5672             # version 0.5.0
5673             # See https://github.com/ytdl-org/youtube-dl/issues/5498
5674             pyxattr_required_version = '0.5.0'
5675             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5676                 # TODO: fallback to CLI tools
5677                 raise XAttrUnavailableError(
5678                     'python-pyxattr is detected but is too old. '
5679                     'youtube-dl requires %s or above while your version is %s. '
5680                     'Falling back to other xattr implementations' % (
5681                         pyxattr_required_version, xattr.__version__))
5682
5683             setxattr = xattr.set
5684         else:  # xattr
5685             setxattr = xattr.setxattr
5686
5687         try:
5688             setxattr(path, key, value)
5689         except EnvironmentError as e:
5690             raise XAttrMetadataError(e.errno, e.strerror)
5691
5692     except ImportError:
5693         if compat_os_name == 'nt':
5694             # Write xattrs to NTFS Alternate Data Streams:
5695             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5696             assert ':' not in key
5697             assert os.path.exists(path)
5698
5699             ads_fn = path + ':' + key
5700             try:
5701                 with open(ads_fn, 'wb') as f:
5702                     f.write(value)
5703             except EnvironmentError as e:
5704                 raise XAttrMetadataError(e.errno, e.strerror)
5705         else:
5706             user_has_setfattr = check_executable('setfattr', ['--version'])
5707             user_has_xattr = check_executable('xattr', ['-h'])
5708
5709             if user_has_setfattr or user_has_xattr:
5710
5711                 value = value.decode('utf-8')
5712                 if user_has_setfattr:
5713                     executable = 'setfattr'
5714                     opts = ['-n', key, '-v', value]
5715                 elif user_has_xattr:
5716                     executable = 'xattr'
5717                     opts = ['-w', key, value]
5718
5719                 cmd = ([encodeFilename(executable, True)]
5720                        + [encodeArgument(o) for o in opts]
5721                        + [encodeFilename(path, True)])
5722
5723                 try:
5724                     p = subprocess.Popen(
5725                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5726                 except EnvironmentError as e:
5727                     raise XAttrMetadataError(e.errno, e.strerror)
5728                 stdout, stderr = p.communicate()
5729                 stderr = stderr.decode('utf-8', 'replace')
5730                 if p.returncode != 0:
5731                     raise XAttrMetadataError(p.returncode, stderr)
5732
5733             else:
5734                 # On Unix, and can't find pyxattr, setfattr, or xattr.
5735                 if sys.platform.startswith('linux'):
5736                     raise XAttrUnavailableError(
5737                         "Couldn't find a tool to set the xattrs. "
5738                         "Install either the python 'pyxattr' or 'xattr' "
5739                         "modules, or the GNU 'attr' package "
5740                         "(which contains the 'setfattr' tool).")
5741                 else:
5742                     raise XAttrUnavailableError(
5743                         "Couldn't find a tool to set the xattrs. "
5744                         "Install either the python 'xattr' module, "
5745                         "or the 'xattr' binary.")
5746
5747
5748 def random_birthday(year_field, month_field, day_field):
5749     start_date = datetime.date(1950, 1, 1)
5750     end_date = datetime.date(1995, 12, 31)
5751     offset = random.randint(0, (end_date - start_date).days)
5752     random_date = start_date + datetime.timedelta(offset)
5753     return {
5754         year_field: str(random_date.year),
5755         month_field: str(random_date.month),
5756         day_field: str(random_date.day),
5757     }
5758
5759
5760 def clean_podcast_url(url):
5761     return re.sub(r'''(?x)
5762         (?:
5763             (?:
5764                 chtbl\.com/track|
5765                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5766                 play\.podtrac\.com
5767             )/[^/]+|
5768             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5769             flex\.acast\.com|
5770             pd(?:
5771                 cn\.co| # https://podcorn.com/analytics-prefix/
5772                 st\.fm # https://podsights.com/docs/
5773             )/e
5774         )/''', '', url)