Skip to content

Commit 15c3e32

Browse files
Merge branch 'yt-dlp:master' into master
2 parents 55a9c97 + 8820101 commit 15c3e32

File tree

9 files changed

+83
-35
lines changed

9 files changed

+83
-35
lines changed

yt_dlp/YoutubeDL.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
SSLError,
5353
network_exceptions,
5454
)
55-
from .networking.impersonate import ImpersonateRequestHandler
55+
from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget
5656
from .plugins import directories as plugin_directories, load_all_plugins
5757
from .postprocessor import (
5858
EmbedThumbnailPP,
@@ -3231,6 +3231,16 @@ def dl(self, name, info, subtitle=False, test=False):
32313231
}
32323232
else:
32333233
params = self.params
3234+
3235+
impersonate = info.pop('impersonate', None)
3236+
# Do not override --impersonate with extractor-specified impersonation
3237+
if params.get('impersonate') is None:
3238+
available_target, requested_targets = self._parse_impersonate_targets(impersonate)
3239+
if available_target:
3240+
info['impersonate'] = available_target
3241+
elif requested_targets:
3242+
self.report_warning(self._unavailable_targets_message(requested_targets), only_once=True)
3243+
32343244
fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
32353245
if not test:
32363246
for ph in self._progress_hooks:
@@ -4185,6 +4195,31 @@ def _impersonate_target_available(self, target):
41854195
for rh in self._request_director.handlers.values()
41864196
if isinstance(rh, ImpersonateRequestHandler))
41874197

4198+
def _parse_impersonate_targets(self, impersonate):
4199+
if impersonate in (True, ''):
4200+
impersonate = ImpersonateTarget()
4201+
4202+
requested_targets = [
4203+
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
4204+
for t in variadic(impersonate)
4205+
] if impersonate else []
4206+
4207+
available_target = next(filter(self._impersonate_target_available, requested_targets), None)
4208+
4209+
return available_target, requested_targets
4210+
4211+
@staticmethod
4212+
def _unavailable_targets_message(requested_targets, note=None, is_error=False):
4213+
note = note or 'The extractor specified to use impersonation for this download'
4214+
specific_targets = ', '.join(filter(None, map(str, requested_targets)))
4215+
message = (
4216+
'no impersonate target is available' if not specific_targets
4217+
else f'none of these impersonate targets are available: {specific_targets}')
4218+
return (
4219+
f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}'
4220+
f' https://github.com/yt-dlp/yt-dlp#impersonation '
4221+
f'for information on installing the required dependencies')
4222+
41884223
def urlopen(self, req):
41894224
""" Start an HTTP download """
41904225
if isinstance(req, str):

yt_dlp/downloader/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
9999
if external_downloader is None:
100100
if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
101101
return FFmpegFD
102-
elif external_downloader.lower() != 'native':
102+
elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None:
103103
ed = get_external_downloader(external_downloader)
104104
if ed.can_download(info_dict, external_downloader):
105105
return ed

yt_dlp/downloader/http.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ class HttpFD(FileDownloader):
2727
def real_download(self, filename, info_dict):
2828
url = info_dict['url']
2929
request_data = info_dict.get('request_data', None)
30+
request_extensions = {}
31+
if info_dict.get('impersonate') is not None:
32+
request_extensions['impersonate'] = info_dict['impersonate']
3033

3134
class DownloadContext(dict):
3235
__getattr__ = dict.get
@@ -109,7 +112,7 @@ def establish_connection():
109112
if try_call(lambda: range_end >= ctx.content_len):
110113
range_end = ctx.content_len - 1
111114

112-
request = Request(url, request_data, headers)
115+
request = Request(url, request_data, headers, extensions=request_extensions)
113116
has_range = range_start is not None
114117
if has_range:
115118
request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'

yt_dlp/extractor/bandcamp.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from ..utils import (
88
KNOWN_EXTENSIONS,
99
ExtractorError,
10+
clean_html,
1011
extract_attributes,
1112
float_or_none,
1213
int_or_none,
@@ -19,7 +20,7 @@
1920
url_or_none,
2021
urljoin,
2122
)
22-
from ..utils.traversal import find_element, traverse_obj
23+
from ..utils.traversal import find_element, find_elements, traverse_obj
2324

2425

2526
class BandcampIE(InfoExtractor):
@@ -70,6 +71,9 @@ class BandcampIE(InfoExtractor):
7071
'album': 'FTL: Advanced Edition Soundtrack',
7172
'uploader_url': 'https://benprunty.bandcamp.com',
7273
'uploader_id': 'benprunty',
74+
'tags': ['soundtrack', 'chiptunes', 'cinematic', 'electronic', 'video game music', 'California'],
75+
'artists': ['Ben Prunty'],
76+
'album_artists': ['Ben Prunty'],
7377
},
7478
}, {
7579
# no free download, mp3 128
@@ -94,6 +98,9 @@ class BandcampIE(InfoExtractor):
9498
'album': 'Call of the Mastodon',
9599
'uploader_url': 'https://relapsealumni.bandcamp.com',
96100
'uploader_id': 'relapsealumni',
101+
'tags': ['Philadelphia'],
102+
'artists': ['Mastodon'],
103+
'album_artists': ['Mastodon'],
97104
},
98105
}, {
99106
# track from compilation album (artist/album_artist difference)
@@ -118,6 +125,9 @@ class BandcampIE(InfoExtractor):
118125
'album': 'DSK F/W 2016-2017 Free Compilation',
119126
'uploader_url': 'https://diskotopia.bandcamp.com',
120127
'uploader_id': 'diskotopia',
128+
'tags': ['Japan'],
129+
'artists': ['submerse'],
130+
'album_artists': ['Diskotopia'],
121131
},
122132
}]
123133

@@ -252,6 +262,7 @@ def _real_extract(self, url):
252262
'album': embed.get('album_title'),
253263
'album_artist': album_artist,
254264
'formats': formats,
265+
'tags': traverse_obj(webpage, ({find_elements(cls='tag')}, ..., {clean_html})),
255266
}
256267

257268

yt_dlp/extractor/common.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
TransportError,
3939
network_exceptions,
4040
)
41-
from ..networking.impersonate import ImpersonateTarget
4241
from ..utils import (
4342
IDENTITY,
4443
JSON_LD_RE,
@@ -259,6 +258,11 @@ class InfoExtractor:
259258
* key The key (as hex) used to decrypt fragments.
260259
If `key` is given, any key URI will be ignored
261260
* iv The IV (as hex) used to decrypt fragments
261+
* impersonate Impersonate target(s). Can be any of the following entities:
262+
* an instance of yt_dlp.networking.impersonate.ImpersonateTarget
263+
* a string in the format of CLIENT[:OS]
264+
* a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
265+
* a boolean value; True means any impersonate target is sufficient
262266
* downloader_options A dictionary of downloader options
263267
(For internal use only)
264268
* http_chunk_size Chunk size for HTTP downloads
@@ -336,6 +340,7 @@ class InfoExtractor:
336340
* "name": Name or description of the subtitles
337341
* "http_headers": A dictionary of additional HTTP headers
338342
to add to the request.
343+
* "impersonate": Impersonate target(s); same as the "formats" field
339344
"ext" will be calculated from URL if missing
340345
automatic_captions: Like 'subtitles'; contains automatically generated
341346
captions instead of normal subtitles
@@ -884,26 +889,17 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
884889

885890
extensions = {}
886891

887-
if impersonate in (True, ''):
888-
impersonate = ImpersonateTarget()
889-
requested_targets = [
890-
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
891-
for t in variadic(impersonate)
892-
] if impersonate else []
893-
894-
available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
892+
available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate)
895893
if available_target:
896894
extensions['impersonate'] = available_target
897895
elif requested_targets:
898-
message = 'The extractor is attempting impersonation, but '
899-
message += (
900-
'no impersonate target is available' if not str(impersonate)
901-
else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
902-
info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation '
903-
'for information on installing the required dependencies')
896+
msg = 'The extractor is attempting impersonation'
904897
if require_impersonation:
905-
raise ExtractorError(f'{message}; {info_msg}', expected=True)
906-
self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True)
898+
raise ExtractorError(
899+
self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True),
900+
expected=True)
901+
self.report_warning(
902+
self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True)
907903

908904
try:
909905
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))

yt_dlp/extractor/mlb.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -457,12 +457,9 @@ def _extract_formats_and_subtitles(self, broadcast, video_id):
457457
self.report_warning(f'No formats available for {format_id} broadcast; skipping')
458458
return [], {}
459459

460-
cdn_headers = {'x-cdn-token': token}
461460
fmts, subs = self._extract_m3u8_formats_and_subtitles(
462-
m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4',
463-
m3u8_id=format_id, fatal=False, headers=cdn_headers)
461+
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
464462
for fmt in fmts:
465-
fmt['http_headers'] = cdn_headers
466463
fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' '))
467464
fmt.setdefault('language', language)
468465
if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':

yt_dlp/extractor/tenplay.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77

88
class TenPlayIE(InfoExtractor):
99
IE_NAME = '10play'
10-
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/?#]+/)+(?P<id>tpv\d{6}[a-z]{5})'
10+
_VALID_URL = r'https?://(?:www\.)?10(?:play)?\.com\.au/(?:[^/?#]+/)+(?P<id>tpv\d{6}[a-z]{5})'
1111
_NETRC_MACHINE = '10play'
1212
_TESTS = [{
1313
# Geo-restricted to Australia
14-
'url': 'https://10play.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf',
14+
'url': 'https://10.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf',
1515
'info_dict': {
1616
'id': '7440980000013868',
1717
'ext': 'mp4',
@@ -32,7 +32,7 @@ class TenPlayIE(InfoExtractor):
3232
'params': {'skip_download': 'm3u8'},
3333
}, {
3434
# Geo-restricted to Australia
35-
'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
35+
'url': 'https://10.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
3636
'info_dict': {
3737
'id': '9000000000091177',
3838
'ext': 'mp4',
@@ -55,7 +55,7 @@ class TenPlayIE(InfoExtractor):
5555
'params': {'skip_download': 'm3u8'},
5656
}, {
5757
# Geo-restricted to Australia; upgrading the m3u8 quality fails and we need the fallback
58-
'url': 'https://10play.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt',
58+
'url': 'https://10.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt',
5959
'info_dict': {
6060
'id': '9000000000084116',
6161
'ext': 'mp4',
@@ -77,6 +77,7 @@ class TenPlayIE(InfoExtractor):
7777
},
7878
'params': {'skip_download': 'm3u8'},
7979
'expected_warnings': ['Failed to download m3u8 information: HTTP Error 502'],
80+
'skip': 'video unavailable',
8081
}, {
8182
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
8283
'only_matching': True,
@@ -96,7 +97,7 @@ class TenPlayIE(InfoExtractor):
9697
def _real_extract(self, url):
9798
content_id = self._match_id(url)
9899
data = self._download_json(
99-
'https://10play.com.au/api/v1/videos/' + content_id, content_id)
100+
'https://10.com.au/api/v1/videos/' + content_id, content_id)
100101

101102
video_data = self._download_json(
102103
f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}',
@@ -137,21 +138,24 @@ def _real_extract(self, url):
137138

138139
class TenPlaySeasonIE(InfoExtractor):
139140
IE_NAME = '10play:season'
140-
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P<show>[^/?#]+)/episodes/(?P<season>[^/?#]+)/?(?:$|[?#])'
141+
_VALID_URL = r'https?://(?:www\.)?10(?:play)?\.com\.au/(?P<show>[^/?#]+)/episodes/(?P<season>[^/?#]+)/?(?:$|[?#])'
141142
_TESTS = [{
142-
'url': 'https://10play.com.au/masterchef/episodes/season-15',
143+
'url': 'https://10.com.au/masterchef/episodes/season-15',
143144
'info_dict': {
144145
'title': 'Season 15',
145146
'id': 'MTQ2NjMxOQ==',
146147
},
147148
'playlist_mincount': 50,
148149
}, {
149-
'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024',
150+
'url': 'https://10.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024',
150151
'info_dict': {
151152
'title': 'Season 2024',
152153
'id': 'Mjc0OTIw',
153154
},
154155
'playlist_mincount': 159,
156+
}, {
157+
'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024',
158+
'only_matching': True,
155159
}]
156160

157161
def _entries(self, load_more_url, display_id=None):
@@ -172,7 +176,7 @@ def _entries(self, load_more_url, display_id=None):
172176
def _real_extract(self, url):
173177
show, season = self._match_valid_url(url).group('show', 'season')
174178
season_info = self._download_json(
175-
f'https://10play.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}')
179+
f'https://10.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}')
176180

177181
episodes_carousel = traverse_obj(season_info, (
178182
'content', 0, 'components', (

yt_dlp/extractor/youtube/_video.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4056,6 +4056,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
40564056
'ext': fmt,
40574057
'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
40584058
'name': sub_name,
4059+
'impersonate': True,
40594060
STREAMING_DATA_CLIENT_NAME: client_name,
40604061
})
40614062

yt_dlp/networking/_requests.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ def _create_instance(self, cookiejar, legacy_ssl_support=None):
313313
max_retries=urllib3.util.retry.Retry(False),
314314
)
315315
session.adapters.clear()
316-
session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
316+
session.headers = requests.models.CaseInsensitiveDict()
317317
session.mount('https://', http_adapter)
318318
session.mount('http://', http_adapter)
319319
session.cookies = cookiejar
@@ -322,6 +322,7 @@ def _create_instance(self, cookiejar, legacy_ssl_support=None):
322322

323323
def _prepare_headers(self, _, headers):
    """Fill in default request headers; `headers` is modified in place.

    The second positional argument is not used by this implementation.
    """
    add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)
    # setdefault keeps a Connection header that is already present in `headers`
    headers.setdefault('Connection', 'keep-alive')
325326

326327
def _send(self, request):
327328

0 commit comments

Comments
 (0)