|
2 | 2 | import urllib.parse |
3 | 3 |
|
4 | 4 | from .common import InfoExtractor |
5 | | -from ..utils import js_to_json, str_or_none, traverse_obj |
| 5 | +from ..networking import HEADRequest |
| 6 | +from ..utils import ( |
| 7 | + determine_ext, |
| 8 | + js_to_json, |
| 9 | + str_or_none, |
| 10 | +) |
| 11 | +from ..utils.traversal import traverse_obj |
6 | 12 |
|
7 | 13 |
|
8 | 14 | class SubstackIE(InfoExtractor): |
@@ -43,6 +49,19 @@ class SubstackIE(InfoExtractor): |
43 | 49 | 'uploader': "Andrew Zimmern's Spilled Milk ", |
44 | 50 | 'uploader_id': '577659', |
45 | 51 | }, |
| 52 | + }, { |
| 53 | + # Podcast that needs its file extension resolved to mp3 |
| 54 | + 'url': 'https://persuasion1.substack.com/p/summers', |
| 55 | + 'md5': '1456a755d46084744facdfac9edf900f', |
| 56 | + 'info_dict': { |
| 57 | + 'id': '141970405', |
| 58 | + 'ext': 'mp3', |
| 59 | + 'title': 'Larry Summers on What Went Wrong on Campus', |
| 60 | + 'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.', |
| 61 | + 'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg', |
| 62 | + 'uploader': 'Persuasion', |
| 63 | + 'uploader_id': '61579', |
| 64 | + }, |
46 | 65 | }] |
47 | 66 |
|
48 | 67 | @classmethod |
@@ -89,7 +108,15 @@ def _real_extract(self, url): |
89 | 108 | post_type = webpage_info['post']['type'] |
90 | 109 | formats, subtitles = [], {} |
91 | 110 | if post_type == 'podcast': |
92 | | - formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {} |
| 111 | + fmt = {'url': webpage_info['post']['podcast_url']} |
| 112 | + if not determine_ext(fmt['url'], default_ext=None): |
| 113 | + # The redirected format URL expires but the original URL doesn't, |
| 114 | + # so we only want to extract the extension from this request |
| 115 | + fmt['ext'] = determine_ext(self._request_webpage( |
| 116 | + HEADRequest(fmt['url']), display_id, |
| 117 | + 'Resolving podcast file extension', |
| 118 | + 'Podcast URL is invalid').url) |
| 119 | + formats.append(fmt) |
93 | 120 | elif post_type == 'video': |
94 | 121 | formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) |
95 | 122 | else: |
|
0 commit comments