Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 14:45:14 +00:00)
			
		
		
		
	[utils] clean_podcast_url: Handle more trackers (#7556)
				
					
				
			Authored by: mabdelfattah, bashonly
			Closes #7544
This commit is contained in:
		 Mahmoud Abdel-Fattah
					Mahmoud Abdel-Fattah
				
			
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			 GitHub
						GitHub
					
				
			
						parent
						
							325191d0c9
						
					
				
				
					commit
					2af4eeb772
				
			| @@ -1835,6 +1835,8 @@ Line 1 | |||||||
|     def test_clean_podcast_url(self): |     def test_clean_podcast_url(self): | ||||||
|         self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') |         self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') | ||||||
|         self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') |         self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') | ||||||
|  |         self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661') | ||||||
|  |         self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3') | ||||||
| 
 | 
 | ||||||
|     def test_LazyList(self): |     def test_LazyList(self): | ||||||
|         it = list(range(10)) |         it = list(range(10)) | ||||||
|   | |||||||
| @@ -5123,14 +5123,18 @@ def clean_podcast_url(url): | |||||||
|             (?: |             (?: | ||||||
|                 chtbl\.com/track| |                 chtbl\.com/track| | ||||||
|                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/ |                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/ | ||||||
|                 play\.podtrac\.com |                 play\.podtrac\.com| | ||||||
|             )/[^/]+| |                 chrt\.fm/track| | ||||||
|  |                 mgln\.ai/e | ||||||
|  |             )(?:/[^/.]+)?| | ||||||
|             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure |             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure | ||||||
|             flex\.acast\.com| |             flex\.acast\.com| | ||||||
|             pd(?: |             pd(?: | ||||||
|                 cn\.co| # https://podcorn.com/analytics-prefix/ |                 cn\.co| # https://podcorn.com/analytics-prefix/ | ||||||
|                 st\.fm # https://podsights.com/docs/ |                 st\.fm # https://podsights.com/docs/ | ||||||
|             )/e |             )/e| | ||||||
|  |             [0-9]\.gum\.fm| | ||||||
|  |             pscrb\.fm/rss/p | ||||||
|         )/''', '', url) |         )/''', '', url) | ||||||
|     return re.sub(r'^\w+://(\w+://)', r'\1', url) |     return re.sub(r'^\w+://(\w+://)', r'\1', url) | ||||||
| 
 | 
 | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user