mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[instagram] Add support for user profiles (Fixes #2606)
This commit is contained in:
		| @@ -110,3 +110,21 @@ def expect_info_dict(self, expected_dict, got_dict): | |||||||
|             self.assertEqual(expected, got, |             self.assertEqual(expected, got, | ||||||
|                 u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) |                 u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||||
|  |  | ||||||
|  |     # Check for the presence of mandatory fields | ||||||
|  |     for key in ('id', 'url', 'title', 'ext'): | ||||||
|  |         self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) | ||||||
|  |     # Check for mandatory fields that are automatically set by YoutubeDL | ||||||
|  |     for key in ['webpage_url', 'extractor', 'extractor_key']: | ||||||
|  |         self.assertTrue(got_dict.get(key), u'Missing field: %s' % key) | ||||||
|  |  | ||||||
|  |     # Are checkable fields missing from the test case definition? | ||||||
|  |     test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||||
|  |         for key, value in got_dict.items() | ||||||
|  |         if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) | ||||||
|  |     missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) | ||||||
|  |     if missing_keys: | ||||||
|  |         sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') | ||||||
|  |         self.assertFalse( | ||||||
|  |             missing_keys, | ||||||
|  |             'Missing keys in test definition: %s' % ( | ||||||
|  |                 ', '.join(sorted(missing_keys)))) | ||||||
|   | |||||||
| @@ -137,25 +137,6 @@ def generator(test_case): | |||||||
|                     info_dict = json.load(infof) |                     info_dict = json.load(infof) | ||||||
|  |  | ||||||
|                 expect_info_dict(self, tc.get('info_dict', {}), info_dict) |                 expect_info_dict(self, tc.get('info_dict', {}), info_dict) | ||||||
|  |  | ||||||
|                 # Check for the presence of mandatory fields |  | ||||||
|                 for key in ('id', 'url', 'title', 'ext'): |  | ||||||
|                     self.assertTrue(key in info_dict.keys() and info_dict[key]) |  | ||||||
|                 # Check for mandatory fields that are automatically set by YoutubeDL |  | ||||||
|                 for key in ['webpage_url', 'extractor', 'extractor_key']: |  | ||||||
|                     self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) |  | ||||||
|  |  | ||||||
|                 # Are checkable fields missing from the test case definition? |  | ||||||
|                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) |  | ||||||
|                     for key, value in info_dict.items() |  | ||||||
|                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) |  | ||||||
|                 missing_keys = set(test_info_dict.keys()) - set(tc.get('info_dict', {}).keys()) |  | ||||||
|                 if missing_keys: |  | ||||||
|                     sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') |  | ||||||
|                     self.assertFalse( |  | ||||||
|                         missing_keys, |  | ||||||
|                         'Missing keys in test definition: %s' % ( |  | ||||||
|                             ','.join(sorted(missing_keys)))) |  | ||||||
|         finally: |         finally: | ||||||
|             try_rm_tcs_files() |             try_rm_tcs_files() | ||||||
|  |  | ||||||
|   | |||||||
| @@ -9,8 +9,10 @@ import sys | |||||||
| import unittest | import unittest | ||||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
|  |  | ||||||
| from test.helper import FakeYDL | from test.helper import ( | ||||||
|  |     expect_info_dict, | ||||||
|  |     FakeYDL, | ||||||
|  | ) | ||||||
|  |  | ||||||
| from youtube_dl.extractor import ( | from youtube_dl.extractor import ( | ||||||
|     AcademicEarthCourseIE, |     AcademicEarthCourseIE, | ||||||
| @@ -39,6 +41,7 @@ from youtube_dl.extractor import ( | |||||||
|     TEDIE, |     TEDIE, | ||||||
|     ToypicsUserIE, |     ToypicsUserIE, | ||||||
|     XTubeUserIE, |     XTubeUserIE, | ||||||
|  |     InstagramUserIE, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -287,5 +290,28 @@ class TestPlaylists(unittest.TestCase): | |||||||
|         self.assertEqual(result['id'], 'greenshowers') |         self.assertEqual(result['id'], 'greenshowers') | ||||||
|         self.assertTrue(len(result['entries']) >= 155) |         self.assertTrue(len(result['entries']) >= 155) | ||||||
|  |  | ||||||
|  |     def test_InstagramUser(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = InstagramUserIE(dl) | ||||||
|  |         result = ie.extract('http://instagram.com/porsche') | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['id'], 'porsche') | ||||||
|  |         self.assertTrue(len(result['entries']) >= 2) | ||||||
|  |         test_video = next( | ||||||
|  |             e for e in result['entries'] | ||||||
|  |             if e['id'] == '614605558512799803_462752227') | ||||||
|  |         dl.add_default_extra_info(test_video, ie, '(irrelevant URL)') | ||||||
|  |         dl.process_video_result(test_video, download=False) | ||||||
|  |         EXPECTED = { | ||||||
|  |             'id': '614605558512799803_462752227', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': '#Porsche Intelligent Performance.', | ||||||
|  |             'thumbnail': 're:^https?://.*\.jpg', | ||||||
|  |             'uploader': 'Porsche', | ||||||
|  |             'uploader_id': 'porsche', | ||||||
|  |         } | ||||||
|  |         expect_info_dict(self, EXPECTED, test_video) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -512,13 +512,7 @@ class YoutubeDL(object): | |||||||
|                         '_type': 'compat_list', |                         '_type': 'compat_list', | ||||||
|                         'entries': ie_result, |                         'entries': ie_result, | ||||||
|                     } |                     } | ||||||
|                 self.add_extra_info(ie_result, |                 self.add_default_extra_info(ie_result, ie, url) | ||||||
|                     { |  | ||||||
|                         'extractor': ie.IE_NAME, |  | ||||||
|                         'webpage_url': url, |  | ||||||
|                         'webpage_url_basename': url_basename(url), |  | ||||||
|                         'extractor_key': ie.ie_key(), |  | ||||||
|                     }) |  | ||||||
|                 if process: |                 if process: | ||||||
|                     return self.process_ie_result(ie_result, download, extra_info) |                     return self.process_ie_result(ie_result, download, extra_info) | ||||||
|                 else: |                 else: | ||||||
| @@ -537,6 +531,14 @@ class YoutubeDL(object): | |||||||
|         else: |         else: | ||||||
|             self.report_error('no suitable InfoExtractor for URL %s' % url) |             self.report_error('no suitable InfoExtractor for URL %s' % url) | ||||||
|  |  | ||||||
|  |     def add_default_extra_info(self, ie_result, ie, url): | ||||||
|  |         self.add_extra_info(ie_result, { | ||||||
|  |             'extractor': ie.IE_NAME, | ||||||
|  |             'webpage_url': url, | ||||||
|  |             'webpage_url_basename': url_basename(url), | ||||||
|  |             'extractor_key': ie.ie_key(), | ||||||
|  |         }) | ||||||
|  |  | ||||||
|     def process_ie_result(self, ie_result, download=True, extra_info={}): |     def process_ie_result(self, ie_result, download=True, extra_info={}): | ||||||
|         """ |         """ | ||||||
|         Take the result of the ie(may be modified) and resolve all unresolved |         Take the result of the ie(may be modified) and resolve all unresolved | ||||||
|   | |||||||
| @@ -112,7 +112,7 @@ from .imdb import ( | |||||||
| ) | ) | ||||||
| from .ina import InaIE | from .ina import InaIE | ||||||
| from .infoq import InfoQIE | from .infoq import InfoQIE | ||||||
| from .instagram import InstagramIE | from .instagram import InstagramIE, InstagramUserIE | ||||||
| from .internetvideoarchive import InternetVideoArchiveIE | from .internetvideoarchive import InternetVideoArchiveIE | ||||||
| from .iprima import IPrimaIE | from .iprima import IPrimaIE | ||||||
| from .ivi import ( | from .ivi import ( | ||||||
|   | |||||||
| @@ -3,6 +3,9 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     int_or_none, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class InstagramIE(InfoExtractor): | class InstagramIE(InfoExtractor): | ||||||
| @@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor): | |||||||
|             'uploader_id': uploader_id, |             'uploader_id': uploader_id, | ||||||
|             'description': desc, |             'description': desc, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
class InstagramUserIE(InfoExtractor):
    """Extract the videos of an Instagram user profile as a playlist.

    The profile's ``/media`` endpoint is requested repeatedly; each follow-up
    request passes the id of the last item of the previous page as
    ``max_id``.  Paging stops at the first page whose ``items`` list is
    empty.  Items whose ``type`` is not ``'video'`` are skipped.
    """

    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'

    def _real_extract(self, url):
        """Return a playlist dict whose id and title are the user name."""
        mobj = re.match(self._VALID_URL, url)
        uploader_id = mobj.group('username')

        entries = []
        page_count = 0
        media_url = 'http://instagram.com/%s/media' % uploader_id
        while True:
            page_count += 1
            page = self._download_json(
                media_url, uploader_id,
                note='Downloading page %d ' % page_count,
            )
            items = page['items']
            if not items:
                break

            entries.extend(
                self._parse_video_item(it)
                for it in items
                if it.get('type') == 'video')

            # The next page is requested relative to the last item seen,
            # whether or not that item was a video.
            media_url = (
                'http://instagram.com/%s/media?max_id=%s' % (
                    uploader_id, items[-1]['id']))

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': uploader_id,
            'title': uploader_id,
        }

    def _parse_video_item(self, it):
        """Build the info dict for one video item of a media page.

        ``it['id']``, ``it['videos']`` and each video's ``'url'`` are
        required; every other field is optional and yields None when absent
        (the title falls back to the item id).
        """
        # dict.get(key, {}) only substitutes the default when the key is
        # missing.  A key that is present with a None value (presumably a
        # JSON null in the response) would make the chained .get() raise
        # AttributeError, hence ``or {}``.
        likes = it.get('likes') or {}
        user = it.get('user') or {}
        caption = it.get('caption') or {}
        images = it.get('images') or {}
        thumbnail_info = images.get('thumbnail') or {}

        formats = [{
            'format_id': format_id,
            'height': video.get('height'),
            'width': video.get('width'),
            'url': video['url'],
        } for format_id, video in it['videos'].items()]
        self._sort_formats(formats)

        return {
            'id': it['id'],
            'title': caption.get('text', it['id']),
            'formats': formats,
            'thumbnail': thumbnail_info.get('url'),
            'webpage_url': it.get('link'),
            'uploader': user.get('full_name'),
            'uploader_id': user.get('username'),
            'like_count': int_or_none(likes.get('count')),
            # NOTE(review): the test helper checks a 'timestamp' field, not
            # 'upload_timestamp' - this key may have been meant to be
            # 'timestamp'.  Left unchanged because renaming it changes the
            # fields the test definitions must list; confirm and update the
            # tests together.
            'upload_timestamp': int_or_none(it.get('created_time')),
        }
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister