1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-11-17 15:05:13 +00:00

Add option --replace-in-metadata

This commit is contained in:
pukkandan
2021-08-10 01:22:55 +05:30
parent a38bd1defa
commit e9f4ccd19e
8 changed files with 186 additions and 106 deletions

View File

@@ -20,8 +20,11 @@ from .ffmpeg import (
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
from .metadatafromfield import MetadataFromFieldPP
from .metadatafromfield import MetadataFromTitlePP
from .metadataparser import (
MetadataFromFieldPP,
MetadataFromTitlePP,
MetadataParserPP,
)
from .movefilesafterdownload import MoveFilesAfterDownloadPP
from .sponskrub import SponSkrubPP
@@ -48,6 +51,7 @@ __all__ = [
'FFmpegThumbnailsConvertorPP',
'FFmpegVideoConvertorPP',
'FFmpegVideoRemuxerPP',
'MetadataParserPP',
'MetadataFromFieldPP',
'MetadataFromTitlePP',
'MoveFilesAfterDownloadPP',

View File

@@ -1,74 +0,0 @@
from __future__ import unicode_literals
import re
from .common import PostProcessor
from ..compat import compat_str
class MetadataFromFieldPP(PostProcessor):
regex = r'(?P<in>.*?)(?<!\\):(?P<out>.+)$'
def __init__(self, downloader, formats):
PostProcessor.__init__(self, downloader)
assert isinstance(formats, (list, tuple))
self._data = []
for f in formats:
assert isinstance(f, compat_str)
match = re.match(self.regex, f)
assert match is not None
inp = match.group('in').replace('\\:', ':')
self._data.append({
'in': inp,
'out': match.group('out'),
'tmpl': self.field_to_template(inp),
'regex': self.format_to_regex(match.group('out')),
})
@staticmethod
def field_to_template(tmpl):
if re.match(r'[a-zA-Z_]+$', tmpl):
return '%%(%s)s' % tmpl
return tmpl
@staticmethod
def format_to_regex(fmt):
r"""
Converts a string like
'%(title)s - %(artist)s'
to a regex like
'(?P<title>.+)\ \-\ (?P<artist>.+)'
"""
if not re.search(r'%\(\w+\)s', fmt):
return fmt
lastpos = 0
regex = ''
# replace %(..)s with regex group and escape other string parts
for match in re.finditer(r'%\((\w+)\)s', fmt):
regex += re.escape(fmt[lastpos:match.start()])
regex += r'(?P<%s>.+)' % match.group(1)
lastpos = match.end()
if lastpos < len(fmt):
regex += re.escape(fmt[lastpos:])
return regex
def run(self, info):
for dictn in self._data:
tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
data_to_parse = self._downloader.escape_outtmpl(tmpl) % tmpl_dict
self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl']))
match = re.search(dictn['regex'], data_to_parse)
if match is None:
self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
continue
for attribute, value in match.groupdict().items():
info[attribute] = value
self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['tmpl'], value if value is not None else 'NA'))
return [], info
class MetadataFromTitlePP(MetadataFromFieldPP): # for backward compatibility
def __init__(self, downloader, titleformat):
super(MetadataFromTitlePP, self).__init__(downloader, ['%%(title)s:%s' % titleformat])
self._titleformat = titleformat
self._titleregex = self._data[0]['regex']

View File

@@ -0,0 +1,117 @@
import re
from enum import Enum
from .common import PostProcessor
class MetadataParserPP(PostProcessor):
class Actions(Enum):
INTERPRET = 'interpretter'
REPLACE = 'replacer'
def __init__(self, downloader, actions):
PostProcessor.__init__(self, downloader)
self._actions = []
for f in actions:
action = f[0]
assert isinstance(action, self.Actions)
self._actions.append(getattr(self, action._value_)(*f[1:]))
@classmethod
def validate_action(cls, action, *data):
''' Each action can be:
(Actions.INTERPRET, from, to) OR
(Actions.REPLACE, field, search, replace)
'''
if not isinstance(action, cls.Actions):
raise ValueError(f'{action!r} is not a valid action')
getattr(cls, action._value_)(cls, *data)
@staticmethod
def field_to_template(tmpl):
if re.match(r'[a-zA-Z_]+$', tmpl):
return f'%({tmpl})s'
return tmpl
@staticmethod
def format_to_regex(fmt):
r"""
Converts a string like
'%(title)s - %(artist)s'
to a regex like
'(?P<title>.+)\ \-\ (?P<artist>.+)'
"""
if not re.search(r'%\(\w+\)s', fmt):
return fmt
lastpos = 0
regex = ''
# replace %(..)s with regex group and escape other string parts
for match in re.finditer(r'%\((\w+)\)s', fmt):
regex += re.escape(fmt[lastpos:match.start()])
regex += rf'(?P<{match.group(1)}>.+)'
lastpos = match.end()
if lastpos < len(fmt):
regex += re.escape(fmt[lastpos:])
return regex
def run(self, info):
for f in self._actions:
f(info)
return [], info
def interpretter(self, inp, out):
def f(info):
outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(template, info)
data_to_parse = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict
self.write_debug(f'Searching for r{out_re.pattern!r} in {template!r}')
match = out_re.search(data_to_parse)
if match is None:
self.report_warning('Could not interpret {inp!r} as {out!r}')
return
for attribute, value in match.groupdict().items():
info[attribute] = value
self.to_screen('Parsed %s from %r: %r' % (attribute, template, value if value is not None else 'NA'))
template = self.field_to_template(inp)
out_re = re.compile(self.format_to_regex(out))
return f
def replacer(self, field, search, replace):
def f(info):
val = info.get(field)
if val is None:
self.report_warning(f'Video does not have a {field}')
return
elif not isinstance(val, str):
self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
return
self.write_debug(f'Replacing all r{search!r} in {field} with {replace!r}')
info[field], n = search_re.subn(replace, val)
if n:
self.to_screen(f'Changed {field} to: {info[field]}')
else:
self.to_screen(f'Did not find r{search!r} in {field}')
search_re = re.compile(search)
return f
class MetadataFromFieldPP(MetadataParserPP):
@classmethod
def to_action(cls, f):
match = re.match(r'(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
if match is None:
raise ValueError(f'it should be FROM:TO, not {f!r}')
return (
cls.Actions.INTERPRET,
match.group('in').replace('\\:', ':'),
match.group('out'))
def __init__(self, downloader, formats):
MetadataParserPP.__init__(self, downloader, [self.to_action(f) for f in formats])
class MetadataFromTitlePP(MetadataParserPP): # for backward compatibility
def __init__(self, downloader, titleformat):
MetadataParserPP.__init__(self, downloader, [(self.Actions.INTERPRET, 'title', titleformat)])