#!/usr/bin/env python
#
# Copyright (C) 2009-2012 W. Trevor King <wking@drexel.edu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/>.

"""Mirror a tree of audio files in another format.

Conversion between any of the following formats is supported:

* flac
* m4a (decoding only)
* mp3
* ogg (Vorbis)
* wav

External packages required for full functionality:

* lame_ (`lame`)
* faad_ (`faad`)
* flac_ (`flac`)
* mpg123_ (`mpg123`)
* vorbis_ (`ogg123`, `oggenc`)
* mutagen_ (metadata conversion)

.. _lame: http://lame.sourceforge.net/
.. _faad: http://www.audiocoding.com/faad2.html
.. _flac: http://flac.sourceforge.net/
.. _mpg123: http://www.mpg123.org/
.. _vorbis: http://www.vorbis.com/
.. _mutagen: http://code.google.com/p/mutagen/
"""

from hashlib import sha256 as _hash
import os as _os
import re as _re
import shutil as _shutil
import subprocess as _subprocess
import tempfile as _tempfile

try:
    import mutagen as _mutagen
    import mutagen.flac as _mutagen_flag
    import mutagen.id3 as _mutagen_id3
    import mutagen.m4a as _mutagen_m4a
    import mutagen.mp3 as _mutagen_mp3
    import mutagen.oggvorbis as _mutagen_oggvorbis
except ImportError as _mutagen_import_error:
    _mutagen = None


__version__ = '0.4'


def invoke(args, stdin=None, expect=(0,)):
    """Run `args` as a subprocess and return `(status, stdout, stderr)`.

    `args` is the argument list handed to `subprocess.Popen`; it is
    echoed to stdout before the command runs.  `stdin`, if given, is
    fed to the process on its standard input.  `expect` lists the
    exit statuses that count as success.

    Raises `AssertionError` if the exit status is not in `expect`.
    """
    print('  {}'.format(args))
    p = _subprocess.Popen(
        args, stdin=_subprocess.PIPE, stdout=_subprocess.PIPE,
        stderr=_subprocess.PIPE)
    stdout, stderr = p.communicate(stdin)
    # communicate() has already waited for the process to exit.
    status = p.returncode
    if status not in expect:
        # Raised explicitly (rather than via `assert`) so that a
        # failed command is still detected when Python runs with -O.
        raise AssertionError(
            'invalid status {} from {}'.format(status, args))
    return (status, stdout, stderr)


class Converter (object):
    """Recode audio files from `source_dir` to `target_dir`.

    `target_extension` sets the target encoding.

    Parameters
    ----------
    source_dir
        Root of the tree that is walked for audio files.
    target_dir
        Root of the mirrored tree; created if it does not exist.
    target_extension
        Extension (without the dot) of the files to generate.
    cache_file
        Optional path of a text file mapping hashed sources to hashed
        targets.  It is read on construction and rewritten by
        `cleanup`.
    hash
        If true, use file hashes and the cache to decide whether a
        conversion can be skipped; otherwise skip any target that
        already exists.
    ignore_function
        Optional callable; source paths for which it returns a true
        value are skipped.

    Notes
    -----

    The `get_*_metadata` and `set_*_metadata` methods should pass
    metadata as a `dict` with key/value pairs standardised to match
    the list of Vorbis comment suggestions_ with lowercase keys.  The
    `date` field should be formatted `YYYY[-MM[-DD]]`.  The dict
    values should be lists to support repeated entries for a given
    tag.

    Call `cleanup` when done to remove the scratch file and save the
    cache.

    .. _suggestions: http://www.xiph.org/vorbis/doc/v-comment.html
    """
    id3_to_vorbis_keys = {
        'comm': 'comment',
        'talb': 'album',
        'tcom': 'composer',
        'tcop': 'copyright',
        'tit2': 'title',
        'tpe1': 'artist',
        'tpe2': 'accompaniment',
        'tpe3': 'conductor',
        'tpos': 'part of set',
        'tpub': 'organization',  # publisher
        'trck': 'tracknumber',
        'tyer': 'date',
        }

    m4a_to_vorbis_keys = {
        '\xa9cmt': 'comment',
        '\xa9alb': 'album',
        #'tcom': 'composer',
        'cprt': 'copyright',
        '\xa9nam': 'title',
        '\xa9ART': 'artist',
        #'tpe2': 'accompaniment',
        #'tpe3': 'conductor',
        'disk': 'part of set',
        #'tpub': 'organization',  # publisher
        'trkn': 'tracknumber',
        '\xa9day': 'date',
        }

    # The text (unicode) string type: `unicode` on Python 2, `str` on
    # Python 3.
    _text_type = type(u'')

    def __init__(self, source_dir, target_dir, target_extension='ogg',
                 cache_file=None, hash=True, ignore_function=None):
        self.source_dir = source_dir
        self.target_dir = target_dir
        self._source_extensions = ['flac', 'm4a', 'mp3', 'ogg', 'wav']
        self._target_extension = target_extension
        self._cache_file = cache_file
        self._cache = self._read_cache()
        self._hash = hash
        self._ignore_function = ignore_function
        fd, self._tempfile = _tempfile.mkstemp(prefix='mkogg-')
        # Only the path is used (external tools write to it), so close
        # the descriptor instead of leaking it.
        _os.close(fd)

    def cleanup(self):
        """Remove the scratch file and write the cache back to disk."""
        try:
            _os.remove(self._tempfile)
        finally:
            # Save the cache even if the scratch file is already gone.
            self._save_cache()

    def _read_cache(self):
        """Return the cache `dict` stored in `self._cache_file`.

        A missing or empty file, or one written by a different
        version, yields an empty cache.  Lines that are not of the
        form `key -> value` are skipped.  Raises `AssertionError` if a
        non-empty file lacks the expected header line.
        """
        cache = {}
        if self._cache_file is None:
            return cache
        try:
            with open(self._cache_file, 'r') as f:
                line = f.readline()
                if not line:
                    return cache  # empty file, nothing to load
                if not line.startswith('# mkogg cache version:'):
                    # Refuse to go on: `cleanup` would overwrite a
                    # file that does not look like one of our caches.
                    raise AssertionError(line)
                version = line.split(':', 1)[-1].strip()
                if version != __version__:
                    print('cache version mismatch: {} != {}'.format(
                            version, __version__))
                    return cache  # old cache, ignore contents
                for line in f:
                    try:
                        key, value = [x.strip() for x in line.split(' -> ')]
                    except ValueError:
                        continue  # malformed line, ignore it
                    cache[key] = value
        except IOError:
            pass
        return cache

    def _save_cache(self):
        """Write the cache to `self._cache_file` (if one is set)."""
        if self._cache_file is None:
            return
        with open(self._cache_file, 'w') as f:
            f.write('# mkogg cache version: {}\n'.format(__version__))
            # Sorted so that the file contents are reproducible.
            for key, value in sorted(self._cache.items()):
                f.write('{} -> {}\n'.format(key, value))

    def run(self):
        """Walk `source_dir`, converting every recognised audio file."""
        self._makedirs(self.target_dir)
        for dirpath, dirnames, filenames in _os.walk(self.source_dir):
            rel_path = _os.path.relpath(dirpath, self.source_dir)
            for filename in filenames:
                root, ext = _os.path.splitext(filename)
                ext = ext.lower()
                if ext.startswith('.'):
                    ext = ext[1:]
                if ext not in self._source_extensions:
                    print('skip', filename, ext)
                    continue
                source_path = _os.path.join(dirpath, filename)
                if (self._ignore_function is not None and
                        self._ignore_function(source_path)):
                    continue
                target_path = _os.path.join(
                    self.target_dir, rel_path,
                    '{}.{}'.format(root, self._target_extension))
                self._makedirs(_os.path.dirname(target_path))
                self._convert(source_path, target_path, ext)

    def _makedirs(self, target_dir):
        """Create `target_dir` (and parents) unless it already exists."""
        if not _os.path.exists(target_dir):
            _os.makedirs(target_dir)

    def _convert(self, source, target, ext):
        """Convert `source` (of type `ext`) to `target`, unless cached.

        With hashing enabled the conversion is skipped when the cache
        maps the source hash to the current target hash; without it,
        the conversion is skipped whenever the target exists.  Every
        file that is written, including plain copies, is recorded in
        the cache.
        """
        cache_key = None
        if self._hash:
            cache_key = self._cache_key(source)
            old_cache_value = self._cache.get(cache_key, None)
            if (old_cache_value is not None and
                    old_cache_value == self._cache_value(target)):
                print('already cached {} to {}'.format(source, target))
                return
        elif _os.path.exists(target):
            print('target {} already exists'.format(target))
            return
        print('convert {} to {}'.format(source, target))
        if ext == self._target_extension:
            _shutil.copy(source, target)
        else:
            try:
                convert = getattr(self, 'convert_{}_to_{}'.format(
                        ext, self._target_extension))
            except AttributeError:
                # No direct converter: go through a temporary wav file.
                to_wav = getattr(self, 'convert_{}_to_wav'.format(ext))
                from_wav = getattr(self, 'convert_wav_to_{}'.format(
                        self._target_extension))

                def convert(source, target):
                    to_wav(source, self._tempfile)
                    from_wav(self._tempfile, target)
            convert(source, target)
            if not getattr(convert, 'handles_metadata', False):
                get_metadata = getattr(self, 'get_{}_metadata'.format(ext))
                metadata = get_metadata(source)
                set_metadata = getattr(self, 'set_{}_metadata'.format(
                        self._target_extension))
                set_metadata(target, metadata)
        if cache_key is None:
            cache_key = self._cache_key(source)
        self._cache[cache_key] = self._cache_value(target)

    def _cache_key(self, source):
        return repr((self._file_hash(source), self._target_extension))

    def _cache_value(self, target):
        return self._file_hash(target)

    def _file_hash(self, filename):
        """Return the hex SHA-256 digest of `filename`.

        Returns `None` if the file cannot be read.

        Examples
        --------
        >>> c = Converter(None, None)
        >>> h = c._file_hash(__file__)
        >>> len(h)
        64
        >>> c._file_hash('/highly/unlikely/to/exist') == None
        True
        >>> c.cleanup()
        """
        h = _hash()
        chunk_size = 2**20  # 1 MiB
        try:
            with open(filename, 'rb') as f:
                # Read until an empty chunk signals end-of-file.
                for chunk in iter(lambda: f.read(chunk_size), b''):
                    h.update(chunk)
        except IOError:
            return None
        return str(h.hexdigest())

    def _require_mutagen(self):
        """Raise `ImportError` if Mutagen could not be imported."""
        if _mutagen is not None:
            return
        try:
            error = _mutagen_import_error
        except NameError:
            # Python 3 unbinds the `except ... as` name when the
            # handler exits, so the original exception is gone.
            error = ImportError(
                'mutagen is required for metadata conversion')
        raise error

    def _set_vorbis_comments(self, container, metadata):
        """Replace all tags in `container` with those in `metadata`."""
        container.delete()
        if isinstance(metadata, dict):
            items = sorted(metadata.items())
        else:
            items = metadata.items()
        for key, value in items:
            # leave key case alone, because Mutagen downcases Vorbis
            # keys internally.
            container[key] = value

    def _parse_date(self, date):
        """Parse `date` (`YYYY[-MM[-DD]]`), returning `(year, month, day)`.

        Examples
        --------
        >>> c = Converter(None, None)
        >>> c._parse_date('2010')
        ['2010', None, None]
        >>> c._parse_date('2010-11')
        ['2010', '11', None]
        >>> c._parse_date('2010-11-16')
        ['2010', '11', '16']
        >>> c.cleanup()
        """
        fields = date.split('-')
        assert len(fields) > 0 and len(fields) <= 3, date
        fields = fields + [None] * (3 - len(fields))
        return fields

    def _construct_id3_trck(self, metadata):
        """Return `(key, value)` for the combined track number entry.

        Each track number is joined with the matching track total as
        `number/total`; numbers without a matching total are passed
        through unchanged.  Returns `(None, None)` if `metadata` has
        no `tracknumber`.
        """
        if 'tracknumber' not in metadata:
            return (None, None)
        numbers = metadata['tracknumber']
        if 'tracktotal' not in metadata:
            return ('tracknumber', numbers)
        totals = metadata['tracktotal']
        value = []
        for i, number in enumerate(numbers):
            if i < len(totals):
                value.append('{}/{}'.format(number, totals[i]))
            else:
                value.append(number)
        return ('tracknumber', value)

    def _guess_id3_encoding(self, text_list):
        """Return the first ID3 encoding able to hold all `text_list`.

        Returns 0 (ISO-8859-1) if every text string can be encoded
        that way, and 3 (UTF-8) otherwise.  Entries that are not text
        strings are not checked.
        """
        for id3_encoding, encoding in [(0, 'ISO-8859-1'), (3, 'utf-8')]:
            encoding_success = True
            for text in text_list:
                if isinstance(text, self._text_type):
                    try:
                        text.encode(encoding)
                    except UnicodeEncodeError:
                        encoding_success = False
                        break
            if encoding_success:
                return id3_encoding
        raise ValueError(text_list)

    def convert_flac_to_wav(self, source, target):
        invoke(['ogg123', '-d', 'wav', '-f', target, source])

    def convert_flac_to_ogg(self, source, target):
        invoke(['oggenc', '--quiet', '-q', '3', source, '-o', target])
    # Tells `_convert` to skip the separate metadata copy.
    convert_flac_to_ogg.handles_metadata = True

    def convert_m4a_to_wav(self, source, target):
        invoke(['faad', '-o', target, source])

    def convert_mp3_to_wav(self, source, target):
        invoke(['mpg123',  '-w', target, source])

    def convert_ogg_to_wav(self, source, target):
        self.convert_flac_to_wav(source, target)

    def convert_wav_to_flac(self, source, target):
        invoke(['flac', '--force', '--output-name', target, source])

    def convert_wav_to_mp3(self, source, target):
        invoke(['lame', '--quiet', '-V', '4', source, target])

    def convert_wav_to_ogg(self, source, target):
        self.convert_flac_to_ogg(source, target)

    def get_flac_metadata(self, source):
        self._require_mutagen()
        # Reached through the package because this file binds no
        # `_mutagen_flac` name (the import alias is `_mutagen_flag`).
        return _mutagen.flac.FLAC(source)

    def get_m4a_metadata(self, source):
        """Return the tags of the m4a file `source` under Vorbis keys."""
        self._require_mutagen()
        m4a = _mutagen_m4a.M4A(source)
        # Keys are compared lowercased, so the table has to be
        # lowercased too or mixed-case entries such as '\xa9ART'
        # could never match.
        vorbis_keys = dict(
            (k.lower(), v) for k, v in self.m4a_to_vorbis_keys.items())
        metadata = {}
        for key, value in m4a.items():
            vorbis_key = vorbis_keys.get(key.lower())
            if vorbis_key is None:
                continue
            if vorbis_key == 'tracknumber':
                tracknumber, tracktotal = value
                value = tracknumber
                if tracktotal:
                    metadata['tracktotal'] = [str(tracktotal)]
            elif vorbis_key == 'part of set':
                disknumber, disktotal = value
                value = disknumber
                if disktotal:
                    metadata['set total'] = [str(disktotal)]
            if not isinstance(value, self._text_type):
                # Text is kept as is: on Python 2, str() would fail
                # for non-ASCII unicode values.
                value = str(value)
            metadata[vorbis_key] = [value]
        return metadata

    def get_mp3_metadata(self, source):
        """Return the ID3 tags of `source` under Vorbis keys.

        `number/total` track entries are split into `tracknumber` and
        `tracktotal`.
        """
        self._require_mutagen()
        mp3 = _mutagen_mp3.MP3(source)
        metadata = {}
        for key, value in mp3.items():
            try:
                vorbis_key = self.id3_to_vorbis_keys[key.lower()]
            except KeyError:
                continue
            v = list(value.text)  # copy, leave the frame untouched
            if vorbis_key == 'tracknumber':
                totals = []
                for i, v_entry in enumerate(v):
                    if '/' in v_entry:
                        tracknumber, tracktotal = v_entry.split('/', 1)
                        v[i] = tracknumber
                        totals.append(tracktotal)
                if totals:
                    metadata['tracktotal'] = totals
            metadata[vorbis_key] = v
        return metadata

    def get_ogg_metadata(self, source):
        self._require_mutagen()
        return _mutagen_oggvorbis.OggVorbis(source)

    def get_wav_metadata(self, source):
        return {}

    def set_flac_metadata(self, target, metadata):
        self._require_mutagen()
        # See `get_flac_metadata` for why this goes via `_mutagen`.
        flac = _mutagen.flac.FLAC(target)
        self._set_vorbis_comments(flac, metadata)
        flac.save()

    def set_mp3_metadata(self, target, metadata):
        """Replace the ID3 tags of `target` with `metadata`.

        Keys without an entry in `id3_to_vorbis_keys` are dropped.
        `metadata` itself is not modified.
        """
        self._require_mutagen()
        vorbis_keys_to_id3 = dict(
            (v, k) for k, v in self.id3_to_vorbis_keys.items())
        mp3 = _mutagen_mp3.MP3(target)
        if mp3.tags is not None:
            mp3.tags.delete()
        handled_trck = False
        max_encoding = 0
        for key, value in metadata.items():
            if key == 'date':
                # Only the year is stored.  Build a new list so the
                # caller's metadata is left untouched.
                value = [self._parse_date(v)[0] for v in value]
            elif key in ['tracknumber', 'tracktotal']:
                if handled_trck:
                    continue
                handled_trck = True
                key, value = self._construct_id3_trck(metadata)
                if value is None:
                    continue
            try:
                frame_name = vorbis_keys_to_id3[key].upper()
            except KeyError:
                continue
            frame = getattr(_mutagen_id3, frame_name)
            id3_encoding = self._guess_id3_encoding(value)
            max_encoding = max(max_encoding, id3_encoding)
            mp3[frame_name] = frame(encoding=id3_encoding, text=value)
        if mp3.tags is None:
            return
        if max_encoding:  # at least one tag doesn't use ISO-8859-1
            v1 = 0  # remove ID3v1 tags
        else:
            v1 = 2  # create and/or update ID3v1 tags
        mp3.save(v1=v1)

    def set_ogg_metadata(self, target, metadata):
        self._require_mutagen()
        ogg = _mutagen_oggvorbis.OggVorbis(target)
        self._set_vorbis_comments(ogg, metadata)
        ogg.save()

    def set_wav_metadata(self, target, metadata):
        pass


def test():
    """Run this module's doctests and return a process exit status.

    The status is the number of failed examples, capped at 127 so
    that it is a valid exit code and is never zero when there were
    failures.
    """
    import doctest
    results = doctest.testmod()
    # A modulo would wrap exactly 127 (or 254, ...) failures around to
    # 0, which would be reported as success.
    return min(results.failed, 127)


# Command line entry point: parse the arguments, then either run the
# doctests or mirror SOURCE into TARGET.
if __name__ == '__main__':
    import argparse
    import sys

    class Formatter (argparse.RawDescriptionHelpFormatter,
                     argparse.ArgumentDefaultsHelpFormatter):
        """Keep the epilog's layout and show argument defaults."""

    p = argparse.ArgumentParser(
        description=__doc__.splitlines()[0],
        epilog='\n'.join(__doc__.splitlines()[2:]),
        formatter_class=Formatter)
    p.add_argument(
        '-v', '--version', action='version',
        version='%(prog)s {}'.format(__version__))
    p.add_argument(
        '-t', '--target-extension', dest='ext', metavar='EXT',
        default='ogg', choices=['flac', 'mp3', 'ogg', 'wav'],
        help='Conversion target type')
    p.add_argument(
        '-c', '--cache', dest='cache', metavar='PATH',
        help=('Save conversion hashes in a cache file to avoid '
              'repeated previous conversions.'))
    p.add_argument(
        '-n', '--no-hash', dest='hash', action='store_false',
        help=("Don't hash files.  Just assume matching names would "
              'have matching hashes.'))
    p.add_argument(
        '-i', '--ignore', dest='ignore', metavar='REGEXP',
        help='Ignore source paths matching REGEXP.')
    p.add_argument(
        '--test', dest='test', action='store_true',
        help='Run internal tests and exit')
    # nargs='?' is what makes the defaults take effect; without it
    # the positionals are mandatory (even for --test) and argparse
    # never uses `default`.
    p.add_argument(
        'source_dir', metavar='SOURCE', nargs='?', default='.',
        help='Source directory')
    p.add_argument(
        'target_dir', metavar='TARGET', nargs='?', default='.',
        help='Target directory')

    args = p.parse_args()

    if args.test:
        sys.exit(test())

    if args.ignore:
        ignore_regexp = _re.compile(args.ignore)
        ignore_function = ignore_regexp.match
    else:
        ignore_function = None

    c = Converter(
        args.source_dir, args.target_dir, target_extension=args.ext,
        cache_file=args.cache, hash=args.hash,
        ignore_function=ignore_function)
    try:
        c.run()
    finally:
        # Always remove the scratch file and save the cache.
        c.cleanup()
