git.infertux.com bin-scripts / master music-find-duplicates
master

Tree @master (Download .tar.gz)

music-find-duplicates @master raw · history · blame

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Interactive MP3 duplicates cleaner
#

import eyeD3
import fnmatch
import os
import sys
import time
from subprocess import call

pattern = '*.mp3' # can include any UNIX shell-style wildcards

if len(sys.argv) != 2:
    print "Usage: %s <directory>" % sys.argv[0]
    sys.exit(1)

root_path = sys.argv[1]
tag = eyeD3.Tag()
songlist = {}
duplicates = {}
bugged = False
counter = 0
space = 0

start_time = time.time()
for root, dirs, files in os.walk(root_path):
    for filename in fnmatch.filter(files, pattern):
        counter += 1
        print "%d files processed, please wait...%c" % (counter, 13),
        songfile = os.path.join(root, filename)
        try:
            tag.link(songfile)
        except (ValueError, eyeD3.tag.TagException) as e:
            #print "Unable to read tags for '%s', skipping." % songfile
            continue

        attributes = [a.strip() for a in [tag.getTitle(), tag.getArtist()]]
        for attribute in attributes:
            if attribute == '':
                bugged = True
                break
        if bugged:
            bugged = False
            continue

        song = "'" + attributes[0] + "' by '" + attributes[1] + "'"
        if not song in songlist:
            songlist[song] = songfile
        else:
            if not song in duplicates:
                duplicates[song] = [songlist[song], songfile]
                space += os.path.getsize(songfile)
            else:
                assert len(duplicates[song]) > 1
                duplicates[song].append(songfile)
                space += os.path.getsize(songfile)

del songlist

print "%d files processed in %d seconds." % (counter,
    round(time.time() - start_time, 1))

if not len(duplicates):
    print "Hooray! No duplicate in this directory."
    sys.exit(0)

print "You may free about %d MiB by deleting all duplicates." % \
    round(space / 1024**2)

for song, filenames in duplicates.iteritems():
    print "\n%s: %d duplicates" % (song, len(filenames))

    assert len(filenames) > 1
    for index, filename in enumerate(filenames):
        bitrate = '? kb/s'
        if eyeD3.isMp3File(filename):
            audioFile = eyeD3.Mp3AudioFile(filename)
            bitrate = audioFile.getBitRateString()

        print "  %d - %s (%s)" % (index+1, filename[len(root_path):],
            bitrate)

    print "  0 - keep all (do NOT delete anything)"
    user_input = raw_input(
        "Enter the number(s) you want to delete (e.g. 1 3 4) [0]: ")
    if user_input == '':
        user_input = '0'

    if user_input == '0':
        continue

    if user_input in ('a', 'b', 'q'):
        break

    for number in user_input.split(' '):
        number = int(number) - 1
        if number < 0 or number > index:
            print "Invalid number, skipping."
            continue

        print "'%s' moved to trash" % filenames[number]
        if call(["trash-put", filenames[number]]):
            print "Oops!"

print "Bye!"