Download - spug_2008-08
Simple Perl
Using File::Find and MP3::Tag to search
through a junk drawer of mp3 files, finding
duplicates
File::Find
• Searches a directory tree
• Invokes your callback (the \&wanted subroutine) for each thing
• Your callback subroutine does something with the thing
Using File::Find
• Create your callback subroutine
• Call find() with your callback and a list of directories as arguments
sub wanted {
    # do something neat ...
}
find( \&wanted, @directories );
\&wanted
sub wanted {
    say "$_";
    say "$File::Find::dir";
    say "$File::Find::name";
}
01_find
#!/usr/local/bin/perl
use v5.10;use strict; use warnings;
use File::Find;
#==============================
# main program

# take any command line arguments as the names of directories to search
my @dirs_to_search = @ARGV;

# if no search dirs were specified, just use '.'
if ( ! @dirs_to_search ) {
    @dirs_to_search = ( '.' );
}

# walk every directory tree, calling process_file() for each entry found
find( \&process_file, @dirs_to_search );
01_find (cont.)
# Callback passed to find(): prints the three variables that File::Find
# makes available for the entry currently being visited.
sub process_file {
    # $_ is set to the name of the current file
    # $File::Find::dir is the name of the containing directory
    # $File::Find::name is the full path

    say "\$_ <$_>";
    say "\$File::Find::dir <$File::Find::dir>";
    say "\$File::Find::name <$File::Find::name>";
    say '';    # blank line
}
02_find_type
sub process_file {
    my $type;
    if ( -f $_ ) {
        $type = 'normal file';
    } elsif ( -d $_ ) {
        $type = 'directory';
    } else {
        $type = 'other';
    }

    say "file: <$_>";
    say "type: <$type>";
    say '';
}
03_find_mp3
sub process_file {
    # skip anything that isn't a normal file
    if ( not -f $_ ) {
        return;
    }

    # skip any normal file that
    # doesn't have an .mp3 suffix
    if ( not /\.mp3$/ ) {
        return;
    }

    say "file <$_>";
}
04_find_mp3
sub process_file {
    # skip anything that isn't a normal file
    if ( not -f $_ ) {
        return;
    }

    my $mime = qx{ /usr/bin/file -bi "$_" };

    chomp $mime;
    # "text/plain; charset=us-ascii"
    # ... get rid of charset or other extra info
    $mime =~ s/;.*//;

    # skip any non mp3 files
    if ( $mime ne 'audio/mpeg' ) {
        warn "skipping [wrong mimetype] file <$_> mime: <$mime>\n";
        return;
    }

    say " ** got an mp3 file: <$_>";
}
touch \
  '"; echo "<$$> pwned orz" >> orz.log; echo"'
> ls
"; echo "<$$> pwned orz" >> orz.log; echo"
# within process_file()
# $_ = q{"; echo "<$$> pwned orz" >> orz.log; echo"};
# ...
my $mime = qx{ /usr/bin/file -bi "$_" };
/usr/bin/file -bi ""; echo "<$$> pwned orz" >> orz.log; echo""
DANGERS
05_find_mp3_secure
#!/usr/local/bin/perl -T
BEGIN {
    # delete certain tainted environment variables
    #
    # Under taint mode (-T) Perl refuses to run an external command while
    # any of these variables is still tainted, so remove all five that
    # perlsec names, not just PATH and ENV.  With PATH gone, external
    # commands should be invoked by absolute path (e.g. /usr/bin/file).
    delete @ENV{ qw( PATH ENV IFS CDPATH BASH_ENV ) };
}
•Turn on Taint mode
05_find_mp3_secure (cont.)
# Whitelist of characters considered safe to pass to the shell; the single
# capture group is what gets used as the untainted name.
#
# Anchored with \A and \z rather than ^ and $: "$" also matches just before
# a trailing newline, so a name such as "song.mp3\n" would match and be
# untainted as the *different* name "song.mp3".
my $shellsafe = qr{\A([-\@\w./ ]+)\z};

find(
    {
        wanted          => \&process_file,
        untaint         => 1,             # untaint directory names while walking
        untaint_pattern => $shellsafe,    # ... using the same whitelist
        untaint_skip    => 1,             # skip (don't die on) directories that fail it
        no_chdir        => 1,             # don't chdir; $_ is then the full path
    },
    @dirs_to_search,
);
05_find_mp3_secure (cont.)
# Callback passed to find(): accepts the current name only if it matches
# the $shellsafe whitelist, and continues with the untainted copy.
sub process_file {
    my $file;
    if ( m/$shellsafe/ ) {
        # untaint the safe filename
        $file = $1;
    } else {
        warn "skipping [suspicious name] file: <$_> \n";
        return;
    }

    # now use $file instead of $_
    # ...

}
MP3::Tag
use MP3::Tag;
my $mp3 = MP3::Tag->new( $filename );

# autoinfo() returns the tag fields as a list; capture them positionally
my ( $title, $track, $artist, $album, $comment, $year, $genre,) = $mp3->autoinfo();

# or
my $info = {};    # hashref

# hash slice: assign the same list to named keys of the hashref in one go
@{ $info }{ qw(title track artist album comment year genre) } = $mp3->autoinfo();
06_mp3_info
# process_file() writes directly into this
my $mp3_database = { };
find( ... );
# use Data::Dumper; # print Dumper( $mp3_database );
# use JSON; # print to_json( $mp3_database );
use YAML; print Dump( $mp3_database );
06_mp3_info (cont.)
sub process_file {
# ...
# Store this file's tags in $mp3_database, keyed by $file.  The hash slice
# autovivifies the hashref at $mp3_database->{ $file } and assigns the list
# returned by autoinfo() to the keys named in qw(), position by position.
# NOTE(review): $file is set in the elided part above - confirm it is the
# untainted name.
my $mp3 = MP3::Tag->new( $file ); @{ $mp3_database->{ $file } } { qw( title track artist
album comment year genre ) } = $mp3->autoinfo(); }
07_find_mp3_dupes
sub process_file {
    # ...
    my $info = {};
    $info->{ file } = $file;

    my $mp3 = MP3::Tag->new( $file );
    @{ $info->{ mp3 } }{ qw( title track artist album comment year genre ) } = $mp3->autoinfo();
# continued ...
07_find_mp3_dupes (cont.)
# Build the lookup key "artist|title", normalized so that differences in
# case, accents and whitespace do not hide duplicates.
my $song = join '|', map {
    # Work on a named copy: lexical "my $_" was removed in Perl 5.24, and
    # copying keeps the aliased tag values in $info untouched.
    my $text = lc $_;

    # The accented letters below are UTF-8 in the source file; without this
    # pragma tr/// would see each one as two separate bytes and mis-map.
    # NOTE(review): assumes the tag strings are decoded characters - confirm.
    use utf8;

    $text =~ tr/àáâäãå/aaaaaa/;
    $text =~ tr/èéêë/eeee/;
    $text =~ tr/ìíîïĩ/iiiii/;
    $text =~ tr/òóôöõ/ooooo/;
    $text =~ tr/ùúûüũ/uuuuu/;
    $text =~ tr/ñýÿ/nyy/;
    $text =~ s/\s+//g;    # drop all whitespace
    $text;
} @{ $info->{ mp3 } }{ qw( artist title ) };

# Files whose normalized key is identical collect in the same list.
push @{ $mp3_database->{ $song } }, $info;
}
07_find_mp3_dupes (cont.)
find( ... );
# print Dump( $mp3_database );
# A key that collected more than one record is a set of duplicates.
my @dupes = grep { scalar( @{ $_ } ) > 1 } values %{ $mp3_database };

# Dump each group of duplicate records under its own banner.
foreach my $dupe ( @dupes ) {
    say "\n*** Duplicate Songs ***";
    print Dump( $dupe );
}

say "\n";