#!/usr/bin/perl -w # archive-ml # # DESCRIPTION: # rename files from one big directory (as one might use for # keeping incoming mailing list archives) into individual # directories for tarring or simply not having a huge number of # files in any one directory. # # inspired by the fact that 40k files (comprising about a year and a # half of perl5-porters) starts to get really slow on linux. oops. # # USAGE: # archive-ml source_dir dest_dir_root # # "source_dir" should be a directory with lots of files in it; my # scheme (generated by gnus' "nnml" scheme) has them numbered from # 1 to . # # "dest_dir_root" is the prefix of a series of other directories. # these directories will be named YYYY-MM. # # HISTORY: # created 1999-06-13 # # $Log: archive-ml,v $ # Revision 1.1 1999/10/15 19:26:33 tkil # initial checkin # # # LEGAL: # copyright (c) 1999 Tkil # # this code is free software; you may use, copy, modify, and # distribute it under the same terms as Perl itself. use strict; use Date::Parse qw(str2time); use File::Copy qw(move); use File::Path qw(mkpath); my $DEBUG = 1; # i know, i know, we're supposed to let people use their "umask" to # set this appropriately. but since even i don't always have it set # sanely, let's be paranoid, shall we? my $mkdir_mode = 0700; unless (@ARGV == 2) { die "$0: requires two arguments, SOURCE_DIR and DEST_DIR_ROOT\n"; } my ($src_dir, $dest_dir_root) = @ARGV; unless (-d $src_dir && -w _ && -r _) { die ("$0: source dir '$src_dir' either isn't a directory,\n". "or we don't have read/write privs to it."); } unless ((-d $dest_dir_root && -w _) || mkpath($dest_dir_root, 0, $mkdir_mode)) { die ("dest dir '$dest_dir_root' either doesn't exist,\n". "isn't a directory, or i couldn't create it."); } # we don't want to move anything from this month: my $this_yyyy_mm; { my ($y, $m) = (localtime)[5, 4]; $y += 1900; $m += 1; $this_yyyy_mm = sprintf "%04d-%02d", $y, $m; } opendir D, $src_dir or die "couldn't open source dir '$src_dir': $!"; FILE: while (my $file = readdir D) { next if $file eq '.' || $file eq '..'; $file = $src_dir . "/" . $file; print STDERR "opening $file:" if $DEBUG > 1; unless (open F, $file) { warn "couldn't open '$file' for read: $!"; next FILE; } my $date; LINE: while () { if (/^Date:\s+(.*)$/) { $date = $1; last LINE; } } close F or warn "problem closing '$file': $!"; unless ($date) { # i considered using the date from the last file, but there's no # guarantee that they will be in sequential order. this should be # rare enough to where i can just deal with them individually. warn "no date in '$file', skipping"; next FILE; } print STDERR " date='$date'" if $DEBUG > 1; my $time_t = str2time($date); unless (defined $time_t) { warn "couldn't parse '$date' in '$file', skipping"; next FILE; } print STDERR "\n time_t=$time_t" if $DEBUG > 1; # pick out the year and month from the "localtime" return values. # adjust them appropriately, then format them: my ($y, $m) = (localtime($time_t))[5, 4]; $y += 1900; $m += 1; my $yyyy_mm = sprintf "%04d-%02d", $y, $m; print STDERR " dest=$yyyy_mm\n" if $DEBUG > 1; # leave anything from this month next FILE if $yyyy_mm eq $this_yyyy_mm; # make sure we have a destination directory my $dest_dir = $dest_dir_root . "/" . $yyyy_mm; unless (-d $dest_dir && -w _ || mkdir($dest_dir, $mkdir_mode)) { warn "couldn't create dir '$dest_dir', or it's unwritable; " . "skipping '$file'"; next FILE; } move $file, $dest_dir or warn "problem moving '$file' to '$dest_dir': $!"; print STDERR "." if $DEBUG == 1; } closedir D or die "problem closing directory '$src_dir': $!"; print STDERR "\n" if $DEBUG == 1; exit 0;