#-*-perl-*-
# Parse "ls -lR" type listings
# use lsparse'reset( dirname ) repeately
# By Lee McLoughlin <lmjm@doc.ic.ac.uk>
#
# $Header: /a/swan/home/swan/staff/csg/lmjm/src/perl/mirror/RCS/lsparse.pl,v 1.11 1992/03/20 21:01:04 lmjm Exp lmjm $
# $Log: lsparse.pl,v $
# Revision 1.11  1992/03/20  21:01:04  lmjm
# Added patch from Chris Myers to avoid problems with the dir '.'
# and another patch to correct the escaping of special chars in pathnames.
#
# Revision 1.10  1992/02/06  23:25:57  lmjm
# Used ideas from archies' filt program to improve handling of Permission
# denied errors
#
# Revision 1.9  1991/12/18  16:58:01  lmjm
# Try and cope with "Permission denied" messages.
#
# Revision 1.8  1991/12/06  18:12:06  lmjm
# Zap any leading ./  (Somehow they still creep thru.)
#
# Revision 1.7  1991/10/23  22:42:21  lmjm
# Raised timeouts.
# Call the right routine on a timeout!
# Turn off alarms when done!
# Stomp on carriage returns in dir listings.
# Hack for an odd dir listing.
#
# Revision 1.6  1991/09/06  19:53:54  lmjm
# Added in a (munged version) of Al's to return the mode as well.
#
# Revision 1.5  1991/09/04  14:17:02  lmjm
# Make the package return a value.
#
# Revision 1.4  1991/08/29  16:23:50  lmjm
# Allow for timeout of incoming data.
#
# Revision 1.3  1991/08/21  10:45:56  lmjm
# Added fix for broken return from revell@uunet.uu.net
# Try to spot incoming crud in the listing.
#
# Revision 1.2  1991/08/09  18:08:32  lmjm
# Scan symlinks correctly.
#
# Revision 1.1  1991/08/08  20:53:01  lmjm
# Initial revision
#

# This has better be available via your PERLLIB environment variable
require 'dateconv.pl';

package lsparse;

# The current directory is stripped off the
# start of the returned pathname
# $match is a pattern that matches this
local( $match );

# These routines will fail if not given input every readtime seconds.
# Set this to zero to disable timeouts
$lsparse'readtime = 600;

# The filestore type being scanned
$lsparse'fstype = 'unix';

# A name to report when errors occur
$lsparse'name = 'unknown';

# Set the directory than is being scanned
sub lsparse'reset
{
	$here = $currdir = @_[0];
	$now = time;
}

sub lsparse'timeout
{
	die "timeout: lsparse\n";
}

# This is just a wrapper function to set the timer up.
# see the following routine for call/return details.
sub lsparse'line
{
	if( $lsparse'readtime ){
		$SIG{ 'ALRM' } = "lsparse\'timeout";
	}

	local( $path, $size, $time, $type, $mode );

	if( $lsparse'fstype eq 'unix' ){
		($path, $size, $time, $type, $mode) =
			eval '&lsparse\'line_unix( @_ )';
	}
	else {
		die "unknown lsparse'fstype: $lsparse'fstype";
	}

	if( $@ =~ /^timeout/ ){
		return ('', 0, 0, 0, 0);
	}
	# Zap any leading ./  (Somehow they still creep thru.)
	$path =~ s:^(\./)+::;
	return ($path, $size, $time, $type, $mode );
}

# for each file or directory line found return a tuple of
# (pathname, size, time, type, $mode)
# pathname is a full pathname relative to the directory set by reset()
# size is the size in bytes (this is always 0 for directories)
# time is a Un*x time value for the file
# type is "f" for a file, "d" for a directory and
#         "l linkname" for a symlink
sub lsparse'line_unix
{
	local( $fh ) = @_;
	local( $non_crud, $perm_denied );

	if( $lsparse'readtime ){
		alarm( $lsparse'readtime );
	}

	if( eof( $fh ) ){
		alarm( 0 );
		return( "", 0, 0, 0 );
	}

	while( <$fh> ){
		# Stomp on carriage returns
		s/\015//g;

		# Try and spot crud in the line and avoid it
		# You can get:
		# -rw-r--r-ls: navn/internett/RCS/nsc,v: Permission denied
		# ls: navn/internett/RCS/bih,v: Permission denied
		# -  1 43       daemon       1350 Oct 28 14:03 sognhs
		# -rwcannot access .stuff/incoming
		# cannot access .stuff/.cshrc
		if( m%^(.*)/bin/ls:.*Permission denied% ||
		   m%^(.*)ls:.*Permission denied% ||
		   m%^(.*)(cannot|can not) access % ){
			if( ! $non_crud ){
				$non_crud = $1;
			}
			next;
		}
		# Also try and spot non ls "Permission denied" messages.  These
		# are a LOT harder to handle as the key part is at the end
		# of the message.  For now just zap any line containing it
		# and the first line following (as it will PROBABLY have been broken).
		#
		if( /.:\s*Permission denied/ ){
			$perm_denied = 1;
			next;
		}
		if( $perm_denied ){
			$perm_denied = "";
			warn "Warning: input corrupted by \"Permission denied\" errors, about line $. of $lsparse'name\n";
			next;
		}
		# Not found's are like Permission denied's.  They can start part
		# way through a line but with no way of spotting where they begin
		if( /not found/ ){
			$not_found = 1;
			next;
		}
		if( $not_found ){
			$not_found = "";
		warn "Warning: input corrupted by \"not found\" errors, about line $. of $lsparse'name\n";
			next;
		}
		
		if( $non_crud ){
			$_ = $non_crud . $_;
			$non_crud = "";
		}
		
		if( /^([\-lrwx]{10}).*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)\n/ ){
			local( $kind, $size, $lsdate, $file ) = ($1, $2, $3, $5);
			
			if( $file eq '.' || $file eq '..' ){
				next;
			}

			local( $time ) = &main'lstime_to_time( $lsdate );
			local( $type ) = '?';
			local( $mode ) = 0;

			# This should be a symlink
			if( $kind =~ /^l/ && $file =~ /(.*) -> (.*)/ ){
				$file = $1;
				$type = "l $2";
			}
			elsif( $kind =~ /^-/ ){
				# (hopefully) a regular file
				$type = 'f';
			}
			
			$mode = &chars_to_mode( $kind );

			$file =~ s,^/$match,,;
			$file = "$currdir/$file";
			$file =~ s,/+,/,g;
			alarm( 0 );
			return( substr( $file, 1 ), $size, $time, $type, $mode );
		}
		elsif( /^([\.\/]*.*):$/ ){
			if( $1 eq '.' ){
				next;
			}
			elsif( $1 !~ /^\// ){
				$currdir = "$here/$1/";
			}
			else {
				$currdir = "$1/";
			}
			$currdir =~ s,/+,/,g;
			$match = $currdir;
			$match =~ s/([\+\(\)\[\]\*\?])/\\$1/g;
			alarm( 0 );
			return( substr( $currdir, 1 ), 0, 0, 'd', 0 );
		}
		elsif( /^[dcbsp].*[^:]$/ || /^\s*$/ || /^[Tt]otal.*/ || /[Uu]nreadable$/ ){
			;
		}
		elsif( /^.*[Uu]pdated.*:/ ){
			# Probably some line like:
			# Last Updated:  Tue Oct  8 04:30:50 EDT 1991
			# skip it
			next;
		}
		elsif( /^([\.\/]*[^\s]*)/ ){
			# Just for the export.lcs.mit.edu ls listing
			$match = $currdir = "$1/";
			$match =~ s/[\+\(\[\*\?]/\\$1/g;
		}		
		else {
			printf( "Unmatched line: %s", $_ );
		}
	}
	alarm( 0 );
	return( '', 0, 0, 0, 0 );
}

# Convert the mode chars at the start of an ls-l entry into a number
# (Should really spot X, S and all the other chars now in use.)
sub chars_to_mode
{
	local( $chars ) = @_;
	local( @kind, $c );

	# Split and remove first char
	@kind = split( //, $kind );
	shift( @kind );

	foreach $c ( @kind ){
		$mode <<= 1;
		if( $c ne '-' ){
			$mode |= 1;
		}
	}

	return $mode;
}

	
	

1;
