[Lxr-commits] CVS: lxr/lib/LXR/Files GIT.pm,1.1,1.2
Brought to you by:
ajlittoz
From: Jan-Benedict G. <jb...@us...> - 2006-12-20 19:51:20
|
Update of /cvsroot/lxr/lxr/lib/LXR/Files In directory sc8-pr-cvs6.sourceforge.net:/tmp/cvs-serv792/lib/LXR/Files Modified Files: GIT.pm Log Message: Rework the GIT backend. Index: GIT.pm =================================================================== RCS file: /cvsroot/lxr/lxr/lib/LXR/Files/GIT.pm,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- GIT.pm 8 Apr 2006 13:37:58 -0000 1.1 +++ GIT.pm 20 Dec 2006 19:51:13 -0000 1.2 @@ -1,7 +1,9 @@ -# -*- tab-width: 4 -*- ############################################### # -# $Id$ - +# GIT.pm - A file backend for LXR based on GIT. +# +# © 2006 by Jan-Benedict Glaw <jb...@lu...> +# © 2006 by Maximilian Wilhelm <ma...@rf...> +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or @@ -13,8 +15,9 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# along with this program; if not, write to the Free Software Foundation +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# package LXR::Files::GIT; @@ -22,400 +25,230 @@ use strict; use FileHandle; +use Time::Local; use LXR::Common; -use LXR::Author; +use Git; -# -# We're adding ".git" to the path since we're only dealing with -# low-level stuff and _never_ ever deal with checked-out files. -# sub new { my ($self, $rootpath, $params) = @_; $self = bless({}, $self); $self->{'rootpath'} = $rootpath; + $self->{'do_blame'} = $$params{'do_blame'}; + $self->{'do_annotations'} = $$params{'do_annotations'}; + + if ($self->{'do_blame'}) { + # Blame support will only work when commit IDs are available, + # called annotations here... + $self->{'do_annotations'} = 1; + } - $ENV{'GIT_DIR'} = $self->{'rootpath'}; return $self; } -sub filerev { - my ($self, $filename, $release) = @_; - - $filename = $self->sanitizePath ($filename); - $release = $self->get_treehash_for_branchhead_or_tag ($release); - - my $pid = open(my $F, '-|'); - die $! unless defined $pid; - if (!$pid) { - exec ("git-ls-tree", $release, $filename) - or die "filerev: Cannot exec git-ls-tree"; - } - - my $git_line=<$F>; - chomp $git_line; - close($F); +sub isdir { + my ($self, $pathname, $release) = @_; - if ($git_line =~ m/(\d+)\s(\w+)\s([[:xdigit:]]+)\t(.*)/ ) { - return $3; - + $pathname =~ s/^\///; + if ($pathname eq "") { + return 1 == 1; } else { - die "filerev( $filename, $release ): No entry found.\n"; + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); + my $line = $repo->command_oneline ("ls-tree", "-d", "$release", "$pathname"); + return $line =~ m/^\d+ tree .*$/; } } -sub getfiletime { - my ($self, $filename, $release) = @_; - $filename = $self->sanitizePath ($filename); - - if ($filename =~ m/\/\.\.$/ ) - return undef; -# if ($filename =~ /\/\.\.\$/) -# return undef; - - my $pid1 = open(my $R, '-|' ); - die $! unless defined $pid1; - if(!$pid1) { - exec("git-rev-list", "--max-count=1", "$release", "--", $filename ) or die "getfiletime ($filename, $release): Cannot exec git-rev-list\n"; - } - my $commit = <$R>; - chomp $commit; - close($R); - - my $pid = open(my $F, '-|'); - die $! unless defined $pid; - if(!$pid) { - exec("git-cat-file", "commit", $commit) or die "getfiletime ($filename, $release): Cannot exec git-cat-file\n"; - } +sub isfile { + my ($self, $pathname, $release) = @_; - while(<$F>) { - chomp; - if ( m/^author .*<.*> (\d+)\s.*$/ ) { - close($F); - return $1; - } + $pathname =~ s/^\///; + if ($pathname eq "") { + return 1 == 0; + } else { + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); + my $line = $repo->command_oneline ("ls-tree", "-d", "$release", "$pathname"); + return $line =~ m/^\d+ blob .*$/; } - - close($F); - - die "getfiletime ($filename, $release) : Did not find GIT entry.\n"; } -sub getfilesize { - my ($self, $filename, $release) = @_; +sub getdir { + my ($self, $pathname, $release) = @_; + my ($dir, $node, @dirs, @files); + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); - $filename = $self->sanitizePath ($filename); - my $object_hash = $self->filerev ($filename, $release); + $pathname =~ s/^\///; - print STDERR "getfilesize ($filename, $release)\n"; + my ($fh, $c) = $repo->command_output_pipe ("ls-tree", "$release", "$pathname"); + while (<$fh>) { + if (m/(\d+) (\w+) ([[:xdigit:]]+)\t(.*)/) { + my ($entrymode, $entrytype, $objectid, $entryname) = ($1, $2, $3, $4); - # return `git-cat-file -s $blobhash`; - my $pid = open (my $F, '-|'); - die $! unless defined $pid; - if(!$pid) { - exec ("git-cat-file", "-s", $object_hash) or die "getfilesize ($filename, $release): Cannot exec git-cat-file\n"; - } + # Only get the filename part of the full path + my @array = split (/\//, $entryname); + my $num = @array - 1; + $entryname = @array[$num]; - my $size = <$F>; - close ($F); - chomp $size; - if ( $size ) { - return $size; - } else { - return undef; - } + # Weed out things to ignore + foreach my $ignoredir ($config->{ignoredirs}) { + next if $entryname eq $ignoredir; + } - close ($F); - return undef; -} + next if $entryname =~ /^\.$/; + next if $entryname =~ /^\.\.$/; -sub getfile { - my ($self, $filename, $release) = @_; - my ($buffer); + if ($entrytype eq "blob") { + push (@files, $entryname); + } elsif ($entrytype eq "tree") { + push (@dirs, "$entryname/"); + } + } + } -# my $blobhash = open( "git-ls-tree $release $filename | cut -f 3 -d ' ' | cut -f 1 -d \$'\t' |") or die "Cannot open git-ls-tree $release $filename in getfile\n"; - my $blobhash = $self->filerev( $filename, $release ); -# local ($/) = undef; + $repo->command_close_pipe ($fh, $c); - open(FILE, "git-cat-file blob $blobhash|") || return undef; - $buffer = <FILE>; - close(FILE); - return $buffer; + return sort (@dirs), sort (@files); } -sub getfilehandle { +sub getfilesize { my ($self, $filename, $release) = @_; - my ($fileh); - $filename = $self->sanitizePath ($filename); + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); - my $treeid = $self->get_treehash_for_branchhead_or_tag ($release); + $filename =~ s/^\///; - $filename = $self->sanitizePath ($filename); - my $objectid = $self->getBlobOrTreeOfPathAndTree ($filename, $treeid); + my $sha1hashline = $repo->command_oneline ("ls-tree", "$release", "$filename"); - $fileh = new IO::File; - $fileh->open ("git-cat-file blob $objectid |") or die "Cannot execute git-cat-file blob $objectid"; + if ($sha1hashline =~ m/\d+ blob ([[:xdigit:]]+)\t.*/) { + return $repo->command_oneline ("cat-file", "-s", "$1"); + } - return $fileh; + return undef; } sub tmpfile { my ($self, $filename, $release) = @_; my ($tmp, $fileh); - local ($/) = undef; $tmp = $config->tmpdir . '/lxrtmp.' . time . '.' . $$ . '.' . &LXR::Common::tmpcounter; - open(TMP, "> $tmp") || return undef; - $fileh = $self->getfilehandle( $filename, $release ); - print(TMP <$fileh>); - close($fileh); - close(TMP); + open (TMP, "> $tmp") || return undef; + $fileh = $self->getfilehandle ($filename, $release); + print (TMP <$fileh>); + close ($fileh); + close (TMP); return $tmp; } -sub getannotations { +sub filerev { + my ($self, $filename, $release) = @_; + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); - return (); - my ($self, $pathname, $release) = @_; - my @authors = (); + $filename =~ s/^\///; - if ( $pathname =~ m#^/(.*)$# ) { - $pathname = $1; - } - - open( BLAME, "git-blame -l $pathname $release |"); - while( <BLAME> ) { - if ( m/(^[[:xdigit:]]+)\s.*$/ ) { - my $linehash = $1; - my $authorline = `git-cat-file commit $linehash`; - if ($authorline =~ m/^author ([^<]+)<(([^@])\@[^>]+)>.*$/ ) { - my ($authorname, $authoruser, $authoremail) = ($1, $2, $3); - push(@authors, LXR::Author->new(chomp $authorname, - $authoruser, $authoremail)); - } else { - push(@authors, LXR::Author->new("", "", "")); - } - } else { - print STDERR "getannotations: JB HAT DOOFE OHREN: $_\n"; - } + my $sha1hashline = $repo->command_oneline ("ls-tree", "$release", "$filename"); + + if ($sha1hashline =~ m/\d+ blob ([[:xdigit:]]+)\t.*/) { + return $1; } - close(BLAME); - print STDERR "authors: " . join(" ", @authors) . "\n"; - - return @authors; + return undef; } -sub getauthor { - - return (); - +sub getfiletime { my ($self, $filename, $release) = @_; - $filename = $self->sanitizePath ($filename); - print STDERR "getauthr( $filename, $release )\n"; - my $commit = `git-rev-list --max-count=1 $release -- $filename | tr -d \$'\n'`; - my $authorline = `git-cat-file commit $commit | grep '^author' | head -n1 | tr -d \$'\n'`; - if ($authorline =~ m/^author ([^<]+)<(([^@])\@[^>]+)>.*$/ ) { - my ($authorname, $authoruser, $authoremail) = ($1, $2, $3); - return LXR::Author->new(chomp $authorname, $authoruser, $authoremail); - } else { - return LXR::Author->new("", "", ""); - } -} + $filename =~ s/^\///; -sub getdir { - my ($self, $pathname, $release) = @_; - my ($dir, $node, @dirs, @files); - - my $treeid = $self->get_treehash_for_branchhead_or_tag ($release); - - $pathname = $self->sanitizePath( $pathname ); - if ( $pathname !~ m#..*/# ) { - $pathname = $pathname . '/'; + if ($filename eq "") { + return undef; + } + if ($filename =~ m/\/$/) { + return undef; } - open(DIRLIST, "git-ls-tree $treeid $pathname |") or die "Cannot open git-ls-tree $treeid $pathname"; - while( <DIRLIST> ) { - if ( m/(\d+)\s(\w+)\s([[:xdigit:]]+)\t(.*)/ ) { - my ($entrymode, $entrytype, $objectid, $entryname) = ($1,$2,$3,$4); - - # Weed out things to ignore - foreach my $ignoredir ($config->{ignoredirs}) { - next if $entryname eq $ignoredir; - } - - next if $entryname =~ /^\.$/; - next if $entryname =~ /^\.\.$/; + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); + my $lastcommitline = $repo->command_oneline ("log", "--max-count=1", "--pretty=oneline", "$release", "--", "$filename"); + if ($lastcommitline =~ m/([[:xdigit:]]+) /) { + my $commithash = $1; - if ($entrytype eq "blob") { - push(@files, $entryname); - - } elsif ($entrytype eq "tree") { - push(@dirs, "$entryname/"); - #push(@dirs, "$entryname"); + my (@fh, $c) = $repo->command ("cat-file", "commit", "$commithash"); + foreach my $line (@fh) { + if ($line =~ m/^author .* <.*> (\d+) .[0-9]{4}$/) { + return $1; } } + return undef; } - close(DIRLIST); - - return sort(@dirs), sort(@files); -} -# This function should not be used outside this module -# except for printing error messages -# (I'm not sure even that is legitimate use, considering -# other possible File classes.) - -##sub toreal { -## my ($self, $pathname, $release) = @_; -## -## nearly all (if not all) method calls eventually call toreal(), so this is a good place to block file access -## foreach my $ignoredir ($config->ignoredirs) { -## return undef if $pathname =~ m|/$ignoredir/|; -## } -## -## return ($self->{'rootpath'} . $release . $pathname); -##} - -sub isdir { - my ($self, $pathname, $release) = @_; - - $pathname = $self->sanitizePath ($pathname); - $release = $self->get_newest_commit_from_branchhead_or_tag ($release); - - print STDERR "isdir ($pathname, $release)\n"; - - my $treeid = $self->get_treehash_for_branchhead_or_tag ($release); - - return $self->getObjectType ($pathname, $treeid) eq "tree"; -} - -sub isfile { - my ($self, $pathname, $release) = @_; - - $pathname = $self->sanitizePath ($pathname); - $release = $self->get_newest_commit_from_branchhead_or_tag ($release); - - print STDERR "isfile($pathname, $release)\n"; - - my $treeid = $self->get_treehash_for_branchhead_or_tag ($release); - - return $self->getObjectType ($pathname, $treeid) eq "blob"; -} - -# -# For a given commit (that is, the latest commit on a named branch or -# a tag's name) return the tree object's hash corresponding to it. -# -sub get_treehash_for_branchhead_or_tag () { - my ($self, $release) = @_; - $release = $self->get_newest_commit_from_branchhead_or_tag ($release); - - return `git-cat-file commit $release | grep '^tree' | head -n1 | cut -f 2 -d ' ' | tr -d \$'\n'`; + return undef; } -sub getObjectType() { - my ($self, $pathname, $treeid) = @_; - - open (DIRLIST, "git-ls-tree $treeid $pathname |") or die "Cannot open git-ls-tree $treeid $pathname"; - while (<DIRLIST>) { - if (m/(\d+)\s(\w+)\s([[:xdigit:]]+)\t(.*)/) { - my ($entrymode, $entrytype, $objectid, $entryname) = ($1, $2, $3, $4); - - # Weed out things to ignore -# # This should only be needed in the getdir function. -# foreach my $ignoredir ($config->{ignoredirs}) { -# next if $entryname eq $ignoredir; -# } +sub getfilehandle { + my ($self, $filename, $release) = @_; + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); - $entryname = $self->sanitizePath ($entryname); + $filename =~ s/^\///; -# print STDERR "getBlobOrTreeOfPathAndTree: pathname: \"$pathname\" :: entryname: \"$entryname\"\n"; - next if ( ! $pathname eq $entryname ); + my $sha1hashline = $repo->command_oneline ("ls-tree", "$release", "$filename"); - close (DIRLIST); -# print STDERR "Juhu, wir haben $pathname gefunden :: $objectid\n"; - return $entrytype; - } + if ($sha1hashline =~ m/^\d+ blob ([[:xdigit:]]+)\t.*/) { + my ($fh, $c) = $repo->command_output_pipe ("cat-file", "blob", "$1"); + return $fh; } - close (DIRLIST); return undef; } -sub getBlobOrTreeOfPathAndTree() { - my ($self, $pathname, $treeid ) = @_; - - open (DIRLIST, "git-ls-tree $treeid $pathname |") or die "Cannot open git-ls-tree $treeid $pathname"; - while (<DIRLIST>) { - if (m/(\d+)\s(\w+)\s([[:xdigit:]]+)\t(.*)/) { - my ($entrymode, $entrytype, $objectid, $entryname) = ($1, $2, $3, $4); +sub getannotations { + my ($self, $filename, $release) = @_; - # Weed out things to ignore - foreach my $ignoredir ($config->{ignoredirs}) { - next if $entryname eq $ignoredir; - } + if ($self->{'do_annotations'}) { + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); + my @revlist = (); + $filename =~ s/^\///; - $entryname = $self->sanitizePath( $entryname ); - next if (! $pathname eq $entryname ); + my (@lines, $c) = $repo->command ("blame", "-l", "$release", "--", "$filename"); - close (DIRLIST); - return $objectid; + foreach my $line (@lines) { + if ($line =~ m/^([[:xdigit:]]+) .*/) { + push (@revlist, $1); + } else { + push (@revlist, ""); + } } - } - close (DIRLIST); - - return undef; -} - -# -# This function will take a branch name ("master") or a tag name -# (like "v2.6.15") and return either the branch commit object ID, -# or descend from the tag object into the referenced commit object -# and return its commit ID. XXX -# -sub get_newest_commit_from_branchhead_or_tag ($$) { - my ($self, $head_or_tag) = @_; - my $objtype = `git-cat-file -t $head_or_tag | tr -d \$'\n'`; - if ($objtype eq "commit") { - return $head_or_tag; - } elsif ($objtype eq "tag") { - return `git-cat-file tag $head_or_tag | grep '^object' | head -n1 | cut -f 2 -d ' ' | tr -d \$'\n'`; + return @revlist; } else { - die ("get_newest_commit_from_branchhead_or_tag: Unrecognized object type $objtype for $head_or_tag\n"); + return (); } } -sub sanitizePath() { - my ($self, $pathname) = @_; +sub getauthor { + my ($self, $pathname, $release) = @_; - if ( $pathname eq "" ) { - # Empty? Just beam the client to the root. - $pathname = "."; - } elsif ( $pathname =~ m#^/# ) { - # Absolute? We want them to be relative! - $pathname = ".$pathname"; - } else { - # Filename incurrent directory? Add "./" to - # make them truly relative. - $pathname = "./$pathname"; - } + # + # Note that $release is a real commit this time + # (returned by getannotations() above). This is + # _not_ a tag name! + # - # Don't let them exploit us easily. -# if ( $pathname =~ m#/../# ) { -# die("You are now dead because of $pathname\n"); -# } + if ($self->{'do_blame'}) { + my $repo = Git->repository (Directory => "$self->{'rootpath'}"); + my @authorlist = (); - # Doubled slashes? We remove them. - $pathname =~ s#//#/#g; + $pathname =~ s/^\///; - # Delete leading slashes. - $pathname =~ s#/*$##g; + my (@lines, $c) = $repo->command ("cat-file", "commit", "$release"); + foreach my $line (@lines) { + if ($line =~ m/^author (.*) </) { + return $1 + } + } - return $pathname; + return undef; + } + + return undef; } 1; |