[Lxr-general] [PATCH] LXR: Rewrite of the GIT backend
Brought to you by:
ajlittoz
|
From: Jan-Benedict G. <jb...@lu...> - 2006-11-26 22:51:09
|
Hi!
This patch implements a rewrite of the GIT backend. Should be more
stable/less errorprone and catch more corner cases that didn't work
with the initial version. By Maximilian Wilhelm and me.
Index: lib/LXR/Files/GIT.pm
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/lxr/lxr/lib/LXR/Files/GIT.pm,v
retrieving revision 1.1
diff -u -r1.1 GIT.pm
--- lib/LXR/Files/GIT.pm 8 Apr 2006 13:37:58 -0000 1.1
+++ lib/LXR/Files/GIT.pm 26 Nov 2006 22:39:54 -0000
@@ -1,7 +1,9 @@
-# -*- tab-width: 4 -*- ###############################################
#
-# $Id: GIT.pm,v 1.1 2006/04/08 13:37:58 mbox Exp $
-
+# GIT.pm - A file backend for LXR based on GIT.
+#
+# =C2=A9 2006 by Jan-Benedict Glaw <jb...@lu...>
+# =C2=A9 2006 by Maximilian Wilhelm <ma...@rf...>
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
@@ -13,409 +15,240 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# along with this program; if not, write to the Free Software Foundation
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
=20
package LXR::Files::GIT;
=20
-$CVSID =3D '$Id: GIT.pm,v 1.1 2006/04/08 13:37:58 mbox Exp $';
+$CVSID =3D '$Id$';
=20
use strict;
use FileHandle;
+use Time::Local;
use LXR::Common;
-use LXR::Author;
+use Git;
=20
-#
-# We're adding ".git" to the path since we're only dealing with
-# low-level stuff and _never_ ever deal with checked-out files.
-#
sub new {
my ($self, $rootpath, $params) =3D @_;
=20
$self =3D bless({}, $self);
$self->{'rootpath'} =3D $rootpath;
+ $self->{'do_blame'} =3D $$params{'do_blame'};
+ $self->{'do_annotations'} =3D $$params{'do_annotations'};
+
+ if ($self->{'do_blame'}) {
+ # Blame support will only work when commit IDs are available,
+ # called annotations here...
+ $self->{'do_annotations'} =3D 1;
+ }
=20
- $ENV{'GIT_DIR'} =3D $self->{'rootpath'};
return $self;
}
=20
-sub filerev {
- my ($self, $filename, $release) =3D @_;
-
- $filename =3D $self->sanitizePath ($filename);
- $release =3D $self->get_treehash_for_branchhead_or_tag ($release);
+sub isdir {
+ my ($self, $pathname, $release) =3D @_;
=20
- my $pid =3D open(my $F, '-|');
- die $! unless defined $pid;
- if (!$pid) {
- exec ("git-ls-tree", $release, $filename)
- or die "filerev: Cannot exec git-ls-tree";=20
- }
-
- my $git_line=3D<$F>;
- chomp $git_line;
- close($F);
-
- if ($git_line =3D~ m/(\d+)\s(\w+)\s([[:xdigit:]]+)\t(.*)/ ) {
- return $3;
- =09
+ $pathname =3D~ s/^\///;
+ if ($pathname eq "") {
+ return 1 =3D=3D 1;
} else {
- die "filerev( $filename, $release ): No entry found.\n";
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
+ my $line =3D $repo->command_oneline ("ls-tree", "-d", "$release", "$path=
name");
+ return $line =3D~ m/^\d+ tree .*$/;
}
}
=20
-sub getfiletime {
- my ($self, $filename, $release) =3D @_;
- $filename =3D $self->sanitizePath ($filename);
-
- if ($filename =3D~ m/\/\.\.$/ )
- return undef;
-# if ($filename =3D~ /\/\.\.\$/)
-# return undef;
+sub isfile {
+ my ($self, $pathname, $release) =3D @_;
=20
- my $pid1 =3D open(my $R, '-|' );
- die $! unless defined $pid1;
- if(!$pid1) {
- exec("git-rev-list", "--max-count=3D1", "$release", "--", $filename ) or=
die "getfiletime ($filename, $release): Cannot exec git-rev-list\n";
- }
- my $commit =3D <$R>;
- chomp $commit;
- close($R);
-=09
- my $pid =3D open(my $F, '-|');
- die $! unless defined $pid;
- if(!$pid) {
- exec("git-cat-file", "commit", $commit) or die "getfiletime ($filename, =
$release): Cannot exec git-cat-file\n";
- }
-
- while(<$F>) {
- chomp;
- if ( m/^author .*<.*> (\d+)\s.*$/ ) {
- close($F);
- return $1;
- }
+ $pathname =3D~ s/^\///;
+ if ($pathname eq "") {
+ return 1 =3D=3D 0;
+ } else {
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
+ my $line =3D $repo->command_oneline ("ls-tree", "-d", "$release", "$path=
name");
+ return $line =3D~ m/^\d+ blob .*$/;
}
-=09
- close($F);
-
- die "getfiletime ($filename, $release) : Did not find GIT entry.\n";
}
=20
-sub getfilesize {
- my ($self, $filename, $release) =3D @_;
+sub getdir {
+ my ($self, $pathname, $release) =3D @_;
+ my ($dir, $node, @dirs, @files);
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
=20
- $filename =3D $self->sanitizePath ($filename);
- my $object_hash =3D $self->filerev ($filename, $release);
+ $pathname =3D~ s/^\///;
=20
- print STDERR "getfilesize ($filename, $release)\n";
+ my ($fh, $c) =3D $repo->command_output_pipe ("ls-tree", "$release", "$pat=
hname");
+ while (<$fh>) {
+ if (m/(\d+) (\w+) ([[:xdigit:]]+)\t(.*)/) {
+ my ($entrymode, $entrytype, $objectid, $entryname) =3D ($1, $2, $3, $4);
=20
- # return `git-cat-file -s $blobhash`;
- my $pid =3D open (my $F, '-|');
- die $! unless defined $pid;
- if(!$pid) {=09
- exec ("git-cat-file", "-s", $object_hash) or die "getfilesize ($filename=
, $release): Cannot exec git-cat-file\n";
- }
-
- my $size =3D <$F>;
- close ($F);
- chomp $size;
- if ( $size ) {
- return $size;
- } else {
- return undef;
- }
+ # Only get the filename part of the full path
+ my @array =3D split (/\//, $entryname);
+ my $num =3D @array - 1;
+ $entryname =3D @array[$num];
=20
- close ($F);
- return undef;
-}
+ # Weed out things to ignore
+ foreach my $ignoredir ($config->{ignoredirs}) {
+ next if $entryname eq $ignoredir;
+ }
=20
-sub getfile {
- my ($self, $filename, $release) =3D @_;
- my ($buffer);
+ next if $entryname =3D~ /^\.$/;
+ next if $entryname =3D~ /^\.\.$/;
=20
-# my $blobhash =3D open( "git-ls-tree $release $filename | cut -f 3 -d ' '=
| cut -f 1 -d \$'\t' |") or die "Cannot open git-ls-tree $release $filenam=
e in getfile\n";
- my $blobhash =3D $self->filerev( $filename, $release );
-# local ($/) =3D undef;
-
- open(FILE, "git-cat-file blob $blobhash|") || return undef;
- $buffer =3D <FILE>;
- close(FILE);
- return $buffer;
+ if ($entrytype eq "blob") {
+ push (@files, $entryname);
+ } elsif ($entrytype eq "tree") {
+ push (@dirs, "$entryname/");
+ }
+ }
+ }
+
+ $repo->command_close_pipe ($fh, $c);
+
+ return sort (@dirs), sort (@files);
}
=20
-sub getfilehandle {
+sub getfilesize {
my ($self, $filename, $release) =3D @_;
- my ($fileh);
- $filename =3D $self->sanitizePath ($filename);
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
=20
- my $treeid =3D $self->get_treehash_for_branchhead_or_tag ($release);
+ $filename =3D~ s/^\///;
=20
- $filename =3D $self->sanitizePath ($filename);
- my $objectid =3D $self->getBlobOrTreeOfPathAndTree ($filename, $treeid);
+ my $sha1hashline =3D $repo->command_oneline ("ls-tree", "$release", "$fil=
ename");
=20
- $fileh =3D new IO::File;
- $fileh->open ("git-cat-file blob $objectid |") or die "Cannot execute git=
-cat-file blob $objectid";
+ if ($sha1hashline =3D~ m/\d+ blob ([[:xdigit:]]+)\t.*/) {
+ return $repo->command_oneline ("cat-file", "-s", "$1");
+ }
=20
- return $fileh;
+ return undef;
}
=20
sub tmpfile {
my ($self, $filename, $release) =3D @_;
my ($tmp, $fileh);
- local ($/) =3D undef;
=20
$tmp =3D $config->tmpdir . '/lxrtmp.' . time . '.' . $$ . '.' . &LXR::Com=
mon::tmpcounter;
- open(TMP, "> $tmp") || return undef;
- $fileh =3D $self->getfilehandle( $filename, $release );
- print(TMP <$fileh>);
- close($fileh);
- close(TMP);
+ open (TMP, "> $tmp") || return undef;
+ $fileh =3D $self->getfilehandle ($filename, $release);
+ print (TMP <$fileh>);
+ close ($fileh);
+ close (TMP);
=20
return $tmp;
}
=20
-sub getannotations {
+sub filerev {
+ my ($self, $filename, $release) =3D @_;
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
=20
- return ();
- my ($self, $pathname, $release) =3D @_;
- my @authors =3D ();
+ $filename =3D~ s/^\///;
=20
- if ( $pathname =3D~ m#^/(.*)$# ) {
- $pathname =3D $1;
- }
-=09
- open( BLAME, "git-blame -l $pathname $release |");
- while( <BLAME> ) {
- if ( m/(^[[:xdigit:]]+)\s.*$/ ) {
- my $linehash =3D $1;
- my $authorline =3D `git-cat-file commit $linehash`;
- if ($authorline =3D~ m/^author ([^<]+)<(([^@])\@[^>]+)>.*$/ ) {
- my ($authorname, $authoruser, $authoremail) =3D ($1, $2, $3);
- push(@authors, LXR::Author->new(chomp $authorname,
- $authoruser, $authoremail));
- } else {
- push(@authors, LXR::Author->new("", "", ""));
- }
- } else {=09
- print STDERR "getannotations: JB HAT DOOFE OHREN: $_\n";
- }
+ my $sha1hashline =3D $repo->command_oneline ("ls-tree", "$release", "$fil=
ename");
+
+ if ($sha1hashline =3D~ m/\d+ blob ([[:xdigit:]]+)\t.*/) {
+ return $1;
}
- close(BLAME);
=20
- print STDERR "authors: " . join(" ", @authors) . "\n";
-=09
- return @authors;
+ return undef;
}
=20
-sub getauthor {
+sub getfiletime {
+ my ($self, $filename, $release) =3D @_;
=20
- return ();
+ $filename =3D~ s/^\///;
=20
- my ($self, $filename, $release) =3D @_;
- $filename =3D $self->sanitizePath ($filename);
- print STDERR "getauthr( $filename, $release )\n";
- my $commit =3D `git-rev-list --max-count=3D1 $release -- $filename | tr -=
d \$'\n'`;
- my $authorline =3D `git-cat-file commit $commit | grep '^author' | head -=
n1 | tr -d \$'\n'`;
-
- if ($authorline =3D~ m/^author ([^<]+)<(([^@])\@[^>]+)>.*$/ ) {
- my ($authorname, $authoruser, $authoremail) =3D ($1, $2, $3);
- return LXR::Author->new(chomp $authorname, $authoruser, $authoremail);
- } else {
- return LXR::Author->new("", "", "");
+ if ($filename eq "") {
+ return undef;
}
-}
-
-sub getdir {
- my ($self, $pathname, $release) =3D @_;
- my ($dir, $node, @dirs, @files);
-=09
- my $treeid =3D $self->get_treehash_for_branchhead_or_tag ($release);
-=09
- $pathname =3D $self->sanitizePath( $pathname );
- if ( $pathname !~ m#..*/# ) {
- $pathname =3D $pathname . '/';
+ if ($filename =3D~ m/\/$/) {
+ return undef;
}
=20
- open(DIRLIST, "git-ls-tree $treeid $pathname |") or die "Cannot open git-=
ls-tree $treeid $pathname";
- while( <DIRLIST> ) {
- if ( m/(\d+)\s(\w+)\s([[:xdigit:]]+)\t(.*)/ ) {
- my ($entrymode, $entrytype, $objectid, $entryname) =3D ($1,$2,$3,$4);
-
- # Weed out things to ignore
- foreach my $ignoredir ($config->{ignoredirs}) {
- next if $entryname eq $ignoredir;
- }
-
- next if $entryname =3D~ /^\.$/;
- next if $entryname =3D~ /^\.\.$/;
-
- if ($entrytype eq "blob") {
- push(@files, $entryname);
- =09
- } elsif ($entrytype eq "tree") {
- push(@dirs, "$entryname/");
- #push(@dirs, "$entryname");
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
+ my $lastcommitline =3D $repo->command_oneline ("log", "--max-count=3D1", =
"--pretty=3Doneline", "$release", "--", "$filename");
+ if ($lastcommitline =3D~ m/([[:xdigit:]]+) /) {
+ my $commithash =3D $1;
+
+ my (@fh, $c) =3D $repo->command ("cat-file", "commit", "$commithash");
+ foreach my $line (@fh) {
+ if ($line =3D~ m/^author .* <.*> (\d+) .[0-9]{4}$/) {
+ return $1;
}
}
+ return undef;
}
- close(DIRLIST);
-=09
- return sort(@dirs), sort(@files);
-}
-
-# This function should not be used outside this module
-# except for printing error messages
-# (I'm not sure even that is legitimate use, considering
-# other possible File classes.)
-
-##sub toreal {
-## my ($self, $pathname, $release) =3D @_;
-##
-## nearly all (if not all) method calls eventually call toreal(), so this =
is a good place to block file access
-## foreach my $ignoredir ($config->ignoredirs) {
-## return undef if $pathname =3D~ m|/$ignoredir/|;
-## }
-##
-## return ($self->{'rootpath'} . $release . $pathname);
-##}
-
-sub isdir {
- my ($self, $pathname, $release) =3D @_;
-
- $pathname =3D $self->sanitizePath ($pathname);
- $release =3D $self->get_newest_commit_from_branchhead_or_tag ($release);
-
- print STDERR "isdir ($pathname, $release)\n";
-
- my $treeid =3D $self->get_treehash_for_branchhead_or_tag ($release);
-
- return $self->getObjectType ($pathname, $treeid) eq "tree";
-}
-
-sub isfile {
- my ($self, $pathname, $release) =3D @_;
=20
- $pathname =3D $self->sanitizePath ($pathname);
- $release =3D $self->get_newest_commit_from_branchhead_or_tag ($release);
-=09
- print STDERR "isfile($pathname, $release)\n";
-
- my $treeid =3D $self->get_treehash_for_branchhead_or_tag ($release);
-=09
- return $self->getObjectType ($pathname, $treeid) eq "blob";
+ return undef;
}
=20
-#
-# For a given commit (that is, the latest commit on a named branch or
-# a tag's name) return the tree object's hash corresponding to it.
-#
-sub get_treehash_for_branchhead_or_tag () {
- my ($self, $release) =3D @_;
- $release =3D $self->get_newest_commit_from_branchhead_or_tag ($release);
-
- return `git-cat-file commit $release | grep '^tree' | head -n1 | cut -f 2=
-d ' ' | tr -d \$'\n'`;
-}
+sub getfilehandle {
+ my ($self, $filename, $release) =3D @_;
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
=20
-sub getObjectType() {
- my ($self, $pathname, $treeid) =3D @_;
+ $filename =3D~ s/^\///;
=20
- open (DIRLIST, "git-ls-tree $treeid $pathname |") or die "Cannot open git=
-ls-tree $treeid $pathname";
- while (<DIRLIST>) {
- if (m/(\d+)\s(\w+)\s([[:xdigit:]]+)\t(.*)/) {
- my ($entrymode, $entrytype, $objectid, $entryname) =3D ($1, $2, $3, $4);
+ my $sha1hashline =3D $repo->command_oneline ("ls-tree", "$release", "$fi=
lename");
=20
- # Weed out things to ignore
-# # This should only be needed in the getdir function.
-# foreach my $ignoredir ($config->{ignoredirs}) {
-# next if $entryname eq $ignoredir;
-# }
-
- $entryname =3D $self->sanitizePath ($entryname);
-
-# print STDERR "getBlobOrTreeOfPathAndTree: pathname: \"$pathname\" :: e=
ntryname: \"$entryname\"\n";
- next if ( ! $pathname eq $entryname );
-
- close (DIRLIST);
-# print STDERR "Juhu, wir haben $pathname gefunden :: $objectid\n";
- return $entrytype;
- }
+ if ($sha1hashline =3D~ m/^\d+ blob ([[:xdigit:]]+)\t.*/) {
+ my ($fh, $c) =3D $repo->command_output_pipe ("cat-file", "blob", "$1");
+ return $fh;
}
- close (DIRLIST);
=20
return undef;
}
=20
-sub getBlobOrTreeOfPathAndTree() {
- my ($self, $pathname, $treeid ) =3D @_;
-
- open (DIRLIST, "git-ls-tree $treeid $pathname |") or die "Cannot open git=
-ls-tree $treeid $pathname";
- while (<DIRLIST>) {
- if (m/(\d+)\s(\w+)\s([[:xdigit:]]+)\t(.*)/) {
- my ($entrymode, $entrytype, $objectid, $entryname) =3D ($1, $2, $3, $4);
+sub getannotations {
+ my ($self, $filename, $release) =3D @_;
=20
- # Weed out things to ignore
- foreach my $ignoredir ($config->{ignoredirs}) {
- next if $entryname eq $ignoredir;
+ if ($self->{'do_annotations'}) {
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
+ my @revlist =3D ();
+ $filename =3D~ s/^\///;
+
+ my (@lines, $c) =3D $repo->command ("blame", "-l", "$release", "--", "$f=
ilename");
+
+ foreach my $line (@lines) {
+ if ($line =3D~ m/^([[:xdigit:]]+) .*/) {
+ push (@revlist, $1);
+ } else {
+ push (@revlist, "");
}
-
- $entryname =3D $self->sanitizePath( $entryname );
- next if (! $pathname eq $entryname );
-
- close (DIRLIST);
- return $objectid;
}
- }
- close (DIRLIST);
=20
- return undef;
-}
-
-#
-# This function will take a branch name ("master") or a tag name
-# (like "v2.6.15") and return either the branch commit object ID,
-# or descend from the tag object into the referenced commit object
-# and return its commit ID. XXX
-#
-sub get_newest_commit_from_branchhead_or_tag ($$) {
- my ($self, $head_or_tag) =3D @_;
- my $objtype =3D `git-cat-file -t $head_or_tag | tr -d \$'\n'`;
-
- if ($objtype eq "commit") {
- return $head_or_tag;
- } elsif ($objtype eq "tag") {
- return `git-cat-file tag $head_or_tag | grep '^object' | head -n1 | cut =
-f 2 -d ' ' | tr -d \$'\n'`;
+ return @revlist;
} else {
- die ("get_newest_commit_from_branchhead_or_tag: Unrecognized object type=
$objtype for $head_or_tag\n");
+ return ();
}
}
=20
-sub sanitizePath() {
- my ($self, $pathname) =3D @_;
-
- if ( $pathname eq "" ) {
- # Empty? Just beam the client to the root.
- $pathname =3D ".";
- } elsif ( $pathname =3D~ m#^/# ) {
- # Absolute? We want them to be relative!
- $pathname =3D ".$pathname";
- } else {
- # Filename incurrent directory? Add "./" to
- # make them truly relative.
- $pathname =3D "./$pathname";
- }
-
- # Don't let them exploit us easily.
-# if ( $pathname =3D~ m#/../# ) {
-# die("You are now dead because of $pathname\n");
-# }
+sub getauthor {
+ my ($self, $pathname, $release) =3D @_;
=20
- # Doubled slashes? We remove them.
- $pathname =3D~ s#//#/#g;
+ #
+ # Note that $release is a real commit this time
+ # (returned by getannotations() above). This is
+ # _not_ a tag name!
+ #
+
+ if ($self->{'do_blame'}) {
+ my $repo =3D Git->repository (Directory =3D> "$self->{'rootpath'}");
+ my @authorlist =3D ();
+
+ $pathname =3D~ s/^\///;
+
+ my (@lines, $c) =3D $repo->command ("cat-file", "commit", "$release");
+ foreach my $line (@lines) {
+ if ($line =3D~ m/^author (.*) </) {
+ return $1
+ }
+ }
=20
- # Delete leading slashes.
- $pathname =3D~ s#/*$##g;
+ return undef;
+ }
=20
- return $pathname;
+ return undef;
}
=20
1;
MfG, JBG
--=20
Jan-Benedict Glaw jb...@lu... +49-172-7608481
Signature of: http://perl.plover.com/Questions.html
the second :
|