[Lxr-commits] CVS: lxr/lib/LXR/Lang Generic.pm,1.48,1.49
Brought to you by:
ajlittoz
From: Andre-Littoz <ajl...@us...> - 2013-11-17 09:05:34
|
Update of /cvsroot/lxr/lxr/lib/LXR/Lang In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv14339/lib/LXR/Lang Modified Files: Generic.pm Log Message: Generic.pm: speed improvement and new feature Speed improvement during indexing: cache frequently used flags in memory, dereference various hash accesses, avoid overflooding memory New feature: category names are identified on their prefix only, allowing user to define variants (with different CSS decoration) Index: Generic.pm =================================================================== RCS file: /cvsroot/lxr/lxr/lib/LXR/Lang/Generic.pm,v retrieving revision 1.48 retrieving revision 1.49 diff -u -d -r1.48 -r1.49 --- Generic.pm 9 Nov 2013 10:07:37 -0000 1.48 +++ Generic.pm 17 Nov 2013 09:05:31 -0000 1.49 @@ -101,6 +101,15 @@ $$self{'langid'} = $self->langinfo('langid'); die "No langid for language $lang" unless defined $self->{'langid'}; + # Cache flag to boost performance + $$self{'case_insensitive'} = $self->flagged('case_insensitive'); + # Cache keywords to boost performance + if (exists($self->{'langmap'}{$lang}{'reserved'})) { + $self->{'reserved'} = \%{$self->{'langmap'}{$lang}{'reserved'}}; + } else { + $self->{'reserved'} = undef; + } + # Make sure that at least a default identifier definition exists # default must also cover C and C++ reserved words and Perl -variables $$self{'langmap'}{$lang}{'identdef'} = '[-\w~\#][\w]*' @@ -194,6 +203,12 @@ $langdesc->{'typeid'}{$type} = $index->decid($langid, $tdescr); } } +### The following line is commented out to improve performance. +### The consequence is a higher load of memory since DB updates +### are kept in memory until commit time (at least on directory +### exit). +# $index->commit(); +### This line is ABSOLUTELY mandatory in case multi-thread is publicly released } @@ -236,9 +251,10 @@ sub indexfile { my ($self, $name, $path, $fileid, $index, $config) = @_; + my $nsym = 0; - my $typemap = $self->langinfo('typemap'); - my $insensitive = $self->flagged('case_insensitive'); + my $typeid = $self->langinfo('typeid'); + my $insensitive = $self->{'case_insensitive'}; my $langforce = ${ $self->{'eclangnamemapping'} }{ $self->language }; if (!defined $langforce) { @@ -262,22 +278,25 @@ or die "Can't run ectags, $!"; # Parse the results - while (<CTAGS>) { + my @decls = <CTAGS>; + close(CTAGS); + $nsym = scalar(@decls); + while ($_ = shift(@decls)) { chomp; - my ($sym, $file, $line, $type, $ext) = split(/\t/, $_); - $line =~ s/;\"$//; #" fix fontification - $ext =~ m/language:(\w+)/; - $type = $typemap->{$type}; + my ($sym, $file, $line, $type, $ext) = split(/\t/o, $_); + $line =~ s/;\"$//o; #" fix fontification + $ext =~ m/language:(\w+)/o; + $type = $typeid->{$type}; if (!defined $type) { - print 'Warning: Unknown type ', (split(/\t/, $_))[3], "\n"; + print 'Warning: Unknown type ', (split(/\t/o, $_))[3], "\n"; next; } # TODO: can we make it more generic in parsing the extension fields? - if (defined($ext) && $ext =~ m/^(struct|union|class|enum):(.*)/) { + if (defined($ext) && $ext =~ m/^(struct|union|class|enum):(.*)/o) { $ext = $2; - $ext =~ s/::<anonymous>//g; + $ext =~ s/::<anonymous>//go; $ext = uc($ext) if $insensitive; } else { $ext = undef; @@ -286,9 +305,122 @@ $sym = uc($sym) if $insensitive; $index->setsymdeclaration($sym, $fileid, $line, $self->{'langid'}, $type, $ext); } - close(CTAGS); +### The following line is commented out to improve performance. +### The consequence is a higher load of memory since DB updates +### are kept in memory until commit time (at least on directory +### exit). +# $index->commit(); +### This line is ABSOLUTELY mandatory in case multi-thread is publicly released + } + return $nsym; +} + + +=head2 C<referencefile ($name, $path, $fileid, $index, $config)> + +Method C<referencefile> is invoked during I<genxref> to parse and collect +the references in a file. + +=over + +=item 1 C<$name> + +a I<string> containing the LXR file name + +=item 1 C<$path> + +a I<string> containing the OS file name + +When files are stored in VCSes, C<$path> is the name of a temporary file. + +=item 1 C<$fileid> + +an I<integer> containing the internal DB id for the file/revision + +=item 1 C<$index> + +a I<reference> to the index (DB) object + +=item 1 C<$config> + +a I<reference> to the configuration objet + +=back + +Using I<SimpleParse>'s C<nextfrag>, it focuses on "untyped" +fragments (aka. code fragments) from which symbols are extracted. +User symbols, if already declared, are entered in the reference +data base. + +=cut + +sub referencefile { + my ($self, $name, $path, $fileid, $index, $config) = @_; + my @refs; + + require LXR::SimpleParse; + + # Use dummy tabwidth here since it doesn't matter for referencing + my $fh = FileHandle->new($path); + if (!defined($fh)) { + return (-1, 0); + } + &LXR::SimpleParse::init ( $fh # FileHandle->new($path) + , 1 + , $self->parsespec + ); + $LXR::SimpleParse::dountab = 0; # Does not matter for references + + my $linenum = 1; + my ($btype, $frag) = &LXR::SimpleParse::nextfrag; + my @lines; + my $string; + my $l; + my $identdef = $self->langinfo('identdef'); + my $insensitive = $self->{'case_insensitive'}; + while (defined($frag)) { + + if (defined($btype)) { + if ( 'comment' eq substr($btype, 0, 7) + || 'string' eq substr($btype, 0, 6) + || 'include' eq $btype + || 'extra' eq substr($btype, 0, 5) + ) { + $linenum += () = $frag =~ m/\n/gs; + } else { + print "BTYPE was: $btype\n"; + } + } else { + @lines = split(/\n/o, $frag, -1); + foreach $l (@lines) { + foreach $string ($l =~ m/($identdef)\b/og) { + # print "considering $string\n"; + $string = uc($string) if $insensitive; + if (!$self->isreserved($string)) { + # setsymreference decides by itself to record the + # the symbol as a reference or not, based on the + # DB dictionary (stated otherwise: it does not add + # new symbols to the existing dictionary. + # print "adding $string to references\n"; +# $index->setsymreference($string, $fileid, $linenum); + push @refs, [$string, $linenum]; + } + } + $linenum++; + } + $linenum--; + } + ($btype, $frag) = &LXR::SimpleParse::nextfrag; } + my $nsym = scalar(@refs); + if ($nsym > 0) { + for my $ref (@refs) { # Symbols found, enter then into DB + my ($string, $line) = @$ref; + $index->setsymreference($string, $fileid, $line); + } + } + return ($linenum, $nsym); } @@ -563,10 +695,12 @@ my ($self, $code) = @_; my ($start, $id); - my $source = $$code; my $answer = ''; my $identdef = $self->langinfo('identdef'); - my $insensitive = $self->flagged('case_insensitive'); + my $insensitive = $self->{'case_insensitive'}; + my $prefix; # Unparsed bit before symbol + my $symbol; # Parsed symbol + my $dictsymbol; # Transformed symbol for dictionary lookup # Repeatedly remove what looks like an identifier from the head of # the source line and mark it if it is a reserved word or known @@ -578,21 +712,24 @@ # markings simultaneously to avoid interferences; # second reason, $2 is not a reference - while ( $source =~ s/^(.*?)($identdef)//s) + while ( $$code =~ s/^(.*?)($identdef)//s) { - my $dictsymbol = $2; + $prefix = $1; + $symbol = $2; + $dictsymbol = $2; + $dictsymbol =~ s/\s+//; # for C #directives $dictsymbol = uc($dictsymbol) if $insensitive; - $answer .= $1 - . ( $self->isreserved($2) - ? "<span class='reserved'>$2</span>" + $answer .= $prefix + . ( $self->isreserved($dictsymbol) + ? "<span class='reserved'>$symbol</span>" : ( $index->issymbol($dictsymbol, $$self{'releaseid'}) - ? join($2, @{$$self{'itag'}}) - : $2 + ? join($symbol, @{$$self{'itag'}}) + : $symbol ) ); } # don't forget the last chunk of the line containing no target - $$code = $answer . $source; + $$code = $answer . $$code; } @@ -617,116 +754,9 @@ sub isreserved { my ($self, $frag) = @_; - $frag =~ s/\s//g ; # for those who write # include - if ($self->flagged('case_insensitive')) { - $frag = uc($frag); - foreach my $word (@{$self->langinfo('reserved')}) { - $word = uc($word); - return 1 if $frag eq $word; - } - } else { - foreach my $word (@{$self->langinfo('reserved')}) { - return 1 if $frag eq $word; - } - } - return 0; -} - - -=head2 C<referencefile ($name, $path, $fileid, $index, $config)> - -Method C<referencefile> is invoked during I<genxref> to parse and collect -the references in a file. - -=over - -=item 1 C<$name> - -a I<string> containing the LXR file name - -=item 1 C<$path> - -a I<string> containing the OS file name - -When files are stored in VCSes, C<$path> is the name of a temporary file. - -=item 1 C<$fileid> - -an I<integer> containing the internal DB id for the file/revision - -=item 1 C<$index> - -a I<reference> to the index (DB) object - -=item 1 C<$config> - -a I<reference> to the configuration objet - -=back - -Using I<SimpleParse>'s C<nextfrag>, it focuses on "untyped" -fragments (aka. code fragments) from which symbols are extracted. -User symbols, if already declared, are entered in the reference -data base. - -=cut - -sub referencefile { - my ($self, $name, $path, $fileid, $index, $config) = @_; - - require LXR::SimpleParse; - - # Use dummy tabwidth here since it doesn't matter for referencing - &LXR::SimpleParse::init ( FileHandle->new($path) - , 1 - , $self->parsespec - ); - - my $linenum = 1; - my ($btype, $frag) = &LXR::SimpleParse::nextfrag; - my @lines; - my $ls; - my $identdef = $self->langinfo('identdef'); - my $insensitive = $self->flagged('case_insensitive'); - - while (defined($frag)) { - @lines = ($frag =~ m/(.*?\n)/g, $frag =~ m/([^\n]*)$/); - - if (defined($btype)) { - if ( $btype eq 'comment' - || $btype eq 'string' - || $btype eq 'include' - ) { - $linenum += @lines - 1; - } else { - print "BTYPE was: $btype\n"; - } - } else { - my $l; - my $string; - foreach $l (@lines) { - - foreach ($l =~ m/($identdef)\b/og) { - $string = $_; - - # print "considering $string\n"; - if (!$self->isreserved($string)) { - # setsymreference decides by itself to record the - # the symbol as a reference or not, based on the - # DB dictionary (stated otherwise: it does not add - # new symbols to the existing dictionary. - # print "adding $string to references\n"; - $string = uc($string) if $insensitive; - $index->setsymreference($string, $fileid, $linenum); - } - } - $linenum++; - } - $linenum--; - } - ($btype, $frag) = &LXR::SimpleParse::nextfrag; - } - return $linenum; + my $kw = $self->{'reserved'}; + return 0 if !defined($kw); + return exists($$kw{$frag}); } @@ -774,6 +804,9 @@ if (ref($$val{$item}) eq 'ARRAY') { return wantarray ? @{ $$val{$item} } : $$val{$item}; } + if (ref($$val{$item}) eq 'HASH') { + return wantarray ? %{ $$val{$item} } : $$val{$item}; + } return $$val{$item}; } else { return undef; |