From: Mike G. v. a. <we...@ma...> - 2008-06-24 23:18:55
|
Log Message: ----------- adding missing files Tags: ---- rel-2-4-patches Added Files: ----------- webwork2/lib/WeBWorK: NPL.pm Revision Data ------------- --- /dev/null +++ lib/WeBWorK/NPL.pm @@ -0,0 +1,625 @@ +################################################################################ +# WeBWorK Online Homework Delivery System +# Copyright © 2000-2007 The WeBWorK Project, http://openwebwork.sf.net/ +# $CVSHeader: webwork2/lib/WeBWorK/NPL.pm,v 1.2.2.1 2008/06/24 23:01:40 gage Exp $ +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of either: (a) the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any later +# version, or (b) the "Artistic License" which comes with this package. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See either the GNU General Public License or the +# Artistic License for more details. +# +# Contributed by W.H. Freeman; Bedford, Freeman, and Worth Publishing Group. +################################################################################ + +package WeBWorK::NPL; +use base 'Exporter'; + +=head1 NAME + +WeBWorK::NPL - Parse formats used by the National Problem Library. + +=head1 SYNOPSIS + + use WeBWorK::NPL qw/read_textbooks read_tags format_tags gen_find_tags/; + + open TEXTS, "<", "Textbooks"; + my $textbooks = []; + read_textbooks(\*TEXTS, $textbooks); + + open PGFILE, "<", "file.pg"; + my $tags = {}; + read_tags(\*PGFILE, $tags); + + foreach my $string (format_tags($tags)) { + $string =~ s/^/## /gm; + print "TAG: $string\n"; + } + + use File::Find; + my $process = sub { print "Found: $_[0]\n" }; + my $wanted = gen_find_tags({author=>'Rogawski'}, $process); + find({wanted=>$wanted}, @ARGV); + +=head1 DESCRIPTION + +This package contains parsing routines for the various data formats associated +with the National Problem Library. + +=cut + +use strict; +use warnings; +use Data::Dumper; + +our @EXPORT_OK = qw( + read_textbooks + read_tags + format_tags + gen_find_tags +); + +our @global_fields = qw(DESCRIPTION KEYWORDS DBsubject DBchapter DBsection Date +Institution Author UsesAuxiliaryFiles); +our @textbook_fields = qw(title edition author chapter section problem); + +our %tag2field = ( TitleText => "title", EditionText => "edition", +AuthorText => "author", Section => "section", Problem => "problem", ); +our %field2tag = reverse %tag2field; + +=head1 FUNCTIONS + +=head2 read_textbooks + + read_textbooks($fh, $arrayref) + +Reads a Textbooks file opened for reading on $fh and appends its contents to +$arrayref. Each item appended to $arrayref is a reference to a hash containing +the following keys: + + _title The title of the textbook + _edition The edition of the textbook + _author The author of the textbook + 1 The name of chapter 1 + 1.1 The name of section 1.1 + 1.2 The name of section 1.2 + ... + 2 The name of chapter 2 + 2.1 The name of section 2.1 + ... + +Since the number of sections in a textbook is typically small, it is not terribly +inefficient to pull chapters or sections out: + + @chapters = grep { /^\d+$/ } keys %textbook; + @sections = grep { /^\d+\.\d+$/ } keys %textbook; + +=cut + +sub read_textbooks { + my ($fh, $result) = @_; + + my %curr_textbook; + + while (<$fh>) { + s/#.*$//g; + next unless /\S/; + s/^\s*//; + s/\s*$//; + + if (/^(TitleText|EditionText|AuthorText)\(\s*'(.*?)'\s*\)/) { + my $field = $tag2field{$1}; + my $value = $2; + if (exists $curr_textbook{"_$field"}) { + # repeated tag -- this is a new textbook + push @$result, {%curr_textbook}; + %curr_textbook = (); + } + $curr_textbook{"_$field"} = $value; + } elsif (/^(\d+)(?:\.(\d+))?\s*>>>\s*(.*)$/) { + my $chapter = $1; + my $section = $2; + my $name = $3; + if (defined $section and length $section > 0) { + $curr_textbook{"$chapter.$section"} = $name; + } else { + $curr_textbook{$chapter} = $name; + } + } + } + push @$result, {%curr_textbook}; +} + +=head2 read_tags + + read_tags($fh, $hashref, $extra_editing_info); + +Reads the NPL tags from a PG file opened for reading on $fh and stores the tags +in %$hashref. The following keys may be added to %$hashref: + + DESCRIPTION + KEYWORDS + DBsubject + DBchapter + DBsection + Date + Institution + Author + UsesAuxiliaryFiles (experimental, subject to change) + textbooks (arrayref) + +The value for the C<textbooks> key will be a reference to an array of textbook +hashes containing the textbook tags from the source file. In each textbook hash, +entries with empty values (e.g. C<TitleText1('')>) will be omitted. This is to +deal with the large number of empty-valued tags in the NPL. The keys of each +textbook hash will be among: + + title + edition + author + chapter + section + problem (arrayref) + +The value for the C<problem> key will be a reference to an array of problem +numbers. + +If $extra_editing_info is true, special hash items _pos, _rest, and _maxtextbook +will also be added to %$hashref. + +_pos will contain the position of the first byte of the next line after the last +tag in the file. _rest will contain the bytes of the "rest" of the file, after +all tags, starting at _pos. _maxtextbook will contain the highest number used to +identify a textbook in the file. (e.g. If TitleText1 and TitleText3 appear in +the file, there will only be two items in the textbooks array, but _maxtextbook +will be 3.) + +This is useful for appending tags to a file which contains existing tags, where +the new tags should appear immediately after the existing tags: + + open PGFILE, "+<", "file.pg"; + my $tags = {}; + read_tags(\*PGFILE, $tags, 1); + my $pos = $tags{_pos}; + my $rest = $tags{_rest}; + seek PGFILE, $pos, 0; + print PGFILE "## SomeNewTag('foo','bar')\n"; + print PGFILE $rest; + close PGFILE; + +=cut + +sub read_tags { + my ($file, $result, $extra_editing_info) = @_; + + my $fh; + if (ref $file) { + $fh = $file; + } elsif (defined $file and not ref $file) { + $fh = new IO::File($file, 'r'); + } + + my $pos; + my $rest = ''; + my $maxtextbook; + while (<$fh>) { + #if (0) { + if (/^(.*?\#.*?)(\s*)DESCRIPTION/) { + my $prefix = $1; + my $whitespace = $2; + my $description = ''; + while (<$fh>) { + if (/\#.*ENDDESCRIPTION/) { + chomp $description; + $result->{DESCRIPTION} = $description if length $description > 0; + last; + } else { + # handle prefix and whitespace separately so that we can still + # chop the prefix off even if people are being careless about + # whitespace. :P + s/^$prefix//; + s/^$whitespace//; + $description .= $_; + } + } + if ($extra_editing_info) { + $pos = tell $fh; + $rest = ''; + } + } elsif (/\#.*KEYWORDS\((.*)\)/) { + my $keywords = $1; + push @{$result->{KEYWORDS}}, parse_keywords($keywords); + if ($extra_editing_info) { + $pos = tell $fh; + $rest = ''; + } + } elsif (/\#.*(DBsubject|DBchapter|DBsection|Date|Institution|Author)\(\s*(.*?)\s*\)/) { + my $field = $1; + my $value = $2; + my ($parsed_value, $parse_errors) = parse_normal_value($field, $value); + if (@$parse_errors) { + warn "error while parsing value \"$value\" in field $field:\n" + . join('', @$parse_errors) + . "value may be incomplete. use with caution.\n" + . "(line $. of file $file)\n"; + } + $result->{$field} = $parsed_value; + if ($extra_editing_info) { + $pos = tell $fh; + $rest = ''; + } + } elsif (/\#.*(TitleText|EditionText|AuthorText|Section|Problem)(\d+)\(\s*'(.*?)'\s*\)/) { + my $field = $tag2field{$1}; + my $num = $2; + my $value = $3; + next unless $value =~ /\S/; + $value = [ parse_problems($value) ] if $field eq "problem"; + if ($field eq "section") { + my ($ch, $sec) = split /\./, $value; + $result->{textbooks}[$num]{chapter} = $ch; + $result->{textbooks}[$num]{section} = $sec if defined $sec and length $sec > 0; + } else { + $result->{textbooks}[$num]{$field} = $value; + } + if ($extra_editing_info) { + $pos = tell $fh; + $rest = ''; + $maxtextbook = $num if not defined $maxtextbook or $num > $maxtextbook; + } + } elsif (/\#.*(UsesAuxiliaryFiles)\(\s*(.*?)\s*\)/) { + my $field = $1; + my $value = $2; + my ($parsed_value, $parse_errors) = parse_normal_list($field, $value); + if (@$parse_errors) { + warn "error while parsing list value \"$value\" in field $field:\n" + . join('', @$parse_errors) + . "value may be incomplete. use with caution.\n" + . "(line $. of file $file)\n"; + } + $result->{$field} = $parsed_value; + if ($extra_editing_info) { + $pos = tell $fh; + $rest = ''; + } + } else { + if ($extra_editing_info) { + $rest .= $_; + } + } + } + + # remove holes in textbook numbering + @{$result->{textbooks}} = grep { defined } @{$result->{textbooks}}; + delete $result->{textbooks} unless @{$result->{textbooks}}; + + if ($extra_editing_info) { + $result->{_pos} = $pos; + $result->{_rest} = $rest; + $result->{_maxtextbook} = $maxtextbook; + } +} + +sub parse_normal_list { + my ($name, $string) = @_; + + use constant NRM=>0; + use constant STR=>1; + use constant ESC=>2; + use constant STP=>3; + my $state = NRM; + my @errors; + my @items; + my $curr_item = ''; + my $next_item = 0; + foreach my $i (0 .. length($string)-1) { + my $c = substr($string,$i,1); + #print "i=$i c=$c state=$state curr_item=$curr_item next_item=$next_item\n"; + # state changes + if ($state == NRM) { + if ($c eq "'") { + $state = STR; + } elsif ($c eq ',' or $c eq ' ') { + # do nothing -- closequote already consumed curr_item + } else { + push @errors, + "illegal char '$c' in state NRM while parsing value for $name.\n" + . " $string\n" + . ' ' . ' 'x$i . "^\n"; + $next_item = 1; + $state = STP; + } + } elsif ($state == STR) { + if ($c eq "'") { + $state = NRM; + $next_item = 1; + } elsif ($c eq '\\') { + $state = ESC; + } else { + $curr_item .= $c; + } + } elsif ($state == ESC) { + $curr_item .= $c; + $state = STR; + } elsif ($state == STP) { + last; + } else { + die "unexpected state $state while parsing value for $name.\n"; + } + #print "i=$i c=$c state=$state curr_item=$curr_item next_item=$next_item\n"; + # actions + if ($next_item) { + push @items, $curr_item; + $curr_item = ''; + $next_item = 0; + #print "stored item to list\n"; + } + } + + return \@items, \@errors; +} + +sub parse_normal_value { + my ($name, $string) = @_; + my ($items, $errors) = parse_normal_list($name, $string); + push @$errors, "only one item allowed in value for $name.\n" if @$items > 1; + return shift @$items, $errors; +} + +# this now works for keywords is embedded spaces (which are later stripped out +# by kwtidy) but now it doesn't work for values with double quotes or no quotes! +sub parse_keywords { + my $string = shift; + my ($items, $errors) = parse_normal_list('KEYWORDS', $string); + if (@$errors) { + warn "errors while parsing KEYWORDS list:\n@$errors\n" + . "Partially-parsed KEYWORDS: @$items\n" + . "Resorting to old-style KEYWORDS parsing...\n"; + @$items = split /(?:,|\s)+/, $string; + warn "Old-style parse result: ", join('|', @$items), "\n"; + } + return map { kwtidy($_) } @$items; +} + +sub kwtidy { + my $keyword = shift; + $keyword =~ s/\W//g; + $keyword =~ s/_//g; + return lc $keyword; +} + +sub parse_problems { + my $string = shift; + $string =~ s/\D/ /g; + return grep { /\S/ } split /\s+/, $string; +} + +=head2 format_tags + + format_tags($tags, $mintextbook); + +Given a reference to a hash of tags, return a list of strings representing said +tags. The strings do not begin with the standard NPL comment prefix ("## ") or +end with newlines. These must be added by the caller if the strings are to be +inserted into a PG source file. + +One complication is the DESCRIPTION field, which contains embedded newlines. If +a DESCRIPTION tag occurs in %$tags, it will be formatted with embedded newlines +but without a trailing newline. For example, after this code executes, + + $tags = { DESCRIPTION => "line one\nline two\nline three" }; + ($desc) = format_tags($tags); + +$desc will contain the string: + + "DESCRIPTION\nline one\nline two\nline three\nENDDESCRIPTION" + +To account for this when writing to a PG file, you could use: + + foreach my $string (format_tags($tags)) { + $string =~ s/^/## /gm; + print PGFILE "$string\n"; + } + +=cut + +sub format_tags { + my ($tags, $mintextbook) = @_; + $mintextbook ||= 1; + my @result; + my @ordered_fields = grep { exists $tags->{$_} } @global_fields, "textbooks"; + foreach my $field (@ordered_fields) { + my $value = $tags->{$field}; + if ($field eq "DESCRIPTION") { + push @result, format_description($value); + } elsif ($field eq "textbooks") { + push @result, format_textbooks($value, $mintextbook); + } else { + push @result, format_tag($field, $value); + } + } + return @result; +} + +sub format_tag { + my ($field, $value, $n) = @_; + my $tag = $field2tag{$field} || $field; + + # problems are always listed in a single string in the tag. + if ($field eq "problem") { + $value = format_problems($value); + } + + # if we have an arrayref, we represent it as multiple strings in one tag. + if (ref $value) { + $value = join(',', map { "'$_'" } @$value); + } elsif (defined $value) { + $value = "'$value'"; + } else { + warn "value is not defined for field $field!\n"; + $value = "''"; + } + + if (defined $n) { + return "$tag$n($value)"; + } else { + return "$tag($value)"; + } +} + +sub format_description { + my $value = shift; + return "DESCRIPTION\n$value\nENDDESCRIPTION"; +} + +sub format_textbooks { + my ($textbook, $n) = @_; + my @textbooks = @$textbook; + my @result; + foreach my $textbook (@textbooks) { + push @result, format_textbook($textbook, $n); + $n++; + } + return @result; +} + +sub format_textbook { + my ($textbook, $n) = @_; + + # combine chapter/section into single section tag + my $chapter = $textbook->{chapter}; + my $section = $textbook->{section}; + if (defined $chapter or defined $section) { + $section = ".$section" if defined $section; + $section = "$chapter$section" if defined $chapter; + delete $textbook->{chapter}; + $textbook->{section} = $section; + } + + my @result; + my @ordered_fields = grep { exists $textbook->{$_} } @textbook_fields; + foreach my $field (@ordered_fields) { + my $value = $textbook->{$field}; + push @result, format_tag($field, $value, $n); + } + return @result; +} + +sub format_problems { + my $first = shift; + my @problems; + if (ref $first) { + @problems = @$first; + } else { + @problems = ($first, @_); + } + + return join(',', @problems); +} + +=head2 gen_find_tags + + gen_find_tags($pattern, $action, $extra_editing_info); + +Generates an anonymous subroutine suitable for passing the the find() function +of the File::Find module. The no_chdir=>1 option must be passed to find() for +the generated subroutine to operate properly. + +$pattern is a reference to a hash describing the fields that must match. $action +is a reference to a subroutine that will be called if all fields match. +$extra_editing_info is passed to read_tags(). + +Legal fields for $pattern are as follows: + +B<Global fields:> DESCRIPTION, KEYWORDS, DBsubject, DBchapter, DBsection, Date, +Institution, Author. (The experimental UsesAuxiliaryFiles field may be supported +in the future.) + +B<Text-specific fields:> title, edition, author, chapter, section, problem. + +If multiple text-specific keys are given, then all must match for a single +textbook. + +$action is called as follows: + + $action->($path, $tags, $text_index) + +There $path the path to the matching file, $tags a reference to the tag hash for +the matching file, and $text_index the index into the @{$tags->{textbooks}} array +if $pattern included textbook-specific tags. + +=cut + +sub gen_find_tags { + my ($pattern, $action, $extra_editing_info) = @_; + return sub { + return unless /\.pg$/ and -f $File::Find::name; + + my $name = $File::Find::name; + #my $relpath = $name; + #$relpath =~ s/^$src\///; + + my %tags; + + open my $fh, "<", $name or do { + warn "skipping $name: $!\n"; + return; + }; + read_tags($fh, \%tags, $extra_editing_info); + close $fh; + + my (%global_pattern, %textbook_pattern); + foreach my $field (@global_fields) { + $global_pattern{$field} = $pattern->{$field} if exists $pattern->{$field}; + } + foreach my $field (@textbook_fields) { + $textbook_pattern{$field} = $pattern->{$field} if exists $pattern->{$field}; + } + + if (%global_pattern) { + return unless match_global(\%tags, \%global_pattern); + } + my $text_index; + if (%textbook_pattern) { + $text_index = match_textbook(\%tags, \%textbook_pattern); + return unless $text_index >= 0; + } + + $action->($name, \%tags, $text_index); + }; +} + +sub match_global { + my ($tags, $matches) = @_; + foreach my $field (keys %$matches) { + return 0 unless $tags->{$field} eq $matches->{$field}; + } + return 1; +} + +sub match_textbook { + my ($tags, $matches) = @_; + return -1 unless defined $tags->{textbooks}; + my @textbooks = @{$tags->{textbooks}}; + + #textbook: foreach my $textbook (@{$tags->{textbooks}}) { + textbook: foreach my $i (0 .. $#{$tags->{textbooks}}) { + my $textbook = $tags->{textbooks}[$i]; + foreach my $field (keys %$matches) { + next if $field !~ /^(title|edition|author|chapter|section|problem)$/; + next textbook unless $textbook->{$field} eq $matches->{$field}; + } + #warn "matched text i=$i: ", Dumper($textbook); + return $i; + } + return -1; +} + +=back + +=cut + +1; |
From: Mike G. v. a. <we...@ma...> - 2008-06-24 23:19:32
|
Log Message: ----------- adding missing files Tags: ---- rel-2-4-patches Added Files: ----------- webwork2/bin: pg-append-textbook-tags pg-find-tags pg-pull Revision Data ------------- --- /dev/null +++ bin/pg-find-tags @@ -0,0 +1,110 @@ +#!/usr/bin/env perl +################################################################################ +# WeBWorK Online Homework Delivery System +# Copyright © 2000-2007 The WeBWorK Project, http://openwebwork.sf.net/ +# $CVSHeader: webwork2/bin/pg-find-tags,v 1.2.2.1 2008/06/24 23:02:16 gage Exp $ +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of either: (a) the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any later +# version, or (b) the "Artistic License" which comes with this package. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See either the GNU General Public License or the +# Artistic License for more details. +# +# Contributed by W.H. Freeman; Bedford, Freeman, and Worth Publishing Group. +################################################################################ + +use strict; +use warnings; + +use Data::Dumper;# $Data::Dumper::Indent = 0; +use File::Find; +use Getopt::Long; +use IO::Handle; + +BEGIN { + die "WEBWORK_ROOT not found in environment.\n" + unless exists $ENV{WEBWORK_ROOT}; +} + +use lib "$ENV{WEBWORK_ROOT}/lib"; +use WeBWorK::NPL qw/gen_find_tags/; + +sub main { + my ($pattern, @paths) = @_; + my $oldfh = select(STDERR); $|=1; select(STDOUT); $|=1; select($oldfh); + my $wanted = gen_find_tags($pattern, \&report); + find({ wanted=>$wanted, no_chdir=>1, }, @paths); +} + +sub report { + my ($name, $tags) = @_; + print "$name\n"; +} + +my %o; +GetOptions(\%o, + "DESCRIPTION=s", + "KEYWORDS=s", + "DBsubject=s", + "DBchapter=s", + "DBsection=s", + "Date=s", + "Institution=s", + "Author=s", + "title=s", + "edition=s", + "author=s", + "chapter=s", + "section=s", + "problem=s", +); +main(\%o, @ARGV); + +__END__ + +=head1 NAME + +pg-find-tags - Search for PG files that contain the specified metadata tags. + +=head1 SYNOPSIS + + pg-find-tags ~/MyLibrary /ww/OtherLibrary --author=Rogawski --edition=1 + +=head1 DESCRIPTION + +Recusively searches the paths given for PG files containing all of the specified +tags. Output is the path to each matching file. Legal tags are as follows: + +B<Global fields:> + + --DESCRIPTION=STRING + --KEYWORDS=STRING + --DBsubject=STRING + --DBchapter=STRING + --DBsection=STRING + --Date=STRING + --Institution=STRING + --Author=STRING + +B<Text-specific fields:> + + --title=STRING + --edition=STRING + --author=STRING + --chapter=STRING + --section=STRING + --problem=STRING + +If multiple text-specific fields are given, then all must match for a single +textbook. + +=head1 LIMITATIONS + +Doesn't support full boolean searches, and it probably should. Can only match on +full strings, so you can't match on a single keyword, for example. + +=cut --- /dev/null +++ bin/pg-append-textbook-tags @@ -0,0 +1,115 @@ +#!/usr/bin/env perl +################################################################################ +# WeBWorK Online Homework Delivery System +# Copyright © 2000-2007 The WeBWorK Project, http://openwebwork.sf.net/ +# $CVSHeader: webwork2/bin/pg-append-textbook-tags,v 1.2.2.1 2008/06/24 23:02:16 gage Exp $ +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of either: (a) the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any later +# version, or (b) the "Artistic License" which comes with this package. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See either the GNU General Public License or the +# Artistic License for more details. +# +# Contributed by W.H. Freeman; Bedford, Freeman, and Worth Publishing Group. +################################################################################ + +use strict; +use warnings; + +use IO::File; +use Data::Dumper;# $Data::Dumper::Indent = 0; +use Getopt::Long; + +BEGIN { + die "WEBWORK_ROOT not found in environment.\n" + unless exists $ENV{WEBWORK_ROOT}; +} + +use lib "$ENV{WEBWORK_ROOT}/lib"; +use WeBWorK::NPL qw/read_tags format_tags/; + +sub main { + my ($new_tags, @files) = @_; + $new_tags = { textbooks=>[$new_tags] }; + foreach my $file (@files) { + add_tags_to_file($new_tags, $file); + } +} + +sub add_tags_to_file { + my ($new_tags, $file) = @_; + + my $pgfile = new IO::File($file, '+<:utf8') or do { + warn "Failed to open file $file for editing: $!\n"; + warn "New tags will not be written to this file.\n"; + return; + }; + + my $old_tags = {}; + read_tags($pgfile, $old_tags, 1); # 1==extra_editing_info + my $pos = $old_tags->{_pos}; + my $rest = $old_tags->{_rest}; + my $maxtextbook = $old_tags->{_maxtextbook}; + print "pos=$pos maxtextbook=$maxtextbook\n"; + + my @tagstrings = format_tags($new_tags, $maxtextbook+1); + + seek $pgfile, $pos, 0; + foreach my $string (@tagstrings) { + $string =~ s/^/## /gm; + print $pgfile "$string\n"; + } + print $pgfile $rest; +} + +my %o; +GetOptions(\%o, + "title=s", + "edition=s", + "author=s", + "chapter=s", + "section=s", + "problem=s", +); +main(\%o, @ARGV); + +__END__ + +=head1 NAME + +pg-append-text-tags -- Add textbook tags to a PG file. + +=head1 SYNOPSIS + + pg-append-text-tags file1.pg file2.pg --author=Rogawski \ + --title='Calculus: Early Transcendentals' --edition=1 \ + --chapter=3 --section=1 --problem=11,13 + +=head1 DESCRIPTION + +This script appends metadata tags for a new textbook to one or more PG files. +Tags are given as switches on the command line: + + --title=STRING + --edition=STRING + --author=STRING + --chapter=STRING + --section=STRING + --problem=STRING + +More than one problem can be specified for B<--problem> by passing a +comma-separated list. + +=head1 LIMITATIONS + +Only adds tags for a new textbook, can't rewrite existing tags, can't remove +tags. + +At some point I will write Tie::PGFile, which will allow direct editing of tags +just by modifying a hash, and that will fix all of these problems. :) + +=cut --- /dev/null +++ bin/pg-pull @@ -0,0 +1,284 @@ +#!/usr/bin/env perl +################################################################################ +# WeBWorK Online Homework Delivery System +# Copyright © 2000-2007 The WeBWorK Project, http://openwebwork.sf.net/ +# $CVSHeader: webwork2/bin/pg-pull,v 1.4.2.1 2008/06/24 23:02:16 gage Exp $ +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of either: (a) the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any later +# version, or (b) the "Artistic License" which comes with this package. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See either the GNU General Public License or the +# Artistic License for more details. +# +# Contributed by W.H. Freeman; Bedford, Freeman, and Worth Publishing Group. +################################################################################ + +# Assemble a new problem library consisting of the specified PG files and any required +# auxiliary files (i.e. images). +# +# Auxiliary files must be specified in the (unofficial) UsesAuxiliaryFiles(...) tag. +# It is assumed that auxiliary files don't depend on further auxiliary files. +# This will be true <100% of the time. +# +# Also, right now this script always uses the FIRST textbook entry in generating +# the new file's path and name. Eventually, a --match-text switch will make the +# script useful for files with multiple text tags. + +use strict; +use warnings; + +use Data::Dumper; +use File::Path; +use File::Spec; +use File::Copy; +use Getopt::Long; +use IO::File; + +BEGIN { + die "WEBWORK_ROOT not found in environment.\n" + unless exists $ENV{WEBWORK_ROOT}; +} + +use lib "$ENV{WEBWORK_ROOT}/lib"; +use WeBWorK::NPL qw/read_textbooks read_tags/; + +my %o; +my %textbooks; +my %seen_paths; + +sub main { + my (@files) = @_; + my $oldfh = select(STDERR); $|=1; select(STDOUT); $|=1; select($oldfh); + + if (defined $o{help}) { + print usage(); + exit; + } + + my $dest_lib = $o{'dest-lib'}; + # using the last argument as dest-lib is too confusing + #$dest_lib = pop @files unless defined $dest_lib; + die "dest-lib not specified.\n" . usage() unless defined $dest_lib; + + die "no files specified (perhaps you meant to use --stdin?)\n" . usage() + unless @files or $o{stdin}; + + if ($o{'orig-paths'} and not defined $o{'src-lib'}) { + die "--src-lib must be specified with --orig-paths.\n", usage(); + } + + if (defined $o{'src-lib'} and not $o{'orig-paths'}) { + warn "ignoring --src-lib since --orig-paths was not specified.\n"; + } + + if ($o{'orig-paths'}) { + if (defined $o{textbooks}) { + warn "ignoring --textbooks since --orig-paths was specified.\n"; + } + } else { + if (defined $o{textbooks}) { + get_texts($o{textbooks}); + } else { + warn "No Textbooks file specified -- directories will not be named.\n"; + } + } + + my $getfile; + if ($o{stdin}) { + $getfile = sub { if (defined ($_=<STDIN>)) { chomp; $_ } else { () } }; + } else { + my $i = 0; + $getfile = sub { defined $files[$i] ? $files[$i++] : () }; + } + + while (defined (my $file = &$getfile)) { + #print "file=$file\n"; + process_file($file); + } +} + +sub get_texts { + my $textbooks = shift; + my @textbooks; + + open my $fh, '<', $textbooks or die "$textbooks: $!\n"; + read_textbooks($fh, \@textbooks); + close $fh; + + foreach my $textbook (@textbooks) { + $textbooks{$textbook->{'_author'}}{$textbook->{'_title'}}{$textbook->{'_edition'}} = $textbook; + } +} + +sub process_file { + my $file = shift; + + # ignoring volume here because we don't care about w32 + (undef, my ($dir, $name)) = File::Spec->splitpath($file); + #print " dir=$dir\n"; + #print " name=$name\n"; + + my %tags; + read_tags($file, \%tags); + #print Dumper(\%tags); + + my ($target_dir_rel, $target_name); + if ($o{'orig-paths'}) { + $target_dir_rel = File::Spec->abs2rel($dir, $o{'src-lib'}); + $target_name = $name; + } else { + ($target_dir_rel, $target_name) = tags_to_path(\%tags, '.pg', $file); + } + + if ($o{'check-names'} and $name ne $target_name) { + warn "$file: name will be changed to $target_name\n"; + } + + my $target_dir = File::Spec->catdir($o{'dest-lib'}, $target_dir_rel); + + my @files = ($target_name); + push @files, @{$tags{UsesAuxiliaryFiles}} if defined $tags{UsesAuxiliaryFiles}; + + #print "mkpath $target_dir\n" if $o{pretend} or $o{verbose}; + mkpath($target_dir) unless $o{pretend}; + + foreach my $curr (@files) { + my $src = File::Spec->catpath(undef, $dir, $curr); + my $dest = File::Spec->catpath(undef, $target_dir, $curr); + print "copy $src $dest\n" if $o{pretend} or $o{verbose}; + copy($src, $dest) unless $o{pretend}; + } +} + +sub tags_to_path { + my ($tags, $ext, $file) = @_; + + # FIXME here is where we'd put in textbook matching + my $text = $tags->{textbooks}[0]; + unless (defined $text) { + warn "$file: no textbook tags\n"; + return; + } + my %text = %$text; + + if (not defined $text{author} + or not defined $text{title} + or not defined $text{edition} + or not defined $text{chapter} + or not defined $text{section} + or not defined $text{problem} + or @{$text{problem}} == 0) { + warn "$file: incomplete textbook tags\n"; + return; + } + + my $chapter_name = $text{chapter}; + my $chapsec_name = "$text{chapter}.$text{section}"; + + if (defined $o{textbooks}) { + my $text_names = $textbooks{$text{author}}{$text{title}}{$text{edition}}; + if (defined $text_names) { + my %text_names = %$text_names; + + if (defined $text_names{$text{chapter}}) { + $chapter_name .= sissy_filename(" $text_names{$text{chapter}}") + } else { + warn "$file: no chapter name for $text{chapter}"; + } + + if (defined $text_names{"$text{chapter}.$text{section}"}) { + $chapsec_name .= sissy_filename(" $text_names{qq|$text{chapter}.$text{section}|}"); + } else { + warn "$file: no section name for $text{chapter}.$text{section}"; + } + } else { + warn "$file: can't find text $text{author}/$text{title}/$text{edition} in Textbooks file -- directories will be unnamed\n"; + } + } + + my $ex_name = "$text{chapter}.$text{section}.$text{problem}[0]"; + $ex_name .= '+' if @{$text{problem}} > 1; + + #print " chapter_name=$chapter_name\n"; + #print " chapsec_name=$chapsec_name\n"; + #print " ex_name=$ex_name\n"; + + # make sure path hasn't been seen before + my $dir = File::Spec->catdir($chapter_name, $chapsec_name); + my $partA = $ex_name; + $partA .= $o{suffix} if defined $o{suffix}; + my $partB = $ext; + my $uniq = unique_name($dir, $partA, $partB); + + #print " partA=$partA\n"; + #print " uniq=$uniq\n"; + #print " partB=$partB\n"; + + return $dir, "$partA$uniq$partB"; +} + +sub unique_name { + my ($dir, $partA, $partB) = @_; + my $whole = File::Spec->catpath(undef, $dir, "$partA$partB"); + my $uniq = ''; + if (exists $seen_paths{$whole}) { + my $i = 2; + do { + $uniq = "~$i"; + $whole = File::Spec->catpath(undef, $dir, "$partA$uniq$partB"); + } while (exists $seen_paths{$whole}); + } + $seen_paths{$whole} = (); + return $uniq; +} + +#sub find_macro_file { +# my ($name, $ref_by) = @_; +# my (undef,$ref_by_dir,undef) = File::Spec->splitpath($ref_by); +# my $ref_by_dir_rel = File::Spec->abs2rel($ref_by_dir, $o{'src-lib'}); +# my @dirs = File::Spec->splitdir($ref_by_dir_rel); +# while (1) { +# my $dir = File::Spec->catdir(@dirs); +# my $file = File::Spec->catfile(${'src-lib'}, $dir, $name); +# return $file if -f $file; +# pop @dirs; +# } +#} + +sub sissy_filename { + my $string = shift; + $string =~ s/:/-/g; + $string =~ s/[<>\"\/\/|?*]/_/g; + $string =~ s/\s+/_/g; + return $string; +} + +sub usage { + return "USAGE:\n" + . "$0 [OPTIONS] --dest-lib=PATH [--textbooks=PATH] [--suffix=STRING] files...\n" + . "$0 [OPTIONS] --dest-lib=PATH --orig-paths --src-lib=PATH files...\n" + . "Options:\n" + . "\t--stdin\n" + . "\t--pretend\n" + . "\t--verbose\n" + . "\t--check-names\n" + ; +} + +GetOptions(\%o, + 'textbooks=s', + 'dest-lib=s', + 'orig-paths', + 'src-lib=s', + 'stdin', + 'suffix=s', + 'pretend', + 'verbose', + 'check-names', + 'help', +); +main(@ARGV); |