Welcome, Guest! Log In | Create Account

ProtectingFormating

From jabref

Jump to: navigation, search

Some BibTeX styles reproduce the title field exactly as it appears in your BibTeX database. Others convert it to sentence case, which loses the capitalization of acronyms and similar terms.

The following script is provided 'as is' for anyone who may find it useful. It processes a BibTeX database and inserts { } around acronyms that require protection. It will work on UNIX / Linux systems with bash, sed and awk installed. It was developed with 'MRI' acronyms in mind; however, the ideas are there as a framework to be ported to other systems / languages.

#!/bin/bash
#
# Protect acronyms in the title fields of a BibTeX database.
#
# Usage: SCRIPT [FILE...] > protected.bib
#
# Reads the named BibTeX file(s), or standard input when no file is given,
# and writes the database to standard output with { } inserted around the
# title words whose capitalisation must survive a sentence-casing style.
#
# Only fields written as
#     title = {Some text ...},
# are rewritten: the word "title", an "=" and the opening brace separated
# by blanks, and the closing "}," at the end of a line (possibly several
# lines further down).  Every other line is copied through unchanged.

# Byte-wise "C" locale, required for [B-Z], [[:upper:]] and [[:lower:]] to
# cover the ASCII letters only.  Locale names are case-sensitive, so this
# has to be an upper-case C.
export LC_ALL=C

# Let an unreadable input file (a failing sed) show up in the exit status.
set -o pipefail

#######################################
# Filter a BibTeX database, protecting acronyms in its title fields.
# Globals:   LC_ALL (read by sed and awk)
# Arguments: zero or more BibTeX files; standard input is read when none
#            is given
# Outputs:   the processed database on stdout
# Returns:   non-zero when sed or awk fails
#######################################
protect_bibtex_titles() {
  # Convert "4.7 T " to "4.7T " so that field strengths are written
  # consistently and get protected with { } by the awk stage.
  sed -e 's/\([ .][0-9]\) T /\1T /g' -- "$@" \
    | awk '
# The program sticks to POSIX awk: no interval expressions are used and
# every literal brace is written as a bracket expression, so the
# gawk-only --re-interval switch is not needed.

# Return 1 when one blank-separated title word must be wrapped in { }.
#
# Protected:     MRI fMRI T2 SmartPhantom trueFISP T/R I Q K
# Not protected: Imaging,  Sclerosis:  Full-Brain  Gray/White  32  1.5,  A
#                and anything that already contains a {...} group.
# Known limits:  words containing an apostrophe (Alzheimer<apostrophe>s)
#                are protected, other proper nouns are not, and a capital
#                followed by punctuation (I:) is not picked up.
function needs_braces(word) {
    # already protected, e.g. {MRI}
    if (word ~ /[{].*[}]/) return 0
    # plain numbers and punctuation
    if (word ~ /^[0-9():;.,!?]+$/) return 0
    # capitalised compounds such as Full-Brain or Gray/White
    if (word ~ /[[:upper:]][[:lower:]]+[-\/][[:upper:]][[:lower:]]+/) return 0
    # a single capital letter other than the article A
    if (word ~ /^[B-Z]$/) return 1
    # at least two characters that are neither lower-case letters nor
    # ordinary punctuation
    return (word ~ /[^[:lower:]?!:;.,@()].*[^[:lower:]?!:;.,@()]/)
}

# Rewrite the complete title field held in $0 and print it.  Line breaks
# of a multi-line title are expected as stand-alone @newline@ words.
function emit_title(    i, out, sep, after_break) {
    # Turn every dash into a stand-alone marker word so that the two halves
    # of a hyphenated word are judged separately.
    gsub(/---/, " @tripledash@ ")
    gsub(/ - /, " @tripledash@ ")    # a dash between words is a long dash
    gsub(/--/, " @doubledash@ ")
    gsub(/-/, " @dash@ ")

    # B0, T1 or T2 directly after an opening brace gets a subscript.  For an
    # acronym inside the title this happens on the SECOND run, once the
    # first run has wrapped it as {T1}; names like DESPOT1 never match.
    gsub(/[{]B0/, "{B$_{0}$")
    gsub(/[{]T1/, "{T$_{1}$")
    gsub(/[{]T2/, "{T$_{2}$")

    $3 = substr($3, 2)                         # drop { from the first word
    $NF = substr($NF, 1, length($NF) - 2)      # drop }, from the last word
    sub(/\.$/, "", $NF)                        # drop a trailing full stop

    out = "  title = {"
    sep = ""
    after_break = 0
    for (i = 3; i <= NF; i++) {
        if ($i == "@newline@") {
            # Consecutive line breaks collapse into one; no blank is kept
            # on either side, even when a dash marker is the neighbour.
            if (!after_break) out = out "\n\t"
            after_break = 1
            sep = ""
            continue
        }
        after_break = 0

        if ($i == "@dash@") {
            out = out "-"
            sep = ""
        } else if ($i == "@doubledash@") {
            out = out "--"
            sep = ""
        } else if ($i == "@tripledash@") {
            out = out sep "---"
            sep = " "
        } else {
            if (needs_braces($i))
                out = out sep "{" $i "}"
            else
                out = out sep $i
            sep = " "
        }
    }

    print out "},"
}

# Inside a title that spans several lines.  This is tested first so that a
# continuation line which happens to begin with the word "title" is not
# mistaken for a new field.
in_title {
    pending = pending " @newline@ " $0     # remember where the line broke
    if ($NF ~ /[}],$/) {                   # the title field ends here
        in_title = 0
        $0 = pending
        emit_title()
    }
    next
}

# Start of a title field.
$1 == "title" && $2 == "=" && $3 ~ /^[{]/ {
    if ($NF ~ /[}],$/) {
        emit_title()                       # complete on this line
    } else {
        pending = $0                       # continues on the next line(s)
        in_title = 1
    }
    next
}

# Everything else is copied through without processing.
{ print }

# A title that never reached its closing "}," is written back exactly as
# it was read instead of being dropped.
END {
    if (in_title) {
        gsub(/ @newline@ /, "\n", pending)
        print pending
    }
}
'
}

protect_bibtex_titles "$@"