ProtectingFormating
From jabref
Some BibTeX styles reproduce the title field exactly as it appears in your BibTeX database. Others convert the title to sentence case, thereby losing the capitalization of acronyms and similar terms.
The following script is provided 'as is' for whoever may find it useful. It processes a BibTeX database and inserts { } around acronyms that require protection. It will work on UNIX / Linux systems with bash, sed and awk installed. It was developed with 'MRI' acronyms in mind; however, the ideas are there as a framework that can be ported to other systems / languages.
#!/bin/bash
#
# Protect acronyms in the title fields of a BibTeX database by wrapping them
# in { }, so that bibliography styles which convert titles to sentence case
# keep their capitalisation.
#
# Usage:   SCRIPT FILE.bib  > protected.bib
#          SCRIPT < FILE.bib > protected.bib
#
# Only fields written as `title = {...},` are rewritten: the first word of the
# line must be exactly "title", the text must start at the third word, and the
# field must end with "}," (possibly several lines further down). Every other
# line is copied through unchanged (apart from the "4.7 T" -> "4.7T"
# normalisation, which sed applies to all lines).
#
# Running the output through the script a second time turns the protected
# words {B0}, {T1} and {T2} into {B$_{0}$}, {T$_{1}$} and {T$_{2}$}.

# Let a failing sed (for example an unreadable input file) show up in the
# exit status instead of being hidden behind awk.
set -o pipefail

# Required for [A-Z], [[:upper:]] and [[:lower:]] to mean plain ASCII letters.
# The locale name is case sensitive: it is "C", not "c".
export LC_ALL=C

#######################################
# Filter a BibTeX database, protecting acronyms in its title fields.
# Arguments: $1 - optional path of the database; stdin is read when omitted
# Outputs:   the processed database on stdout
# Returns:   non-zero if sed or awk fails
#######################################
protect_titles() {
  # sed: convert "4.7 T" to "4.7T" so that it is consistent and gets
  # protected with { } by the awk stage.
  # ${1:+"$1"} passes the file name as a single word and expands to nothing
  # (so that sed reads stdin) when no file name was given.
  sed -e 's/\([ .][0-9]\) T /\1T /g' -- ${1:+"$1"} |
    awk '
    BEGIN {
      intitle = 0   # 1 while the lines of a multi-line title are collected
      pending = ""  # those lines, joined by " @newline@ " markers

      # Capitalised words joined by a hyphen or slash (Full-Brain, Gray/White)
      # are ordinary words. Kept as a string so the slash needs no escaping.
      joined_words = "[[:upper:]][[:lower:]]+[-/][[:upper:]][[:lower:]]+"
    }

    # Continuation line of a multi-line title. This is tested first so that a
    # continuation line which happens to begin with the word "title" cannot
    # restart the collection and throw away what was gathered so far.
    intitle {
      pending = pending " @newline@ " $0  # remember where the line break was
      if ($NF !~ /[}],$/) {
        next                              # title field is still open
      }
      $0 = pending                        # whole title as one record
      intitle = 0                         # ... which is processed further down
    }

    # Print all non-title lines without processing.
    $1 != "title" {
      print
      next
    }

    # First line of a title field that does not end on the same line.
    $NF !~ /[}],$/ {
      pending = $0
      intitle = 1
      next
    }

    # A complete title field: a single line, or several lines joined above.
    {
      out = " title = {"

      # Split hyphenated words so that each part is judged separately. The
      # @...@ markers are turned back into dashes once the words are protected.
      gsub(/---/, " @tripledash@ ")
      gsub(/ - /, " @tripledash@ ")  # a dash between words should be a long dash
      gsub(/--/, " @doubledash@ ")
      gsub(/-/, " @dash@ ")

      # These only match on the SECOND pass, when the word is already wrapped
      # in { }, so as to be sure it is not part of DESPOT1 etc.
      gsub(/[{]B0/, "{B$_{0}$")
      gsub(/[{]T1/, "{T$_{1}$")
      gsub(/[{]T2/, "{T$_{2}$")

      $3 = substr($3, 2)                       # remove { from first word
      $NF = substr($NF, 1, length($NF) - 2)    # remove }, from last word
      if ($NF ~ /\.$/) {                       # remove trailing full stop
        $NF = substr($NF, 1, length($NF) - 1)
      }

      # Protect words like MRI fMRI T2 SmartPhantom trueFISP T/R I Q K
      # but not words like Imaging, Sclerosis: Gray/White 32 1.5, A
      # and not words which are already protected, like {MRI}.
      # Known limitations: words such as Alzheimer<apostrophe>s get protected
      # while other proper nouns do not, and "I:" is not picked up.
      for (i = 3; i <= NF; i++) {
        word = $i
        if (i > 3) {
          out = out " "
        }
        # Two or more characters that are neither lower case nor punctuation,
        # or a lone capital letter other than the article A.
        acronym_like = (word ~ /[^[:lower:]?!:;.,@()].*[^[:lower:]?!:;.,@()]/ ||
                        word ~ /^[B-Z]$/)
        if (acronym_like &&
            word !~ joined_words &&
            word !~ /[{].*[}]/ &&
            word !~ /^[0-9():;.,!?]+$/) {
          out = out "{" word "}"
        } else {
          out = out word
        }
      }

      # add the closing brace },
      out = out "},"

      # Put dashes and new lines back in.
      gsub(/ @tripledash@ /, " --- ", out)
      gsub(/ @doubledash@ /, "--", out)
      gsub(/ @dash@ /, "-", out)
      # The spaces around a newline marker are optional here: when a dash sat
      # next to the line break, restoring the dash has already consumed them.
      # Consecutive markers (blank lines) collapse into one line break.
      gsub(/ ?(@newline@[[:space:]]*)+/, "\n\t", out)

      printf("%s\n", out)
    }

    # The input ended inside a title field that was never closed with "},".
    # Emit the collected lines unchanged rather than silently dropping them.
    END {
      if (intitle) {
        gsub(/ @newline@ /, "\n", pending)
        print pending
      }
    }
    '
}

#######################################
# Entry point: run the filter on the file given as $1, or on stdin.
# Prints a usage line instead of waiting for input when started without a
# file name on an interactive terminal.
#######################################
main() {
  if (( $# == 0 )) && [[ -t 0 ]]; then
    printf 'Usage: %s [FILE.bib]   (or pipe a BibTeX database on stdin)\n' \
      "${0##*/}" >&2
    return 2
  fi
  protect_titles "$@"
}

main "$@"
