## [r12728]: cdk-fingerprint-paper / trunk / paper / bmc_article.tex Maximize Restore History

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 %% BioMed_Central_Tex_Template_v1.05 %% % % bmc_article.tex ver: 1.05 % % % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% LaTeX template for BioMed Central %% %% journal article submissions %% %% %% %% <27 January 2006> %% %% %% %% %% %% Uses: %% %% cite.sty, url.sty, bmc_article.cls %% %% ifthen.sty. 
multicol.sty %% %% %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% For instructions on how to fill out this Tex template %% %% document please refer to Readme.pdf and the instructions for %% %% authors page on the biomed central website %% %% http://www.biomedcentral.com/info/authors/ %% %% %% %% Please do not use \input{...} to include other tex files. %% %% Submit your LaTeX manuscript as one .tex document. %% %% %% %% All additional figures and files should be attached %% %% separately and not embedded in the \TeX\ document itself. %% %% %% %% BioMed Central currently use the MikTex distribution of %% %% TeX for Windows) of TeX and LaTeX. This is available from %% %% http://www.miktex.org %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \NeedsTeXFormat{LaTeX2e}[1995/12/01] \documentclass[10pt]{bmc_article} % Load packages \usepackage{cite} % Make references as [1-4], not [1,2,3,4] \usepackage{url} % Formatting web addresses \usepackage{ifthen} % Conditional \usepackage{multicol} %Columns % \usepackage[utf8]{inputenc} %unicode support \usepackage[applemac]{inputenc} %applemac support if unicode package fails % \usepackage[latin1]{inputenc} %UNIX support if unicode package fails \urlstyle{rm} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% If you wish to display your graphics for %% %% your own use using includegraphic or %% %% includegraphics, then comment out the %% %% following two lines of code. %% %% NB: These line *must* be included when %% %% submitting to BMC. %% %% All figure files must be submitted as %% %% separate graphics through the BMC %% %% submission process, not included in the %% %% submitted article. 
%% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \def\includegraphic{} \def\includegraphics{} \setlength{\topmargin}{0.0cm} \setlength{\textheight}{21.5cm} \setlength{\oddsidemargin}{0cm} \setlength{\textwidth}{16.5cm} \setlength{\columnsep}{0.6cm} \newboolean{publ} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% You may change the following style settings %% %% Should you wish to format your article %% %% in a publication style for printing out and %% %% sharing with colleagues, but ensure that %% %% before submitting to BMC that the style is %% %% returned to the Review style setting. %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Review style settings \newenvironment{bmcformat}{\begin{raggedright}\baselineskip20pt\sloppy\setboolean{publ}{false}}{\end{raggedright}\baselineskip20pt\sloppy} % Publication style settings %\newenvironment{bmcformat}{\fussy\setboolean{publ}{true}}{\fussy} % Begin ... \begin{document} \begin{bmcformat} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% Enter the title of your article here %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \title{A Benchmark Study of the CDK Fingerprints} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% Enter the authors here %% %% %% %% Ensure \and is entered between all but %% %% the last two authors. 
This will be %% %% replaced by a comma in the final article %% %% %% %% Ensure there are no trailing spaces at %% %% the ends of the lines %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \author{Rajarshi Guha\correspondingauthor$^{1}$% \email{Rajarshi Guha\correspondingauthor - rguha@indiana.edu}% \and Jane E Doe\correspondingauthor$^2$% \email{Jane E Doe\correspondingauthor - jane.e.doe@cambridge.co.uk} and John RS Smith$^3$% \email{John RS Smith - john.RS.Smith@cambridge.co.uk}% } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% Enter the authors' addresses here %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \address{% \iid(1)School of Informatics, Indiana University, Bloomington, IN 47408\\ \iid(2)Department of Zoology, Cambridge, Waterloo Road, London, UK\\ \iid(3)Marine Ecology Department, Institute of Marine Sciences Kiel, % D\"{u}sternbrooker Weg 20, 24105 Kiel, Germany }% \maketitle %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% The Abstract begins here %% %% %% %% The Section headings here are those for %% %% a Research article submitted to a %% %% BMC-Series journal. %% %% %% %% If your article is not of this type, %% %% then refer to the Instructions for %% %% authors on http://www.biomedcentral.com %% %% and change the section headings %% %% accordingly. %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{abstract} % Do not use inserted blank lines (ie \\) until main body of text. \paragraph*{Background:} Text for this section of the abstract. \paragraph*{Results:} Text for this section of the abstract \ldots \paragraph*{Conclusions:} Text for this section of the abstract \ldots \end{abstract} \ifthenelse{\boolean{publ}}{\begin{multicols}{2}}{} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% The Main Body begins here %% %% %% %% The Section headings here are those for %% %% a Research article submitted to a %% %% BMC-Series journal. 
%% %% %% %% If your article is not of this type, %% %% then refer to the instructions for %% %% authors on: %% %% http://www.biomedcentral.com/info/authors%% %% and change the section headings %% %% accordingly. %% %% %% %% See the Results and Discussion section %% %% for details on how to create sub-sections%% %% %% %% use \cite{...} to cite references %% %% \cite{koon} and %% %% \cite{oreg,khar,zvai,xjon,schn,pond} %% %% \nocite{smith,marg,hunn,advi,koha,mouse}%% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%% %% Background %% %% \section*{Background} Binary fingerprints are bit string representations of molecular structures and come in a variety of types. In the most common type, each bit of the fingerprint corresponds to a specific substructural feature (say an aromatic ring or an aldehyde group). Other forms of fingerprints include hashed fingerprints and atom environment fingerprints. While these representations were initially designed for similarity searching in databases, they have become an important component of virtual screening pipelines. That this is possible is due to the ``similarity principle''\cite{Martin:2002ab}, underlying much of virtual screening in drug discovery scenarios, which states that similar molecules will have similar activities. While there have been many counter-examples\cite{Maggiora:2006aa}, this approach has been fruitful in a number of cases. A number of fingerprint implementations are available from commercial vendors and a few from academic groups. The Chemistry Development Kit (CDK) is an Open Source Java library\cite{Steinbeck:2003bh,Steinbeck:2006aa} for cheminformatics and provides several fingerprint implementations. More specifically, it provides two structural key type fingerprints and two hashed fingerprints. While the library has been used in a number of projects, there has been no formal testing of how well the CDK fingerprints perform in a virtual screening scenario.
It should be noted that the two structural key fingerprints are implementations of well-studied schemes (MACCS\cite{Durant:2002aa} and EState keys) and their performance is well known. However, the two hashed fingerprints, while based on the well-known Daylight specification, have never been formally benchmarked. The goal of this study is to compare the performance of the CDK hashed fingerprints to other well-known fingerprint types. %%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Results and Discussion %% %% \section*{Results and Discussion} \subsection*{Enrichment curves} \subsection*{Information content} \subsection*{Effect of fingerprint size} \subsection*{Influence of ring systems} %%%%%%%%%%%%%%%%%%%%%% \section*{Conclusions} Text for this section \ldots %%%%%%%%%%%%%%%%%% \section*{Methods} \subsection*{Fingerprints} Summary of how CDK FPs are calculated. In addition to the path-based fingerprints described above, we also considered structural key type fingerprints. In these types, each bit position corresponds explicitly to a substructural feature. In this study we employed the MACCS 166 bit keys\cite{Durant:2002aa} (implemented in the CDK) and the BCI 1052 bit keys\cite{Barnard:1997aa}. Finally, we also considered atom environment fingerprints, specifically the extended connectivity fingerprints (ECFP) as implemented in Pipeline Pilot (SciTegic, Inc.). These types of fingerprints characterize each atom in terms of the environment around it, usually going up to 6 or 8 bonds from the atom in question. The ECFPs characterize the atoms using features such as hydrogen bonding donor capability, lipophilicity and so on. In this study we considered the ECFP-6 type, which considers atoms up to 6 bonds away from a central atom. \subsection*{Measures of effectiveness} Use of enrichment curves, enrichment factors. Note that they are not the best of measures\cite{Bender:2005aa,Truchon:2007aa,Nicholls:2008aa,Clark:2008aa}.
Use of ROC curves and AUC. \subsection*{Time efficiency} \subsection*{Benchmark Datasets} A number of datasets have been employed for benchmarking fingerprint methods including ZINC\cite{Irwin:2005aa} and the MDL Drug Data Report (MDDR). For the purposes of this study we employed the 17 virtual screening benchmark datasets described by Rohrer and Baumann\cite{Rohrer:2008ab}, collectively termed the Maximum Unbiased Validation (MUV) datasets. These datasets are derived from PubChem bioassays, each dataset corresponding to a specific bioassay. Examples of the targets considered by these datasets include FXIa inhibitors, FXIIa inhibitors, SF1 and HIV RT-RNase inhibitors. More broadly, the datasets cover several target classes including proteases, GPCRs, kinases and nuclear receptors. These datasets were constructed to specifically avoid the problem encountered with other datasets, namely, that many datasets give an unfair advantage to 2D methods over 3D methods. More specifically, the actives in each of the datasets exhibit a wide variety of scaffold classes, thus avoiding the problems of analog bias\cite{Good:2008aa} and artificial enrichment\cite{Verdonk:2004aa}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section*{Authors' contributions} %%%%%%%%%%%%%%%%%%%%%%%%%%% \section*{Acknowledgements} \ifthenelse{\boolean{publ}}{\small}{} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% The Bibliography %% %% %% %% Bmc_article.bst will be used to %% %% create a .BBL file for submission, which includes %% %% XML structured for BMC. %% %% %% %% %% %% Note that the displayed Bibliography will not %% %% necessarily be rendered by Latex exactly as specified %% %% in the online Instructions for Authors.
%% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% {\ifthenelse{\boolean{publ}}{\footnotesize}{\small} \bibliographystyle{bmc_article} % Style BST file \bibliography{bmc_article} } % Bibliography file (usually '*.bib' ) %%%%%%%%%%% \ifthenelse{\boolean{publ}}{\end{multicols}}{} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% Figures %% %% %% %% NB: this is for captions and %% %% Titles. All graphics must be %% %% submitted separately and NOT %% %% included in the Tex document %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% Do not use \listoffigures as most will included as separate files \section*{Figures} \subsection*{Figure 1 - Sample figure title} A short description of the figure content should go here. \subsection*{Figure 2 - Sample figure title} Figure legend text. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% Tables %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Use of \listoftables is discouraged. %% \section*{Tables} \subsection*{Table 1 - Sample table title} Here is an example of a \emph{small} table in \LaTeX\ using \verb|\tabular{...}|. This is where the description of the table should go. \par \mbox{} \par \mbox{ \begin{tabular}{|c|c|c|} \hline \multicolumn{3}{|c|}{My Table}\\ \hline A1 & B2 & C3 \\ \hline A2 & ... & .. \\ \hline A3 & .. & . \\ \hline \end{tabular} } \subsection*{Table 2 - Sample table title} Large tables are attached as separate files but should still be described here. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% %% Additional Files %% %% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section*{Additional Files} \subsection*{Additional file 1 --- Sample additional file title} Additional file descriptions text (including details of how to view the file, if it is in a non-standard format or the file extension). This might refer to a multi-page table or a figure. \subsection*{Additional file 2 --- Sample additional file title} Additional file descriptions text. \end{bmcformat} \end{document}