Update of /cvsroot/woc/woc/src/woc/src/cas In directory usw-pr-cvs1:/tmp/cvs-serv23171/woc/src/cas Added Files: CAS2NSC.txt CAS2SMILES.txt CAS2UN.txt Makefile.am add_nsc_number.pl add_nsc_number.pl.in add_numbers.pl add_numbers.pl.in add_smiles.pl add_smiles.pl.in add_un_nummers.pl add_un_nummers.pl.in check_cas_nummers.pl check_cas_nummers.pl.in check_doublenumbers.pl check_doublenumbers.pl.in zoek-UNs-zonder-wml.pl zoek-UNs-zonder-wml.pl.in Log Message: --- NEW FILE: CAS2NSC.txt --- 100-00-5 9792 100-01-6 9797 100-02-7 1317 100-06-1 5601 100-06-1 209523 100-07-2 86125 100-09-4 7926 100-09-4 32742 100-10-7 5517 100-11-8 4609 100-12-9 858 100-14-1 9803 100-15-2 5390 100-16-3 4079 100-17-4 5507 100-18-5 84198 100-19-6 41590 100-20-9 41885 100-21-0 36973 [...126750 lines suppressed...] 998-29-8 96837 998-30-1 124134 998-40-3 91700 998-41-4 111642 998-91-4 2321 998-91-4 224119 998-98-1 79255 999-21-3 4799 999-23-5 6406 999-29-1 58406 999-30-4 16588 999-33-7 223056 999-37-1 509380 999-47-3 34835 999-50-8 58807 999-55-3 20945 999-68-8 93208 999-79-1 525131 999-81-5 34858 999-97-3 93895 --- NEW FILE: CAS2SMILES.txt --- 100-00-5 [N+]([O-])(=O)C1=CC=C(Cl)C=C1 100-01-6 [N+]([O-])(=O)C1=CC=C(N)C=C1 100-02-7 [N+]([O-])(=O)C1=CC=C(O)C=C1 100-06-1 C(C)(=O)C1=CC=C(OC)C=C1 100-09-4 C(=O)(O)C1=CC=C(OC)C=C1 100-10-7 N(C)(C)C1=CC=C(C=O)C=C1 100-11-8 [N+]([O-])(=O)C1=CC=C(CBr)C=C1 100-12-9 [N+]([O-])(=O)C1=CC=C(CC)C=C1 100-14-1 [N+]([O-])(=O)C1=CC=C(CCl)C=C1 100-15-2 [N+]([O-])(=O)C1=CC=C(NC)C=C1 100-16-3 [N+]([O-])(=O)C1=CC=C(NN)C=C1 100-17-4 [N+]([O-])(=O)C1=CC=C(OC)C=C1 100-23-2 N(C)(C)C1=CC=C([N+](=O)[O-])C=C1 100-25-4 [N+]([O-])(=O)C1=CC=C([N+](=O)[O-])C=C1 100-26-5 C(=O)(O)C1=CC=C(C(=O)O)N=C1 100-28-7 [N+]([O-])(=O)C1=CC=C(N=C=O)C=C1 100-29-8 [N+]([O-])(=O)C1=CC=C(OCC)C=C1 100-32-3 [N+]([O-])(=O)C2=CC=C(SSC1=CC=C([N+](=O)[O-])C=C1)C=C2 100-33-4 C(=N)(N)C2=CC=C(OCCCCCOC1=CC=C(C(=N)N)C=C1)C=C2 [...9537 lines suppressed...] 
99-87-6 C(C)(C)C1=CC=C(C)C=C1 99-88-7 C(C)(C)C1=CC=C(N)C=C1 99-89-8 C(C)(C)C1=CC=C(O)C=C1 99-91-2 C(C)(=O)C1=CC=C(Cl)C=C1 99-92-3 C(C)(=O)C1=CC=C(N)C=C1 99-93-4 C(C)(=O)C1=CC=C(O)C=C1 99-94-5 C(=O)(O)C1=CC=C(C)C=C1 99-96-7 C(=O)(O)C1=CC=C(O)C=C1 99-97-8 N(C)(C)C1=CC=C(C)C=C1 99-98-9 N(C)(C)C1=CC=C(N)C=C1 99-99-0 [N+]([O-])(=O)C1=CC=C(C)C=C1 992-59-6 S(=O)(=O)(O)C1=C6C(=C(N)C(=C1)N=NC5=C(C)C=C(C4=CC(=C(N=NC3=C(N)C2=C(C=CC=C2)C(=C3)S(=O)(=O)O)C=C4)C)C=C5)C=CC=C6 994-31-0 [Sn](CC)(CC)(CC)Cl 996-19-0 S(=O)(=O)(O)O.C(NN)(=N)N 996-97-4 N(C(CCCCCCC)=O)(CC)CC 996-98-5 C(C(NN)=O)(NN)=O 997-95-5 N(CC(C)C)(CC(C)C)N=O 998-91-4 C(=CC(OCC)=O)(OCC)C 999-21-3 C(C=CC(OCC=C)=O)(OCC=C)=O 999-23-5 N(CCN(CCC#N)C)(CCC#N)C --- NEW FILE: CAS2UN.txt --- #CAS-nummer <=> UN-nummer 100-00-5 1578 100-01-6 1661 100-02-7 1663 100-25-4 1597 100-37-8 2686 100-39-0 1737 100-40-3 1993 100-41-4 1175 100-42-5 2055 100-44-7 1738 100-47-0 2224 100-51-6 2810 100-52-7 1990 100-61-8 2294 100-63-0 2572 100-80-1 2618 100-97-0 1328 10024-97-2 1070 10025-67-9 1828 10025-69-1 2923 10025-78-2 1295 10025-87-3 1810 10025-91-9 1733 10026-04-7 1818 10026-13-8 1806 10031-13-7 1618 10035-10-6 1048 10045-94-0 1625 10048-95-0 1685 101-02-0 2811 101-68-8 2489 101-72-4 2811 101-77-9 2651 101-84-8 3077 10102-18-8 2630 10102-44-0 1067 10103-50-1 1622 10103-61-4 1557 10108-64-2 2570 10112-91-1 2025 10124-50-2 1678 10192-30-0 2693 102-82-9 2542 10265-92-6 2783 10290-12-7 1586 10294-33-4 2692 10294-34-5 1741 10326-27-9 1564 10361-37-2 1564 104-15-4 2585 104-94-9 02431 105-46-4 1123 105-58-8 2366 105-74-8 2124, 106-35-4 1224 106-42-3 1307 106-44-5 2076 106-46-7 2811 106-47-8 2018 106-48-9 2020 106-49-0 1708 106-50-3 1673 106-51-4 2587 106-87-6 2810 106-88-7 3022 106-89-8 2023 106-92-3 2219 106-93-4 1605 106-97-8 1011 106-98-9 1012 106-99-0 1010 107-01-7 1012 107-02-8 1092 107-05-1 1100 107-06-2 1184 107-07-3 1135 107-10-8 1277 107-11-9 2334 107-12-0 2404 107-13-1 1093 107-14-2 2668 107-15-3 1604 107-18-6 1098 107-19-7 2929 
107-20-0 2232 107-30-2 1239 107-31-3 1243 107-39-1 2050 107-40-4 2050 107-49-3 3018 107-83-5 1208 107-87-9 1249 107-98-2 3092 108-01-0 2051 108-03-2 2608 108-05-4 1301 108-10-1 1245 108-11-2 2053 108-18-9 1158 108-20-3 1159 108-21-4 1220 108-23-6 2407 108-24-7 1715 108-31-6 2215 108-38-3 1307 108-39-4 2076 108-42-9 2019 108-43-0 2020 108-44-1 1708 108-46-3 2876 108-65-6 1993 108-67-8 2325 108-70-3 2321 108-77-0 2670 108-83-8 1157 108-87-2 2296 108-88-3 1294 108-89-4 2313 108-90-7 1134 108-91-8 2357 108-94-1 1915 108-95-2 1671 108-98-5 2337 108-99-6 2313 109-06-8 2313 109-55-7 2734 109-60-4 1276 109-66-0 1265 109-69-3 1127 109-73-9 1125 109-79-5 2347 109-86-4 1188 109-87-5 1234 109-89-7 1154 109-92-2 1302 109-94-4 1190 109-99-9 2056 110-00-9 2389 110-01-0 2412 110-02-1 2414 110-12-3 2302 110-16-7 1759 110-19-0 1213 110-43-0 1110 110-49-6 1189 110-54-3 1208 110-63-4 2810 110-80-5 1171 110-82-7 1145 110-83-8 2256 110-86-1 1282 110-89-4 2401 110-91-8 2054 11097-69-1 2315 111-14-8 3265 111-15-9 1172 111-30-8 2810 111-40-0 2079 111-65-9 1262 111-66-0 1993 111-69-3 2205 111-76-2 2369 111-84-2 1920 1113-38-8 2811 112-24-3 2259 112-55-0 1228 114-26-1 2757 115-11-7 1055 115-29-7 2761 115-32-2 2761 116-06-3 2757 116-29-0 2761 118-74-1 2729 118-75-2 2588 118-96-7 0209 119-93-7 2811 120-36-5 2765 120-51-4 2810 120-71-8 2431 120-80-9 2811 120-82-1 2321 120-83-2 2020 120-92-3 2245 12001-29-5 2590 12057-74-8 2011 12079-65-1 2811 121-14-2 2038 121-43-7 2416 121-44-8 1296 121-69-7 2253 121-75-5 3082 12108-13-3 2810 12122-67-7 2771 12125-01-8 2505 12136-45-7 2033 122-14-5 3018 122-34-9 2763 122-39-4 2811 122-52-1 2323 122-79-2 2810 123-05-7 1191 123-31-9 2662 123-38-6 1275 123-51-3 1105 123-54-6 2310 123-62-6 2496 123-72-8 1129 123-77-3 3242 123-86-4 1123 123-91-1 1165 123-92-2 1104 124-02-7 2359 124-09-4 2280 124-18-5 2247 124-38-9 1013 124-40-3 1032 124-41-4 1431 124-58-3 1557 124-63-0 3246 12427-38-2 2210 126-30-7 1325 126-75-0 3018 126-98-7 3079 126-99-8 1991 127-18-4 1897 
1300-73-8 1711 1302-42-7 2812 1303-28-2 1559 1304-28-5 1884 1304-29-6 1449 1305-62-0 1759 1305-78-8 1910 1306-19-0 2570 1309-64-4 1549 131-17-9 2810 131-52-2 2567 1310-58-3 1813 1310-65-2 2680 1310-66-3 2680 1310-73-2 1823 1312-73-8 1382 1313-99-1 2811 1314-34-7 3285 1314-56-3 1807 1314-62-1 2862 1314-84-7 1714 1317-36-8 3288 13171-21-6 3018 1319-77-3 2076, 1321-65-9 2811 1327-53-3 1561 133-06-2 2773 1333-74-0 1049 1333-82-0 1463 1333-86-4 1361 1336-21-6 2672 1338-02-9 3009 13463-39-3 1259 13463-40-6 1994 13477-00-4 1445 13494-80-9 2811 137-26-8 2771 137-30-4 2771 137-32-6 1105 13768-86-0 2928 13952-84-6 1992 140-88-5 1917 141-32-2 2348 141-43-5 2491 141-66-2 3018 141-78-6 1173 141-79-7 1229 142-04-1 1548 142-28-9 1992 142-62-1 2829 142-82-5 1206 142-96-1 1149 143-10-2 1228 143-33-9 1689 144-49-0 2642 144-62-7 2923 14484-64-1 2771 14977-61-8 1758 151-50-8 1680 151-56-4 1185 151-67-7 1610 1563-66-2 2757 1569-69-3 3054 15950-66-0 2020 15972-60-8 2588 1600-27-7 1629 16219-75-3 1993 1634-04-4 2398 16752-77-5 2757 16871-90-2 2655 16872-11-0 1775 1689-83-4 2588 16893-85-9 2674 16961-83-4 1778 17702-41-9 1868 17804-35-2 2757 1836-75-5 2779 1897-45-6 2588 1910-42-5 2781 1912-24-9 2763 1912-26-1 2763 1918-00-9 2769 19287-45-7 1911 19624-22-7 1380 19750-95-9 2588 2032-59-9 2757 2050-92-2 2841 20816-12-0 2471 20859-73-8 1397 2104-64-5 2783 21908-53-2 1641 22224-92-6 2783 2243-62-1 None 2303-17-5 2757 2310-17-0 2783 2425-06-1 2773 25013-15-4 2618 25057-89-0 2588 25154-52-3 3082 25154-54-5 1597 2524-04-1 2751 25321-14-6 2038 25340-17-4 2049 2551-62-4 1080 26628-22-8 1687 2807-30-9 1993 2813-95-8 3014 28434-00-6 2902 2855-13-2 2289 287-92-3 1146 2893-78-9 2465 2921-88-2 2783 298-00-0 2783 299-86-5 2783 300-76-5 3018 301-04-2 1616 302-01-2 2029 302-17-0 2811 30525-89-4 2213 309-00-2 2761 3173-53-3 2488 3209-22-1 1578 3268-49-3 2785 333-41-5 3018 3333-67-3 2811 3383-96-8 2783 3452-97-9 n.o.s 353-50-4 2417 353-59-3 1974 35400-43-2 3018 3689-24-5 1704 3710-30-3 2309 3724-65-0 2823 
3811-04-9 1485 39300-45-3 3013 3982-91-0 1837 4098-71-9 2290 4109-96-0 2189 4170-30-3 1143 420-04-2 3276 431-03-8 2346 479-45-8 0208 485-31-4 2779 50-00-0 1198 50-29-3 2761 50-78-2 2811 504-29-0 2671 506-68-3 1889 506-77-4 1589 506-78-5 1588 506-93-4 1467 506-96-7 1716 507-09-5 2436 51-28-5 1320 513-36-0 1127 51630-58-1 2902 52-51-7 3241 52-68-6 2783 52315-07-8 2902 52645-53-1 2902 528-29-0 1597 52918-63-5 2588 532-27-4 1697 5329-14-6 2967 533-74-4 2588 54-11-5 1654 540-59-0 1150 540-84-1 1262 541-41-3 1182 542-88-1 2249 542-92-7 1993 55-38-9 3018 55-63-0 0143 55-68-5 1895 554-00-7 1590 554-13-2 2811 556-52-5 2810 558-13-4 2516 56-23-5 1846 56-35-9 3020 56-38-2 3018 56-72-4 3027 563-12-2 3018 563-80-4 2397 57-06-7 1545 57-14-7 1163 57-24-9 1692 57-57-8 2810 57-74-9 2996 58-08-2 1544 58-89-9 2761 583-59-5 2617 583-78-8 2020 584-02-1 1105 584-79-2 2902 584-84-9 2078 59-50-7 2669 590-01-2 1914 590-18-1 1012 591-35-5 2020 591-78-6 1224 592-01-8 1575 592-27-8 1262 592-41-6 2370 592-57-4 1993 592-84-7 1128 594-42-3 1670 60-24-2 2966 60-29-7 1155 60-34-4 1244 60-35-5 2811 60-41-3 1692 60-51-5 2783 60-57-1 2761 602-01-7 2038 606-20-2 2038 608-27-5 1590 608-31-1 1590 608-73-1 2761 61-82-5 2588 610-39-9 2038 610-40-2 1577 611-06-3 1578 611-15-4 2618 6164-98-3 2588 61789-51-3 2001 61790-14-5 2810 62-38-4 1674 62-53-3 1547 62-55-5 2811 62-56-6 2810 62-73-7 3018 62-74-8 2629 62-75-9 2810 622-45-7 2243 622-97-9 2618 624-64-6 1012 624-83-9 2480 626-38-0 1104 626-93-7 2282 628-63-7 1104 63-25-2 2757 630-08-0 1016 63989-69-5 1607 64-17-5 1170 64-18-6 1779 64-19-7 2789 64-67-5 1594 640-15-3 3018 6484-52-2 1942 65-30-5 1658 65-31-6 1659 66-81-9 2588 67-56-1 1230 67-63-0 1219 67-64-1 1090 67-66-3 1888 674-82-8 2521 68-12-2 2265 68085-85-8 2902 681-84-5 2606 6834-92-0 1759 68694-11-1 2588 6923-22-4 2783 70-30-4 2875 71-23-8 1274 71-36-3 1120 71-41-0 1105 71-43-2 1114 71-55-6 2831 732-11-6 2783 74-82-8 1971 74-83-9 1062 74-84-0 1035 74-85-1 1962 74-86-2 1001 74-87-3 1063 74-89-5 1061 
74-90-8 1051 74-93-1 1064 74-95-3 2664 74-97-5 1887 74-98-6 1978 74-99-7 1954 7439-90-9 1970 7439-92-1 3288 7439-93-2 1415 7439-95-4 1418 7439-95-4 1869 7439-97-6 2809 7440-01-9 1065 7440-16-6 3089 7440-28-0 1707 7440-29-1 2975 7440-36-0 2871 7440-37-1 1951 7440-38-2 1558 7440-39-3 1400 7440-41-7 1567 7440-43-9 2570 7440-44-0 1361 7440-48-4 3178 7440-58-6 2545 7440-59-7 1963 7440-63-3 2036 7440-66-6 1436 7440-70-2 1401 7446-08-4 2811 7446-09-5 1079 7446-18-6 1707 7487-94-7 1624 75-00-3 1037 75-01-4 1086 75-02-5 1860 75-04-7 1036 75-05-8 1648 75-07-0 1089 75-08-1 2363 75-09-2 1593 75-15-0 1131 75-18-3 1164 75-20-7 1402 75-21-8 1040 75-25-2 2515 75-31-0 1221 75-34-3 2362 75-35-4 1303 75-36-5 1717 75-38-7 1959 75-43-4 1029 75-44-5 1076 75-45-6 1018 75-46-7 1984 75-50-3 1083 75-52-5 1261 75-54-7 1242 75-55-8 1921 75-56-9 1280 75-63-8 1009 75-65-0 1120 75-69-4 1017 75-71-8 1028 75-72-9 1022 75-73-0 1982 75-74-1 1649 75-77-4 1298 75-79-6 1250 75-86-5 1541 75-91-2 3109 75-98-9 1759 7550-45-0 1838 7580-67-8 1414 759-94-4 2992 76-15-3 1020 76-44-8 2761 7601-89-0 1502 7616-94-6 3083 7631-99-4 1498 7632-00-0 1500 7637-07-2 1008 7646-78-8 1827 7647-01-0 1050 7664-38-2 1805 7664-39-3 1052 7664-41-7 1005 7664-93-9 1830 768-52-5 2810 7681-52-9 1791 7697-37-2 2031 77-58-7 2788 77-78-1 1595 7704-34-9 1350 7719-12-2 1809 7722-84-1 2015 7723-14-0 1381 7726-95-6 1744 7727-37-9 1066 7727-37-9 1977 7727-43-7 1564 7757-79-1 1486 7758-01-2 1484 7758-09-0 1488 7761-88-8 1493 7772-99-8 2923 7775-09-9 1495 7778-43-0 1685 7778-44-1 1573 7778-74-7 1489 7779-88-6 1514 7782-41-4 1045 7782-44-7 1072 7782-44-7 1073 7782-49-2 2658 7782-50-5 1017 7782-65-2 2192 7783-00-8 2811 7783-06-4 1053 7783-07-5 2202 7783-35-9 1645 7783-41-7 2190 7783-47-3 3288 7783-54-2 2451 7783-61-1 1859 7783-70-2 1732 7783-79-1 2194 7783-81-5 2978 7784-34-1 1560 7784-40-9 1617 7784-41-0 1677 7784-42-1 2188 7784-44-3 1546 7786-34-7 3018 7789-06-2 2811 7789-30-2 1745 7789-38-0 1494 7790-91-2 1749 7791-23-3 2879 7791-25-5 1834 
78-00-2 1649 78-10-4 1292 78-30-8 2574 78-34-2 3018 78-78-4 1265 78-79-5 1218 78-81-9 1214 78-83-1 1212 78-84-2 2045 78-85-3 2396 78-87-5 1279 78-90-0 2258 78-92-2 1120 78-93-3 1193 78-95-5 1695 78-96-6 2735 78-99-9 1992 7803-51-2 2199 7803-52-3 2676 7803-62-5 2203 786-19-6 3018 79-00-5 3082 79-01-6 1710 79-04-9 1752 79-06-1 2074 79-07-2 2811 79-09-4 1848 79-10-7 2218 79-11-8 1751 79-20-9 1231 79-22-1 1238 79-24-3 2842 79-27-6 2504 79-34-5 1702 79-36-7 1765 79-41-4 2531 79-43-6 1764 79-46-9 2608 80-15-9 3109 80-62-6 1247 8001-35-2 2761 8008-20-6 1223 8018-01-7 2771 8052-41-3 1268 81-81-2 3027 811-97-2 3159 818-08-6 3146 822-06-0 2281 83-79-4 2588 836-30-6 1325 84-74-2 3082 85-44-9 2214 86-50-0 2783 86-88-4 1651 867-27-6 3018 87-61-6 2811 87-66-1 2811 87-68-3 2279 87-86-5 3155 873-66-5 2618 88-06-2 2020 88-72-2 1664 88-73-3 1578 88-74-4 1661 88-85-7 2779 88-89-1 0154 89-98-5 1760 90-04-0 2431 90-30-2 2811 90035-08-8 3027 91-22-5 2656 91-64-5 2811 91465-08-6 2588 919-86-8 3018 92-67-1 2811 92-87-5 1885 93-58-3 2938 93-65-2 2765 93-76-5 2765 933-75-5 2020 933-78-8 2020 935-95-5 2020 94-36-0 3102 94-74-6 2765 94-75-7 2765 944-22-9 3018 95-47-6 1307 95-48-7 2076 95-50-1 1591 95-51-2 2019 95-53-4 1708 95-55-6 2512 95-57-8 2021 95-76-1 1590 95-80-7 1709 95-82-9 1590 95-95-4 2020 95266-40-3 2588 957-51-7 2588 96-12-8 2872 96-14-0 1208 96-18-4 2810 96-22-0 1156 96-33-3 1919 97-00-7 1577 97-02-9 1596 97-63-2 2277 98-00-0 2874 98-01-1 1199 98-05-5 1557 98-07-7 2226 98-82-8 1918 98-83-9 2303 98-87-3 1886 98-88-4 1736 98-95-3 1662 99-09-2 1661 99-54-7 1578 99-65-0 1597 99-87-6 2046 99-97-8 2810 99-99-0 1664 999-61-1 1760 --- NEW FILE: Makefile.am --- bindir=${prefix}/bin/cas bin_SCRIPTS = \ add_nsc_number.pl \ add_numbers.pl \ add_smiles.pl \ add_un_nummers.pl \ check_cas_nummers.pl \ check_doublenumbers.pl \ zoek-UNs-zonder-wml.pl CLEANFILES = $(bin_SCRIPTS) --- NEW FILE: add_nsc_number.pl --- #! 
/usr/bin/perl -w
use strict;
use diagnostics;

# Adds NSC numbers to XML files.
#
# For every file named on the command line, each line that mentions
# CAS-NUMBER and carries a value between '>' and '<' is looked up in the
# CAS<=>NSC table (CAS2NSC.txt, opened relative to the current directory).
# Unless the line that follows already mentions NSC-NUMBER, a copy of the CAS
# line with the tag renamed and the value replaced is inserted right after
# it.  Files that received at least one new line are rewritten in place.

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $CAS_NSC_table_file   = "CAS2NSC.txt";
my %NSC_number_of;               # CAS number => NSC number
my $nr_NSC_numbers       = 0;    # CAS lines already followed by an NSC line
my $nr_added_NSC_numbers = 0;    # files rewritten with new NSC lines
my $nr_unable            = 0;    # files that could not be rewritten

# Read CAS<=>NSC table.  When a CAS number occurs more than once the last
# entry wins, which is what the former linear scan of the table returned.
open(my $table, '<', $CAS_NSC_table_file)
    or die "$CAS_NSC_table_file: $!\n";
while (my $entry = <$table>) {
    chomp $entry;
    next if $entry =~ /^\s*#/;    # comment line
    my ($cas, $nsc) = split /\s+/, $entry, 2;
    next unless defined $cas && length $cas;
    $NSC_number_of{$cas} = defined $nsc ? $nsc : "";
}
close($table) or die "$CAS_NSC_table_file: $!\n";

# Loop over all files
foreach my $arg (@ARGV) {
    add_NSC_number_to_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_NSC_numbers) {
    print "NSC numbers found : $nr_NSC_numbers\n";
}
else {
    print "No NSC numbers found.\n";
}
print "Unable to changes: $nr_unable\n" if $nr_unable;
if ($nr_added_NSC_numbers) {
    print "NSC numbers added : $nr_added_NSC_numbers\n";
    print "NSC numbers total : ", $nr_NSC_numbers + $nr_added_NSC_numbers, "\n";
}
else {
    print "No NSC numbers added.\n";
}

# add_NSC_number_to_file($file)
#
# Reads $file, inserts an NSC-NUMBER line after every CAS-NUMBER line that
# has a table entry and is not already followed by one, and rewrites the
# file when something was inserted.  Updates the global counters and prints
# a one-line progress report.  Returns nothing useful.
sub add_NSC_number_to_file {
    my $file = shift;
    my @XML_data;
    my $changed = 0;

    print "Processing $file. ";

    # Three-argument open: the name comes from the command line and must
    # never be interpreted as an open mode.
    if (open(my $in, '<', $file)) {
        my $cas_found = 0;
        my $chemical  = 0;

        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = 1;
            }
            elsif ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $cas_found = 1;
                push @XML_data, $line;

                # Look one line ahead.  At end of file there is nothing to
                # look at; use an empty string so nothing undefined is
                # matched or written back.
                my $next_line = <$in>;
                $next_line = "" unless defined $next_line;

                if ($next_line =~ /NSC-NUMBER/) {
                    $nr_NSC_numbers++;
                }
                else {
                    my $NSC_number = get_NSC_number($CAS_number);
                    if ($NSC_number) {
                        my $NSC_line = $line;
                        # /gi: rename every occurrence of the tag on the
                        # line, in whatever case the detection above
                        # accepted.
                        $NSC_line =~ s/CAS-NUMBER/NSC-NUMBER/gi;
                        $NSC_line =~ s/>.*?</>$NSC_number</;
                        push @XML_data, $NSC_line;
                        $changed = 1;
                    }
                }
                push @XML_data, $next_line;
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                # A WOC number counts as identification too, so the
                # "No CAS number found" notice is suppressed.
                push @XML_data, $line;
                $cas_found = 1;
            }
            else {
                push @XML_data, $line;
            }
        }

        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if (!$changed) {
        print "No changes...\n";
        return;
    }

    if (open(my $out, '>', $file)) {
        print {$out} @XML_data;
        close($out) or die "$file: $!\n";
        print "NSC number added...\n";
        $nr_added_NSC_numbers++;
    }
    else {
        warn "$file: $!\n";
        print "Unable to change...\n";
        $nr_unable++;
    }
    return;
}

# get_NSC_number($CAS_number)
#
# Returns the NSC number stored for exactly this CAS number, or an empty
# string when the table has no entry.  The lookup is an exact hash lookup, so
# regex metacharacters in the XML value cannot break it or cause a match on
# a longer CAS number.
sub get_NSC_number {
    my $CAS_number = shift;
    return exists $NSC_number_of{$CAS_number}
        ? $NSC_number_of{$CAS_number}
        : "";
}
--- NEW FILE: add_nsc_number.pl.in ---
#! @PATHTOPERL@ -w
use strict;
use diagnostics;

# Adds NSC numbers to XML files.
#
# For every file named on the command line, each line that mentions
# CAS-NUMBER and carries a value between '>' and '<' is looked up in the
# CAS<=>NSC table (CAS2NSC.txt, opened relative to the current directory).
# Unless the line that follows already mentions NSC-NUMBER, a copy of the CAS
# line with the tag renamed and the value replaced is inserted right after
# it.  Files that received at least one new line are rewritten in place.

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $CAS_NSC_table_file   = "CAS2NSC.txt";
my %NSC_number_of;               # CAS number => NSC number
my $nr_NSC_numbers       = 0;    # CAS lines already followed by an NSC line
my $nr_added_NSC_numbers = 0;    # files rewritten with new NSC lines
my $nr_unable            = 0;    # files that could not be rewritten

# Read CAS<=>NSC table.  When a CAS number occurs more than once the last
# entry wins, which is what the former linear scan of the table returned.
open(my $table, '<', $CAS_NSC_table_file)
    or die "$CAS_NSC_table_file: $!\n";
while (my $entry = <$table>) {
    chomp $entry;
    next if $entry =~ /^\s*#/;    # comment line
    my ($cas, $nsc) = split /\s+/, $entry, 2;
    next unless defined $cas && length $cas;
    $NSC_number_of{$cas} = defined $nsc ? $nsc : "";
}
close($table) or die "$CAS_NSC_table_file: $!\n";

# Loop over all files
foreach my $arg (@ARGV) {
    add_NSC_number_to_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_NSC_numbers) {
    print "NSC numbers found : $nr_NSC_numbers\n";
}
else {
    print "No NSC numbers found.\n";
}
print "Unable to changes: $nr_unable\n" if $nr_unable;
if ($nr_added_NSC_numbers) {
    print "NSC numbers added : $nr_added_NSC_numbers\n";
    print "NSC numbers total : ", $nr_NSC_numbers + $nr_added_NSC_numbers, "\n";
}
else {
    print "No NSC numbers added.\n";
}

# add_NSC_number_to_file($file)
#
# Reads $file, inserts an NSC-NUMBER line after every CAS-NUMBER line that
# has a table entry and is not already followed by one, and rewrites the
# file when something was inserted.  Updates the global counters and prints
# a one-line progress report.  Returns nothing useful.
sub add_NSC_number_to_file {
    my $file = shift;
    my @XML_data;
    my $changed = 0;

    print "Processing $file. ";

    # Three-argument open: the name comes from the command line and must
    # never be interpreted as an open mode.
    if (open(my $in, '<', $file)) {
        my $cas_found = 0;
        my $chemical  = 0;

        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = 1;
            }
            elsif ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $cas_found = 1;
                push @XML_data, $line;

                # Look one line ahead.  At end of file there is nothing to
                # look at; use an empty string so nothing undefined is
                # matched or written back.
                my $next_line = <$in>;
                $next_line = "" unless defined $next_line;

                if ($next_line =~ /NSC-NUMBER/) {
                    $nr_NSC_numbers++;
                }
                else {
                    my $NSC_number = get_NSC_number($CAS_number);
                    if ($NSC_number) {
                        my $NSC_line = $line;
                        # /gi: rename every occurrence of the tag on the
                        # line, in whatever case the detection above
                        # accepted.
                        $NSC_line =~ s/CAS-NUMBER/NSC-NUMBER/gi;
                        $NSC_line =~ s/>.*?</>$NSC_number</;
                        push @XML_data, $NSC_line;
                        $changed = 1;
                    }
                }
                push @XML_data, $next_line;
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                # A WOC number counts as identification too, so the
                # "No CAS number found" notice is suppressed.
                push @XML_data, $line;
                $cas_found = 1;
            }
            else {
                push @XML_data, $line;
            }
        }

        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if (!$changed) {
        print "No changes...\n";
        return;
    }

    if (open(my $out, '>', $file)) {
        print {$out} @XML_data;
        close($out) or die "$file: $!\n";
        print "NSC number added...\n";
        $nr_added_NSC_numbers++;
    }
    else {
        warn "$file: $!\n";
        print "Unable to change...\n";
        $nr_unable++;
    }
    return;
}

# get_NSC_number($CAS_number)
#
# Returns the NSC number stored for exactly this CAS number, or an empty
# string when the table has no entry.  The lookup is an exact hash lookup, so
# regex metacharacters in the XML value cannot break it or cause a match on
# a longer CAS number.
sub get_NSC_number {
    my $CAS_number = shift;
    return exists $NSC_number_of{$CAS_number}
        ? $NSC_number_of{$CAS_number}
        : "";
}
--- NEW FILE: add_numbers.pl --- #! 
/usr/bin/perl -w
use strict;
use diagnostics;

# Adds SMILES strings, UN numbers or NSC numbers to XML files.
#
# The kind of number is chosen with "-type <TYPE>" (default SMILES) and
# selects both the tag name written to the XML and the CAS lookup table
# (CAS2SMILES.txt, CAS2UN.txt or CAS2NSC.txt, opened relative to the current
# directory).  For every file, the first CAS-NUMBER line with a table entry
# gets a copy of itself inserted right after it, with the tag renamed and the
# value replaced, unless the file already carries a line of the chosen type.
# Surplus lines of the chosen type are dropped when the file is rewritten.

my $number_type = "SMILES";

# Check command line options
if (@ARGV < 1) {
    print "Usage: $0 [-type <TYPE>] <xml-files>\n";
    print "\t<TYPE> = (SMILES | UN[-NUMBER] | NSC[-NUMBER])\n";
    exit;
}
elsif ((@ARGV >= 3) && ($ARGV[0] =~ /^-t/i)) {
    shift @ARGV;
    $number_type = shift @ARGV;
}

# Global variables
my $CAS_Lookup_table_file = "CAS2SMILES.txt";
my %number_of;                # CAS number => value of the chosen type
my $nr_numbers       = 0;     # files that already had a value
my $nr_added_numbers = 0;     # files rewritten with a new value
my $nr_unable        = 0;     # files that could not be rewritten
my $nr_double        = 0;     # files with more than one value

# Normalise the requested type to the tag name and pick the matching table
if ($number_type =~ /^SMILES/i) {
    $number_type           = "SMILES";
    $CAS_Lookup_table_file = "CAS2SMILES.txt";
}
elsif ($number_type =~ /^NSC/i) {
    $number_type           = "NSC-NUMBER";
    $CAS_Lookup_table_file = "CAS2NSC.txt";
}
elsif ($number_type =~ /^UN/i) {
    $number_type           = "UN-NUMBER";
    $CAS_Lookup_table_file = "CAS2UN.txt";
}
else {
    print "Error: unrecognized type $number_type!\n";
    print "Usage: $0 [-type <TYPE>] <xml-files>\n";
    print "\t<TYPE> = (SMILES | UN[-NUMBER] | NSC[-NUMBER])\n";
    exit;
}

# Read CAS<=><TYPE> table.  When a CAS number occurs more than once the last
# entry wins, which is what the former linear scan of the table returned.
open(my $table, '<', $CAS_Lookup_table_file)
    or die "$CAS_Lookup_table_file: $!\n";
while (my $entry = <$table>) {
    chomp $entry;
    next if $entry =~ /^\s*#/;    # comment line
    my ($cas, $value) = split /\s+/, $entry, 2;
    next unless defined $cas && length $cas;
    $number_of{$cas} = defined $value ? $value : "";
}
close($table) or die "$CAS_Lookup_table_file: $!\n";

# Loop over all files
foreach my $arg (@ARGV) {
    add_number_to_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_numbers) {
    print "$number_type strings found : $nr_numbers\n";
}
else {
    print "No $number_type strings found.\n";
}
print "Unable to changes: $nr_unable\n"           if $nr_unable;
print "Double $number_type strings: $nr_double\n" if $nr_double;
if ($nr_added_numbers) {
    print "$number_type strings added : $nr_added_numbers\n";
    print "$number_type strings total : ", $nr_numbers + $nr_added_numbers, "\n";
}
else {
    print "No $number_type strings added.\n";
}

# add_number_to_file($file)
#
# Reads $file and decides between three outcomes:
#   * a line of the chosen type occurs more than once: the file is rewritten
#     with only the first one kept;
#   * exactly one such line exists: the file is left alone;
#   * none exists: a line is generated from the CAS number (when the table
#     knows it) and the file is rewritten.
# Updates the global counters and prints a progress report.
sub add_number_to_file {
    my $file = shift;
    my @XML_data;
    my $changed       = 0;
    my $number_found  = 0;
    my $double_number = 0;

    print "Processing $file. ";

    # Three-argument open: the name comes from the command line and must
    # never be interpreted as an open mode.
    if (open(my $in, '<', $file)) {
        my $cas_found = 0;
        my $chemical  = 0;

        LINE:
        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = 1;
            }
            elsif ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $cas_found = 1;

                # Always keep the CAS line itself; it used to be lost from
                # the rewritten file when a value had been seen before it.
                push @XML_data, $line;
                next LINE if $number_found;

                # Look one line ahead.  At end of file there is nothing to
                # look at; use an empty string so nothing undefined is
                # matched or written back.
                my $next_line = <$in>;
                $next_line = "" unless defined $next_line;

                if ($next_line =~ /$number_type/) {
                    $number_found = 1;
                }
                else {
                    my $number = get_number($CAS_number);
                    if ($number) {
                        my $number_line = $line;
                        # /gi: rename every occurrence of the tag on the
                        # line, in whatever case the detection above
                        # accepted.
                        $number_line =~ s/CAS-NUMBER/$number_type/gi;
                        $number_line =~ s/>.*?</>$number</;
                        push @XML_data, $number_line;
                        $changed = 1;
                    }
                }
                push @XML_data, $next_line;
            }
            elsif ($line =~ /$number_type/ && $line =~ />(.+?)</) {
                if ($number_found) {
                    # Second or later value: leave it out of the output.
                    $double_number = 1;
                }
                else {
                    push @XML_data, $line;
                    $number_found = 1;
                }
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                # A WOC number counts as identification too, so the
                # "No CAS number found" notice is suppressed.
                push @XML_data, $line;
                $cas_found = 1;
            }
            else {
                push @XML_data, $line;
            }
        }

        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($double_number) {
        print "Double $number_type string found!";
        $nr_numbers++;
        $nr_double++;
        $changed = 1;
    }
    elsif ($number_found) {
        # The file already had its value; discard any generated line.
        print "No changes needed...\n";
        $nr_numbers++;
        $changed = 0;
    }

    if (!$changed) {
        print "No changes...\n";
        return;
    }

    if (open(my $out, '>', $file)) {
        print {$out} @XML_data;
        close($out) or die "$file: $!\n";
        if ($double_number) {
            print " Attempting to remove...\n";
        }
        else {
            print "$number_type number added...\n";
            $nr_added_numbers++;
        }
    }
    else {
        warn "$file: $!\n";
        print "Unable to change...\n";
        $nr_unable++;
    }
    return;
}

# get_number($CAS_number)
#
# Returns the table value stored for exactly this CAS number, or an empty
# string when the table has no entry.  The lookup is an exact hash lookup, so
# regex metacharacters in the XML value cannot break it or cause a match on
# a longer CAS number.
sub get_number {
    my $CAS_number = shift;
    return exists $number_of{$CAS_number} ? $number_of{$CAS_number} : "";
}
--- NEW FILE: add_numbers.pl.in --- #! 
@PATHTOPERL@ -w
use strict;
use diagnostics;

# Adds SMILES strings, UN numbers or NSC numbers to XML files.
#
# The kind of number is chosen with "-type <TYPE>" (default SMILES) and
# selects both the tag name written to the XML and the CAS lookup table
# (CAS2SMILES.txt, CAS2UN.txt or CAS2NSC.txt, opened relative to the current
# directory).  For every file, the first CAS-NUMBER line with a table entry
# gets a copy of itself inserted right after it, with the tag renamed and the
# value replaced, unless the file already carries a line of the chosen type.
# Surplus lines of the chosen type are dropped when the file is rewritten.

my $number_type = "SMILES";

# Check command line options
if (@ARGV < 1) {
    print "Usage: $0 [-type <TYPE>] <xml-files>\n";
    print "\t<TYPE> = (SMILES | UN[-NUMBER] | NSC[-NUMBER])\n";
    exit;
}
elsif ((@ARGV >= 3) && ($ARGV[0] =~ /^-t/i)) {
    shift @ARGV;
    $number_type = shift @ARGV;
}

# Global variables
my $CAS_Lookup_table_file = "CAS2SMILES.txt";
my %number_of;                # CAS number => value of the chosen type
my $nr_numbers       = 0;     # files that already had a value
my $nr_added_numbers = 0;     # files rewritten with a new value
my $nr_unable        = 0;     # files that could not be rewritten
my $nr_double        = 0;     # files with more than one value

# Normalise the requested type to the tag name and pick the matching table
if ($number_type =~ /^SMILES/i) {
    $number_type           = "SMILES";
    $CAS_Lookup_table_file = "CAS2SMILES.txt";
}
elsif ($number_type =~ /^NSC/i) {
    $number_type           = "NSC-NUMBER";
    $CAS_Lookup_table_file = "CAS2NSC.txt";
}
elsif ($number_type =~ /^UN/i) {
    $number_type           = "UN-NUMBER";
    $CAS_Lookup_table_file = "CAS2UN.txt";
}
else {
    print "Error: unrecognized type $number_type!\n";
    print "Usage: $0 [-type <TYPE>] <xml-files>\n";
    print "\t<TYPE> = (SMILES | UN[-NUMBER] | NSC[-NUMBER])\n";
    exit;
}

# Read CAS<=><TYPE> table.  When a CAS number occurs more than once the last
# entry wins, which is what the former linear scan of the table returned.
open(my $table, '<', $CAS_Lookup_table_file)
    or die "$CAS_Lookup_table_file: $!\n";
while (my $entry = <$table>) {
    chomp $entry;
    next if $entry =~ /^\s*#/;    # comment line
    my ($cas, $value) = split /\s+/, $entry, 2;
    next unless defined $cas && length $cas;
    $number_of{$cas} = defined $value ? $value : "";
}
close($table) or die "$CAS_Lookup_table_file: $!\n";

# Loop over all files
foreach my $arg (@ARGV) {
    add_number_to_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_numbers) {
    print "$number_type strings found : $nr_numbers\n";
}
else {
    print "No $number_type strings found.\n";
}
print "Unable to changes: $nr_unable\n"           if $nr_unable;
print "Double $number_type strings: $nr_double\n" if $nr_double;
if ($nr_added_numbers) {
    print "$number_type strings added : $nr_added_numbers\n";
    print "$number_type strings total : ", $nr_numbers + $nr_added_numbers, "\n";
}
else {
    print "No $number_type strings added.\n";
}

# add_number_to_file($file)
#
# Reads $file and decides between three outcomes:
#   * a line of the chosen type occurs more than once: the file is rewritten
#     with only the first one kept;
#   * exactly one such line exists: the file is left alone;
#   * none exists: a line is generated from the CAS number (when the table
#     knows it) and the file is rewritten.
# Updates the global counters and prints a progress report.
sub add_number_to_file {
    my $file = shift;
    my @XML_data;
    my $changed       = 0;
    my $number_found  = 0;
    my $double_number = 0;

    print "Processing $file. ";

    # Three-argument open: the name comes from the command line and must
    # never be interpreted as an open mode.
    if (open(my $in, '<', $file)) {
        my $cas_found = 0;
        my $chemical  = 0;

        LINE:
        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = 1;
            }
            elsif ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $cas_found = 1;

                # Always keep the CAS line itself; it used to be lost from
                # the rewritten file when a value had been seen before it.
                push @XML_data, $line;
                next LINE if $number_found;

                # Look one line ahead.  At end of file there is nothing to
                # look at; use an empty string so nothing undefined is
                # matched or written back.
                my $next_line = <$in>;
                $next_line = "" unless defined $next_line;

                if ($next_line =~ /$number_type/) {
                    $number_found = 1;
                }
                else {
                    my $number = get_number($CAS_number);
                    if ($number) {
                        my $number_line = $line;
                        # /gi: rename every occurrence of the tag on the
                        # line, in whatever case the detection above
                        # accepted.
                        $number_line =~ s/CAS-NUMBER/$number_type/gi;
                        $number_line =~ s/>.*?</>$number</;
                        push @XML_data, $number_line;
                        $changed = 1;
                    }
                }
                push @XML_data, $next_line;
            }
            elsif ($line =~ /$number_type/ && $line =~ />(.+?)</) {
                if ($number_found) {
                    # Second or later value: leave it out of the output.
                    $double_number = 1;
                }
                else {
                    push @XML_data, $line;
                    $number_found = 1;
                }
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                # A WOC number counts as identification too, so the
                # "No CAS number found" notice is suppressed.
                push @XML_data, $line;
                $cas_found = 1;
            }
            else {
                push @XML_data, $line;
            }
        }

        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($double_number) {
        print "Double $number_type string found!";
        $nr_numbers++;
        $nr_double++;
        $changed = 1;
    }
    elsif ($number_found) {
        # The file already had its value; discard any generated line.
        print "No changes needed...\n";
        $nr_numbers++;
        $changed = 0;
    }

    if (!$changed) {
        print "No changes...\n";
        return;
    }

    if (open(my $out, '>', $file)) {
        print {$out} @XML_data;
        close($out) or die "$file: $!\n";
        if ($double_number) {
            print " Attempting to remove...\n";
        }
        else {
            print "$number_type number added...\n";
            $nr_added_numbers++;
        }
    }
    else {
        warn "$file: $!\n";
        print "Unable to change...\n";
        $nr_unable++;
    }
    return;
}

# get_number($CAS_number)
#
# Returns the table value stored for exactly this CAS number, or an empty
# string when the table has no entry.  The lookup is an exact hash lookup, so
# regex metacharacters in the XML value cannot break it or cause a match on
# a longer CAS number.
sub get_number {
    my $CAS_number = shift;
    return exists $number_of{$CAS_number} ? $number_of{$CAS_number} : "";
}
--- NEW FILE: add_smiles.pl --- #! 
/usr/bin/perl -w
use strict;
use diagnostics;

# Adds SMILES strings to XML files.
#
# For every file named on the command line, the first CAS-NUMBER line with
# an entry in the CAS<=>SMILES table (CAS2SMILES.txt, opened relative to the
# current directory) gets a copy of itself inserted right after it, with the
# tag renamed to SMILES and the value replaced, unless the file already
# carries a SMILES line.  Surplus SMILES lines are dropped when the file is
# rewritten.

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $CAS_SMILES_table_file   = "CAS2SMILES.txt";
my %SMILES_of;                      # CAS number => SMILES string
my $nr_SMILES_numbers       = 0;    # files that already had a SMILES line
my $nr_added_SMILES_numbers = 0;    # files rewritten with a new SMILES line
my $nr_unable               = 0;    # files that could not be rewritten
my $nr_double               = 0;    # files with more than one SMILES line

# Read CAS<=>SMILES table.  When a CAS number occurs more than once the last
# entry wins, which is what the former linear scan of the table returned.
open(my $table, '<', $CAS_SMILES_table_file)
    or die "$CAS_SMILES_table_file: $!\n";
while (my $entry = <$table>) {
    chomp $entry;
    next if $entry =~ /^\s*#/;    # comment line
    my ($cas, $smiles) = split /\s+/, $entry, 2;
    next unless defined $cas && length $cas;
    $SMILES_of{$cas} = defined $smiles ? $smiles : "";
}
close($table) or die "$CAS_SMILES_table_file: $!\n";

# Loop over all files
foreach my $arg (@ARGV) {
    add_SMILES_number_to_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_SMILES_numbers) {
    print "SMILES strings found : $nr_SMILES_numbers\n";
}
else {
    print "No SMILES strings found.\n";
}
print "Unable to changes: $nr_unable\n"     if $nr_unable;
print "Double SMILES strings: $nr_double\n" if $nr_double;
if ($nr_added_SMILES_numbers) {
    print "SMILES strings added : $nr_added_SMILES_numbers\n";
    print "SMILES strings total : ",
        $nr_SMILES_numbers + $nr_added_SMILES_numbers, "\n";
}
else {
    print "No SMILES strings added.\n";
}

# add_SMILES_number_to_file($file)
#
# Reads $file and decides between three outcomes:
#   * a SMILES line occurs more than once: the file is rewritten with only
#     the first one kept;
#   * exactly one SMILES line exists: the file is left alone;
#   * none exists: a line is generated from the CAS number (when the table
#     knows it) and the file is rewritten.
# Updates the global counters and prints a progress report.
sub add_SMILES_number_to_file {
    my $file = shift;
    my @XML_data;
    my $changed       = 0;
    my $smiles_found  = 0;
    my $double_smiles = 0;

    print "Processing $file. ";

    # Three-argument open: the name comes from the command line and must
    # never be interpreted as an open mode.
    if (open(my $in, '<', $file)) {
        my $cas_found = 0;
        my $chemical  = 0;

        LINE:
        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = 1;
            }
            elsif ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $cas_found = 1;

                # Always keep the CAS line itself; it used to be lost from
                # the rewritten file when a SMILES line had been seen
                # before it.
                push @XML_data, $line;
                next LINE if $smiles_found;

                # Look one line ahead.  At end of file there is nothing to
                # look at; use an empty string so nothing undefined is
                # matched or written back.
                my $next_line = <$in>;
                $next_line = "" unless defined $next_line;

                if ($next_line =~ /SMILES/) {
                    $smiles_found = 1;
                }
                else {
                    my $SMILES_number = get_SMILES_number($CAS_number);
                    if ($SMILES_number) {
                        my $SMILES_line = $line;
                        # /gi: rename every occurrence of the tag on the
                        # line, in whatever case the detection above
                        # accepted.
                        $SMILES_line =~ s/CAS-NUMBER/SMILES/gi;
                        $SMILES_line =~ s/>.*?</>$SMILES_number</;
                        push @XML_data, $SMILES_line;
                        $changed = 1;
                    }
                }
                push @XML_data, $next_line;
            }
            elsif ($line =~ /SMILES/ && $line =~ />(.+?)</) {
                if ($smiles_found) {
                    # Second or later SMILES line: leave it out.
                    $double_smiles = 1;
                }
                else {
                    push @XML_data, $line;
                    $smiles_found = 1;
                }
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                # A WOC number counts as identification too, so the
                # "No CAS number found" notice is suppressed.
                push @XML_data, $line;
                $cas_found = 1;
            }
            else {
                push @XML_data, $line;
            }
        }

        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($double_smiles) {
        print "Double SMILES string found!";
        $nr_SMILES_numbers++;
        $nr_double++;
        $changed = 1;
    }
    elsif ($smiles_found) {
        # The file already had its SMILES line; discard any generated one.
        print "No changes needed...\n";
        $nr_SMILES_numbers++;
        $changed = 0;
    }

    if (!$changed) {
        print "No changes...\n";
        return;
    }

    if (open(my $out, '>', $file)) {
        print {$out} @XML_data;
        close($out) or die "$file: $!\n";
        if ($double_smiles) {
            print " Attempting to remove...\n";
        }
        else {
            print "SMILES number added...\n";
            $nr_added_SMILES_numbers++;
        }
    }
    else {
        warn "$file: $!\n";
        print "Unable to change...\n";
        $nr_unable++;
    }
    return;
}

# get_SMILES_number($CAS_number)
#
# Returns the SMILES string stored for exactly this CAS number, or an empty
# string when the table has no entry.  The lookup is an exact hash lookup, so
# regex metacharacters in the XML value cannot break it or cause a match on
# a longer CAS number.
sub get_SMILES_number {
    my $CAS_number = shift;
    return exists $SMILES_of{$CAS_number} ? $SMILES_of{$CAS_number} : "";
}
--- NEW FILE: add_smiles.pl.in --- #! 
@PATHTOPERL@ -w
use strict;
use diagnostics;

# add_smiles.pl.in (template; @PATHTOPERL@ is a placeholder in the shebang)
#
# For every XML file named on the command line, look for the CAS-NUMBER
# line and, when the line directly after it is not already a SMILES line,
# insert a SMILES line built from the CAS<=>SMILES lookup table.  Extra
# SMILES lines found later in the same file are dropped when the file is
# rewritten.  A summary is printed at the end.

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $CAS_SMILES_table_file   = "CAS2SMILES.txt";
my %SMILES_for_CAS;                 # CAS number => SMILES string
my $nr_SMILES_numbers       = 0;    # files that already carried a SMILES line
my $nr_added_SMILES_numbers = 0;    # files rewritten with a new SMILES line
my $nr_unable               = 0;    # files that could not be rewritten
my $nr_double               = 0;    # files with more than one SMILES line

# Read CAS<=>SMILES table.  The first whitespace-delimited field is the CAS
# number, the remainder of the line is the SMILES string.  A later entry for
# the same CAS number replaces an earlier one (the original linear scan also
# let the last matching line win).
open(my $table_fh, '<', $CAS_SMILES_table_file)
    or die "$CAS_SMILES_table_file: $!\n";
while (my $entry = <$table_fh>) {
    chomp $entry;
    if ($entry =~ /^(\S+)\s*(.*)$/) {
        $SMILES_for_CAS{$1} = $2;
    }
}
close($table_fh) or die "$CAS_SMILES_table_file: $!\n";

# Loop over all files
foreach my $arg (@ARGV) {
    add_SMILES_number_to_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_SMILES_numbers) {
    print "SMILES strings found : $nr_SMILES_numbers\n";
}
else {
    print "No SMILES strings found.\n";
}
print "Unable to changes: $nr_unable\n"     if $nr_unable;
print "Double SMILES strings: $nr_double\n" if $nr_double;
if ($nr_added_SMILES_numbers) {
    print "SMILES strings added : $nr_added_SMILES_numbers\n";
    print "SMILES strings total : ",
        $nr_SMILES_numbers + $nr_added_SMILES_numbers, "\n";
}
else {
    print "No SMILES strings added.\n";
}

# add_SMILES_number_to_file($file)
#
# Reads $file into memory, inserting a SMILES line after the CAS-NUMBER line
# when one can be looked up, and rewrites the file in place if anything
# changed.  Updates the global counters; returns nothing useful.
sub add_SMILES_number_to_file {
    my $file = shift;
    my @XML_data;
    my $changed       = "";
    my $smiles_found  = "";
    my $double_smiles = "";

    print "Processing $file. ";
    if (open(my $in, '<', $file)) {
        my $cas_found = "";
        my $chemical  = "";
        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = "True";
            }
            elsif ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $cas_found = "True";

                # Always keep the CAS line itself.  The original dropped it
                # when a SMILES line had been seen earlier, which lost data
                # whenever the file was subsequently rewritten.
                push @XML_data, $line;
                next if $smiles_found;

                # Peek at the following line; undef at end of file.
                my $next_line = <$in>;
                if (defined $next_line && $next_line =~ /SMILES/) {
                    push @XML_data, $next_line;
                    $smiles_found = "True";
                }
                else {
                    my $SMILES_number = get_SMILES_number($CAS_number);
                    if ($SMILES_number) {
                        # Clone the CAS line and turn it into a SMILES line.
                        # /i mirrors the case-insensitive match above.
                        my $SMILES_line = $line;
                        $SMILES_line =~ s/CAS-NUMBER/SMILES/i;
                        $SMILES_line =~ s/>.*?</>$SMILES_number</;
                        push @XML_data, $SMILES_line;
                        $changed = "True";
                    }
                    push @XML_data, $next_line if defined $next_line;
                }
            }
            elsif ($line =~ /SMILES/ && $line =~ />(.+?)</) {
                if (!$smiles_found) {
                    push @XML_data, $line;
                    $smiles_found = "True";
                }
                else {
                    # Second SMILES line: leave it out of the rewritten file.
                    $double_smiles = "True";
                }
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                push @XML_data, $line;
                $cas_found = "True";
            }
            else {
                push @XML_data, $line;
            }
        }
        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($double_smiles) {
        print "Double SMILES string found!";
        $nr_SMILES_numbers++;
        $nr_double++;
        $changed = "True";
    }
    elsif ($smiles_found) {
        print "No changes needed...\n";
        $nr_SMILES_numbers++;
        $changed = "";
    }

    if ($changed) {
        if (open(my $out, '>', $file)) {
            print {$out} @XML_data;
            close($out) or die "$file: $!\n";
            if ($double_smiles) {
                print " Attempting to remove...\n";
            }
            else {
                print "SMILES number added...\n";
                $nr_added_SMILES_numbers++;
            }
        }
        else {
            warn "$file: $!\n";
            print "Unable to change...\n";
            $nr_unable++;
        }
    }
    else {
        print "No changes...\n";
    }
}

# get_SMILES_number($CAS_number)
#
# Returns the SMILES string stored for $CAS_number, or "" when the table has
# no entry.  Surrounding whitespace in the argument is ignored.  The lookup
# is an exact hash match, so text taken from the XML file is never
# interpreted as a regular expression.
sub get_SMILES_number {
    my $CAS_number = shift;
    return "" unless defined $CAS_number;
    $CAS_number =~ s/^\s+//;
    $CAS_number =~ s/\s+$//;
    return exists $SMILES_for_CAS{$CAS_number}
        ? $SMILES_for_CAS{$CAS_number}
        : "";
}
--- NEW FILE: add_un_nummers.pl --- #! 
/usr/bin/perl -w
use strict;
use diagnostics;

# add_un_nummers.pl
#
# For every XML file named on the command line, look for CAS-NUMBER lines
# and, when the line directly after one is not already a UN-NUMBER line,
# insert a UN-NUMBER line built from the CAS<=>UN lookup table.  A summary
# is printed at the end.

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $CAS_UN_table_file   = "CAS2UN.txt";
my %UN_for_CAS;                 # CAS number => UN number
my $nr_UN_numbers       = 0;    # UN-NUMBER lines that were already present
my $nr_added_UN_numbers = 0;    # files rewritten with a new UN-NUMBER line
my $nr_unable           = 0;    # files that could not be rewritten

# Read CAS<=>UN table.  Each entry is "CAS <=> UN"; the "<=>" separator is
# optional so that plain whitespace-separated tables load as well.  Lines
# that are empty or start with '#' are skipped.  A later entry for the same
# CAS number replaces an earlier one.
open(my $table_fh, '<', $CAS_UN_table_file)
    or die "$CAS_UN_table_file: $!\n";
while (my $entry = <$table_fh>) {
    chomp $entry;
    next if $entry =~ /^\s*(?:#|$)/;
    if ($entry =~ /^([^\s<]+)\s*(?:<=>\s*)?(.*)$/) {
        $UN_for_CAS{$1} = $2;
    }
}
close($table_fh) or die "$CAS_UN_table_file: $!\n";

# Loop over all files
foreach my $arg (@ARGV) {
    add_UN_number_to_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_UN_numbers) {
    print "UN numbers found : $nr_UN_numbers\n";
}
else {
    print "No UN numbers found.\n";
}
print "Unable to changes: $nr_unable\n" if $nr_unable;
if ($nr_added_UN_numbers) {
    print "UN numbers added : $nr_added_UN_numbers\n";
    print "UN numbers total : ", $nr_UN_numbers + $nr_added_UN_numbers, "\n";
}
else {
    print "No UN numbers added.\n";
}

# add_UN_number_to_file($file)
#
# Reads $file into memory, inserting a UN-NUMBER line after each CAS-NUMBER
# line for which a UN number can be looked up, and rewrites the file in
# place if anything changed.  Updates the global counters.
sub add_UN_number_to_file {
    my $file = shift;
    my @XML_data;
    my $changed = "";

    print "Processing $file. ";
    if (open(my $in, '<', $file)) {
        my $cas_found = "";
        my $chemical  = "";
        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = "True";
            }
            elsif ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $cas_found = "True";
                push @XML_data, $line;

                # Peek at the following line; undef at end of file.
                my $next_line = <$in>;
                if (defined $next_line && $next_line =~ /UN-NUMBER/) {
                    push @XML_data, $next_line;
                    $nr_UN_numbers++;
                }
                else {
                    my $UN_number = get_UN_number($CAS_number);
                    if ($UN_number) {
                        # Clone the CAS line and turn it into a UN line.
                        # /i mirrors the case-insensitive match above.
                        my $UN_line = $line;
                        $UN_line =~ s/CAS-NUMBER/UN-NUMBER/i;
                        $UN_line =~ s/>.*?</>$UN_number</;
                        push @XML_data, $UN_line;
                        $changed = "True";
                    }
                    push @XML_data, $next_line if defined $next_line;
                }
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                push @XML_data, $line;
                $cas_found = "True";
            }
            else {
                push @XML_data, $line;
            }
        }
        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($changed) {
        if (open(my $out, '>', $file)) {
            print {$out} @XML_data;
            close($out) or die "$file: $!\n";
            print "UN number added...\n";
            $nr_added_UN_numbers++;
        }
        else {
            warn "$file: $!\n";
            print "Unable to change...\n";
            $nr_unable++;
        }
    }
    else {
        print "No changes...\n";
    }
}

# get_UN_number($CAS_number)
#
# Returns the UN number stored for $CAS_number, or "" when the table has no
# entry.  Surrounding whitespace in the argument is ignored.  The lookup is
# an exact hash match, so text taken from the XML file is never interpreted
# as a regular expression.
sub get_UN_number {
    my $CAS_number = shift;
    return "" unless defined $CAS_number;
    $CAS_number =~ s/^\s+//;
    $CAS_number =~ s/\s+$//;
    return exists $UN_for_CAS{$CAS_number} ? $UN_for_CAS{$CAS_number} : "";
}
--- NEW FILE: add_un_nummers.pl.in ---
#! @PATHTOPERL@ -w
use strict;
use diagnostics;

# add_un_nummers.pl.in (template; @PATHTOPERL@ is a placeholder in the shebang)
#
# For every XML file named on the command line, look for CAS-NUMBER lines
# and, when the line directly after one is not already a UN-NUMBER line,
# insert a UN-NUMBER line built from the CAS<=>UN lookup table.  A summary
# is printed at the end.

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $CAS_UN_table_file   = "CAS2UN.txt";
my %UN_for_CAS;                 # CAS number => UN number
my $nr_UN_numbers       = 0;    # UN-NUMBER lines that were already present
my $nr_added_UN_numbers = 0;    # files rewritten with a new UN-NUMBER line
my $nr_unable           = 0;    # files that could not be rewritten

# Read CAS<=>UN table.  Each entry is "CAS <=> UN"; the "<=>" separator is
# optional so that plain whitespace-separated tables load as well.  Lines
# that are empty or start with '#' are skipped.  A later entry for the same
# CAS number replaces an earlier one.
open(my $table_fh, '<', $CAS_UN_table_file)
    or die "$CAS_UN_table_file: $!\n";
while (my $entry = <$table_fh>) {
    chomp $entry;
    next if $entry =~ /^\s*(?:#|$)/;
    if ($entry =~ /^([^\s<]+)\s*(?:<=>\s*)?(.*)$/) {
        $UN_for_CAS{$1} = $2;
    }
}
close($table_fh) or die "$CAS_UN_table_file: $!\n";

# Loop over all files
foreach my $arg (@ARGV) {
    add_UN_number_to_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_UN_numbers) {
    print "UN numbers found : $nr_UN_numbers\n";
}
else {
    print "No UN numbers found.\n";
}
print "Unable to changes: $nr_unable\n" if $nr_unable;
if ($nr_added_UN_numbers) {
    print "UN numbers added : $nr_added_UN_numbers\n";
    print "UN numbers total : ", $nr_UN_numbers + $nr_added_UN_numbers, "\n";
}
else {
    print "No UN numbers added.\n";
}

# add_UN_number_to_file($file)
#
# Reads $file into memory, inserting a UN-NUMBER line after each CAS-NUMBER
# line for which a UN number can be looked up, and rewrites the file in
# place if anything changed.  Updates the global counters.
sub add_UN_number_to_file {
    my $file = shift;
    my @XML_data;
    my $changed = "";

    print "Processing $file. ";
    if (open(my $in, '<', $file)) {
        my $cas_found = "";
        my $chemical  = "";
        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = "True";
            }
            elsif ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $cas_found = "True";
                push @XML_data, $line;

                # Peek at the following line; undef at end of file.
                my $next_line = <$in>;
                if (defined $next_line && $next_line =~ /UN-NUMBER/) {
                    push @XML_data, $next_line;
                    $nr_UN_numbers++;
                }
                else {
                    my $UN_number = get_UN_number($CAS_number);
                    if ($UN_number) {
                        # Clone the CAS line and turn it into a UN line.
                        # /i mirrors the case-insensitive match above.
                        my $UN_line = $line;
                        $UN_line =~ s/CAS-NUMBER/UN-NUMBER/i;
                        $UN_line =~ s/>.*?</>$UN_number</;
                        push @XML_data, $UN_line;
                        $changed = "True";
                    }
                    push @XML_data, $next_line if defined $next_line;
                }
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                push @XML_data, $line;
                $cas_found = "True";
            }
            else {
                push @XML_data, $line;
            }
        }
        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($changed) {
        if (open(my $out, '>', $file)) {
            print {$out} @XML_data;
            close($out) or die "$file: $!\n";
            print "UN number added...\n";
            $nr_added_UN_numbers++;
        }
        else {
            warn "$file: $!\n";
            print "Unable to change...\n";
            $nr_unable++;
        }
    }
    else {
        print "No changes...\n";
    }
}

# get_UN_number($CAS_number)
#
# Returns the UN number stored for $CAS_number, or "" when the table has no
# entry.  Surrounding whitespace in the argument is ignored.  The lookup is
# an exact hash match, so text taken from the XML file is never interpreted
# as a regular expression.
sub get_UN_number {
    my $CAS_number = shift;
    return "" unless defined $CAS_number;
    $CAS_number =~ s/^\s+//;
    $CAS_number =~ s/\s+$//;
    return exists $UN_for_CAS{$CAS_number} ? $UN_for_CAS{$CAS_number} : "";
}
--- NEW FILE: check_cas_nummers.pl --- #! 
/usr/bin/perl -w
use strict;
use diagnostics;

# check_cas_nummers.pl
#
# Runs check_digit() on every CAS-NUMBER / WOC-NUMBER value found in the XML
# files named on the command line.  A number whose check digit is missing is
# replaced by the value check_digit() returns and the file is rewritten;
# other problems are only reported.
#
# NOTE(review): check_digit() is called but not defined in this script as
# shown; it has to be provided elsewhere before the script can run.

# Check argument line
if (@ARGV == 0) {
    print "Usage: $0 <XML-files>\n";
    exit;
}

# Global variables
my $nr_ignored  = 0;    # reported problems that were left untouched
my $nr_inserted = 0;    # files rewritten with completed numbers
my $nr_unable   = 0;    # files that needed a change but could not be written

# Loop over all files
foreach my $arg (@ARGV) {
    check_CAS_and_WOC_numbers_in_file($arg);
}

# Print statistics.  $nr_unable is part of the test so that a run whose only
# problem was an unwritable file is not reported as a clean pass.
print " ------------------------\n";
if ($nr_ignored + $nr_inserted + $nr_unable == 0) {
    print "All files passed check.\n";
}
else {
    print "Errors ignored : $nr_ignored\n"        if $nr_ignored;
    print "Unable to changes : $nr_unable\n"      if $nr_unable;
    print "Check digits inserted: $nr_inserted\n" if $nr_inserted;
}

# check_CAS_and_WOC_numbers_in_file($file)
#
# Checks every CAS/WOC number line of $file and rewrites the file in place
# when at least one missing check digit was filled in.  Updates the global
# counters.
sub check_CAS_and_WOC_numbers_in_file {
    my $file = shift;
    my @XML_data;
    my $changed = "";

    print "Processing $file, ";
    if (open(my $in, '<', $file)) {
        while (my $line = <$in>) {
            if ($line =~ /(CAS|WOC)-NUMBER.*?>(.+?)</) {
                my $type   = $1;
                my $number = $2;
                my $check  = check_digit($number);

                # How the result is interpreted here: false means a wrong
                # check digit or typo, text containing "Error" is a message
                # to report, text containing "Correct" means nothing to do,
                # and anything else is handled as a missing check digit.
                if (!$check) {
                    # Only reported; the automatic correction was already
                    # commented out in the original script.
                    print "\n Wrong check digit or typing error in $type number in $file! Ignoring error...\n ";
                    $nr_ignored++;
                }
                elsif ($check =~ /Error/i) {
                    print "\n $check in $file! Ignoring error...\n ";
                    $nr_ignored++;
                }
                elsif ($check !~ /Correct/i) {
                    print "\n Missing check digit of $type number in $file!\n ";
                    $number = check_digit($number);
                    $line =~ s/>.*?</>$number</;
                    $changed = "True";
                }
            }
            push @XML_data, $line;
        }
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($changed) {
        if (open(my $out, '>', $file)) {
            print {$out} @XML_data;
            close($out) or die "$file: $!\n";
            print "Check digit inserted...\n";
            $nr_inserted++;
        }
        else {
            warn "$file: $!\n";
            print "Unable to change...\n";
            $nr_unable++;
        }
    }
    else {
        print "No changes...\n";
    }
}
--- NEW FILE: check_cas_nummers.pl.in --- #! 
@PATHTOPERL@ -w
use strict;
use diagnostics;

# check_cas_nummers.pl.in (template; @PATHTOPERL@ is a placeholder in the
# shebang)
#
# Runs check_digit() on every CAS-NUMBER / WOC-NUMBER value found in the XML
# files named on the command line.  A number whose check digit is missing is
# replaced by the value check_digit() returns and the file is rewritten;
# other problems are only reported.
#
# NOTE(review): check_digit() is called but not defined in this script as
# shown; it has to be provided elsewhere before the script can run.

# Check argument line
if (@ARGV == 0) {
    print "Usage: $0 <XML-files>\n";
    exit;
}

# Global variables
my $nr_ignored  = 0;    # reported problems that were left untouched
my $nr_inserted = 0;    # files rewritten with completed numbers
my $nr_unable   = 0;    # files that needed a change but could not be written

# Loop over all files
foreach my $arg (@ARGV) {
    check_CAS_and_WOC_numbers_in_file($arg);
}

# Print statistics.  $nr_unable is part of the test so that a run whose only
# problem was an unwritable file is not reported as a clean pass.
print " ------------------------\n";
if ($nr_ignored + $nr_inserted + $nr_unable == 0) {
    print "All files passed check.\n";
}
else {
    print "Errors ignored : $nr_ignored\n"        if $nr_ignored;
    print "Unable to changes : $nr_unable\n"      if $nr_unable;
    print "Check digits inserted: $nr_inserted\n" if $nr_inserted;
}

# check_CAS_and_WOC_numbers_in_file($file)
#
# Checks every CAS/WOC number line of $file and rewrites the file in place
# when at least one missing check digit was filled in.  Updates the global
# counters.
sub check_CAS_and_WOC_numbers_in_file {
    my $file = shift;
    my @XML_data;
    my $changed = "";

    print "Processing $file, ";
    if (open(my $in, '<', $file)) {
        while (my $line = <$in>) {
            if ($line =~ /(CAS|WOC)-NUMBER.*?>(.+?)</) {
                my $type   = $1;
                my $number = $2;
                my $check  = check_digit($number);

                # How the result is interpreted here: false means a wrong
                # check digit or typo, text containing "Error" is a message
                # to report, text containing "Correct" means nothing to do,
                # and anything else is handled as a missing check digit.
                if (!$check) {
                    # Only reported; the automatic correction was already
                    # commented out in the original script.
                    print "\n Wrong check digit or typing error in $type number in $file! Ignoring error...\n ";
                    $nr_ignored++;
                }
                elsif ($check =~ /Error/i) {
                    print "\n $check in $file! Ignoring error...\n ";
                    $nr_ignored++;
                }
                elsif ($check !~ /Correct/i) {
                    print "\n Missing check digit of $type number in $file!\n ";
                    $number = check_digit($number);
                    $line =~ s/>.*?</>$number</;
                    $changed = "True";
                }
            }
            push @XML_data, $line;
        }
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($changed) {
        if (open(my $out, '>', $file)) {
            print {$out} @XML_data;
            close($out) or die "$file: $!\n";
            print "Check digit inserted...\n";
            $nr_inserted++;
        }
        else {
            warn "$file: $!\n";
            print "Unable to change...\n";
            $nr_unable++;
        }
    }
    else {
        print "No changes...\n";
    }
}
--- NEW FILE: check_doublenumbers.pl --- #! 
/usr/bin/perl -w
use strict;
use diagnostics;

# check_doublenumbers.pl
#
# NOTE(review): this script looks like an unfinished adaptation of the
# SMILES-insertion script: it still carries the SMILES logic, its lookup
# table is never loaded, and as committed it did not compile (one closing
# brace too many, several variables undeclared under "use strict").  The
# changes here are limited to making it compile and run; the intended
# "double number" check itself is not implemented - TODO confirm the
# intended behaviour with the author.

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my @NUMBER_count       = ();    # declared by the original, not used yet
my @NUMBER_doublecount = ();    # declared by the original, not used yet
my @CAS_SMILES_table;           # never filled here, so lookups return ""
my $nr_SMILES_numbers       = 0;
my $nr_added_SMILES_numbers = 0;
my $nr_unable               = 0;
my $nr_double               = 0;

# Loop over all files
foreach my $arg (@ARGV) {
    check_numbers_in_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_SMILES_numbers) {
    print "SMILES strings found : $nr_SMILES_numbers\n";
}
else {
    print "No SMILES strings found.\n";
}
print "Unable to changes: $nr_unable\n"     if $nr_unable;
print "Double SMILES strings: $nr_double\n" if $nr_double;
if ($nr_added_SMILES_numbers) {
    print "SMILES strings added : $nr_added_SMILES_numbers\n";
    print "SMILES strings total : ",
        $nr_SMILES_numbers + $nr_added_SMILES_numbers, "\n";
}
else {
    print "No SMILES strings added.\n";
}

# check_numbers_in_file($file)
#
# Scans $file line by line.  For an "INDEX CLASS" line whose successor is
# not a SMILES line, a SMILES line is inserted when get_SMILES_number()
# yields a value; the file is rewritten only when something was inserted
# and no SMILES line was found.  Updates the global counters.
sub check_numbers_in_file {
    my $file = shift;
    my @XML_data;
    my $changed       = "";
    my $number_found  = "";    # set below but not read anywhere yet
    my $smiles_found  = "";
    my $double_smiles = "";

    print "Processing $file. ";
    if (open(my $in, '<', $file)) {
        my $cas_found = "";
        my $chemical  = "";
        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = "True";
            }
            elsif ($line =~ /INDEX CLASS/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $number_found = "True";
                push @XML_data, $line;

                # Peek at the following line; undef at end of file.
                my $next_line = <$in>;
                if (defined $next_line && $next_line =~ /SMILES/) {
                    push @XML_data, $next_line;
                    $smiles_found = "True";
                }
                else {
                    my $SMILES_number = get_SMILES_number($CAS_number);
                    if ($SMILES_number) {
                        my $SMILES_line = $line;
                        $SMILES_line =~ s/CAS-NUMBER/SMILES/;
                        $SMILES_line =~ s/>.*?</>$SMILES_number</;
                        push @XML_data, $SMILES_line;
                        $changed = "True";
                    }
                    push @XML_data, $next_line if defined $next_line;
                }
            }
            elsif ($line =~ /SMILES/ && $line =~ />(.+?)</) {
                if (!$smiles_found) {
                    push @XML_data, $line;
                    $smiles_found = "True";
                }
                else {
                    $double_smiles = "True";
                }
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                push @XML_data, $line;
                $cas_found = "True";
            }
            else {
                push @XML_data, $line;
            }
        }
        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($double_smiles) {
        print "Double SMILES string found...\n";
        $nr_SMILES_numbers++;
        $nr_double++;
    }
    elsif ($changed && $smiles_found) {
        print "No changes needed...\n";
        $nr_SMILES_numbers++;
    }
    elsif ($changed) {
        if (open(my $out, '>', $file)) {
            print {$out} @XML_data;
            close($out) or die "$file: $!\n";
            print "SMILES number added...\n";
            $nr_added_SMILES_numbers++;
        }
        else {
            warn "$file: $!\n";
            print "Unable to change...\n";
            $nr_unable++;
        }
    }
    else {
        print "No changes...\n";
    }
}

# get_SMILES_number($CAS_number)
#
# Returns the text following $CAS_number on the last matching line of
# @CAS_SMILES_table, or "" when no line matches.  The number is quoted with
# \Q...\E so that text from the XML file is matched literally.
sub get_SMILES_number {
    my $CAS_number    = shift;
    my $SMILES_number = "";
    foreach my $line (@CAS_SMILES_table) {
        if ($line =~ /^\Q$CAS_number\E\s*(.*)/) {
            $SMILES_number = $1;
        }
    }
    return $SMILES_number;
}
--- NEW FILE: check_doublenumbers.pl.in ---
#! @PATHTOPERL@ -w
use strict;
use diagnostics;

# check_doublenumbers.pl.in (template; @PATHTOPERL@ is a placeholder in the
# shebang)
#
# NOTE(review): this script looks like an unfinished adaptation of the
# SMILES-insertion script: it still carries the SMILES logic, its lookup
# table is never loaded, and as committed it did not compile (one closing
# brace too many, several variables undeclared under "use strict").  The
# changes here are limited to making it compile and run; the intended
# "double number" check itself is not implemented - TODO confirm the
# intended behaviour with the author.

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my @NUMBER_count       = ();    # declared by the original, not used yet
my @NUMBER_doublecount = ();    # declared by the original, not used yet
my @CAS_SMILES_table;           # never filled here, so lookups return ""
my $nr_SMILES_numbers       = 0;
my $nr_added_SMILES_numbers = 0;
my $nr_unable               = 0;
my $nr_double               = 0;

# Loop over all files
foreach my $arg (@ARGV) {
    check_numbers_in_file($arg);
}

# Print statistics
print " ------------------\n";
if ($nr_SMILES_numbers) {
    print "SMILES strings found : $nr_SMILES_numbers\n";
}
else {
    print "No SMILES strings found.\n";
}
print "Unable to changes: $nr_unable\n"     if $nr_unable;
print "Double SMILES strings: $nr_double\n" if $nr_double;
if ($nr_added_SMILES_numbers) {
    print "SMILES strings added : $nr_added_SMILES_numbers\n";
    print "SMILES strings total : ",
        $nr_SMILES_numbers + $nr_added_SMILES_numbers, "\n";
}
else {
    print "No SMILES strings added.\n";
}

# check_numbers_in_file($file)
#
# Scans $file line by line.  For an "INDEX CLASS" line whose successor is
# not a SMILES line, a SMILES line is inserted when get_SMILES_number()
# yields a value; the file is rewritten only when something was inserted
# and no SMILES line was found.  Updates the global counters.
sub check_numbers_in_file {
    my $file = shift;
    my @XML_data;
    my $changed       = "";
    my $number_found  = "";    # set below but not read anywhere yet
    my $smiles_found  = "";
    my $double_smiles = "";

    print "Processing $file. ";
    if (open(my $in, '<', $file)) {
        my $cas_found = "";
        my $chemical  = "";
        while (my $line = <$in>) {
            if ($line =~ /CHEMICAL/i) {
                push @XML_data, $line;
                $chemical = "True";
            }
            elsif ($line =~ /INDEX CLASS/i && $line =~ />(.+?)</) {
                my $CAS_number = $1;
                $number_found = "True";
                push @XML_data, $line;

                # Peek at the following line; undef at end of file.
                my $next_line = <$in>;
                if (defined $next_line && $next_line =~ /SMILES/) {
                    push @XML_data, $next_line;
                    $smiles_found = "True";
                }
                else {
                    my $SMILES_number = get_SMILES_number($CAS_number);
                    if ($SMILES_number) {
                        my $SMILES_line = $line;
                        $SMILES_line =~ s/CAS-NUMBER/SMILES/;
                        $SMILES_line =~ s/>.*?</>$SMILES_number</;
                        push @XML_data, $SMILES_line;
                        $changed = "True";
                    }
                    push @XML_data, $next_line if defined $next_line;
                }
            }
            elsif ($line =~ /SMILES/ && $line =~ />(.+?)</) {
                if (!$smiles_found) {
                    push @XML_data, $line;
                    $smiles_found = "True";
                }
                else {
                    $double_smiles = "True";
                }
            }
            elsif ($line =~ /WOC-NUMBER/ && $line =~ />(.+?)</) {
                push @XML_data, $line;
                $cas_found = "True";
            }
            else {
                push @XML_data, $line;
            }
        }
        print "No CAS number found!! " if ($chemical && !$cas_found);
        close($in) or warn "$file: $!\n";
    }
    else {
        warn "$file: $!\n";
    }

    if ($double_smiles) {
        print "Double SMILES string found...\n";
        $nr_SMILES_numbers++;
        $nr_double++;
    }
    elsif ($changed && $smiles_found) {
        print "No changes needed...\n";
        $nr_SMILES_numbers++;
    }
    elsif ($changed) {
        if (open(my $out, '>', $file)) {
            print {$out} @XML_data;
            close($out) or die "$file: $!\n";
            print "SMILES number added...\n";
            $nr_added_SMILES_numbers++;
        }
        else {
            warn "$file: $!\n";
            print "Unable to change...\n";
            $nr_unable++;
        }
    }
    else {
        print "No changes...\n";
    }
}

# get_SMILES_number($CAS_number)
#
# Returns the text following $CAS_number on the last matching line of
# @CAS_SMILES_table, or "" when no line matches.  The number is quoted with
# \Q...\E so that text from the XML file is matched literally.
sub get_SMILES_number {
    my $CAS_number    = shift;
    my $SMILES_number = "";
    foreach my $line (@CAS_SMILES_table) {
        if ($line =~ /^\Q$CAS_number\E\s*(.*)/) {
            $SMILES_number = $1;
        }
    }
    return $SMILES_number;
}
--- NEW FILE: zoek-UNs-zonder-wml.pl --- #! 
/usr/bin/perl -w
use strict;
use diagnostics;

# zoek-UNs-zonder-wml.pl
#
# Reports every entry of the CAS<=>UN table whose CAS number does not occur
# on any line containing "NUMBER" in the XML files below $wmldir.
#
# The original shelled out to grep/rm through a temporary file and
# interpolated table text into the shell command.  The search is now done
# in Perl: no temporary file, no shell quoting problems, and an empty table
# line can no longer make grep wait on standard input.

my $debug  = "";                          # set to a true value to list matches
my $wmldir = "../../web-docs/data/wml";   # directory holding the XML files

# Global variables
my $CAS_UN_table_file = "CAS-UN.table";
my @CAS_UN_table;

# Read CAS<=>UN table
open(my $table_fh, '<', $CAS_UN_table_file)
    or die "$CAS_UN_table_file: $!\n";
<$table_fh>;    # skip first line (comment)
while (my $entry = <$table_fh>) {
    chomp $entry;
    push @CAS_UN_table, $entry;
}
close($table_fh) or die "$CAS_UN_table_file: $!\n";

my @number_records = collect_key_records("NUMBER");

foreach my $line (@CAS_UN_table) {
    # The CAS number is the first field: everything up to whitespace or the
    # "<=>" separator.  Blank lines carry no number and are skipped.
    my ($cas) = $line =~ /^\s*([^\s<]+)/;
    next unless defined $cas;

    my $nummatch = find_number($cas, \@number_records);
    if ($nummatch) {
        print "NUMBER found:\t$nummatch\n" if ($debug);
    }
    else {
        print "Dangling CAS/UN pair: $line\n";
    }
}

# collect_key_records($key)
#
# Returns one "file:<TAB>line" string for every line of every $wmldir/*.xml
# file that contains $key (case-insensitive).  The file name is given
# relative to $wmldir.  Unreadable files are reported with warn and skipped.
sub collect_key_records {
    my $key = shift;
    my @records;
    foreach my $xml_file (glob("$wmldir/*.xml")) {
        my $fh;
        unless (open($fh, '<', $xml_file)) {
            warn "$xml_file: $!\n";
            next;
        }
        (my $short_name = $xml_file) =~ s/^\Q$wmldir\E\///;
        while (my $text = <$fh>) {
            next unless $text =~ /\Q$key\E/i;
            chomp $text;
            $text =~ s/^\s+//;
            push @records, "$short_name:\t$text";
        }
        close($fh) or warn "$xml_file: $!\n";
    }
    return @records;
}

# find_number($number, \@records)
#
# Returns the records that contain $number, joined with newlines, or "" when
# there is none.  As with the original grep over "file:line" output, the
# file name part of a record takes part in the match.  $number is matched
# literally and must not be embedded in a longer digit run, so "50-00-0" is
# not reported as found because of "150-00-0".
sub find_number {
    my ($number, $records) = @_;
    my $pattern = qr/(?<!\d)\Q$number\E(?!\d)/i;
    return join("\n", grep { $_ =~ $pattern } @$records);
}
--- NEW FILE: zoek-UNs-zonder-wml.pl.in --- #! 
@PATHTOPERL@ -w
use strict;
use diagnostics;

# zoek-UNs-zonder-wml.pl.in (template; @PATHTOPERL@ is a placeholder in the
# shebang)
#
# Reports every entry of the CAS<=>UN table whose CAS number does not occur
# on any line containing "NUMBER" in the XML files below $wmldir.
#
# The original shelled out to grep/rm through a temporary file and
# interpolated table text into the shell command.  The search is now done
# in Perl: no temporary file, no shell quoting problems, and an empty table
# line can no longer make grep wait on standard input.

my $debug  = "";                          # set to a true value to list matches
my $wmldir = "../../web-docs/data/wml";   # directory holding the XML files

# Global variables
my $CAS_UN_table_file = "CAS-UN.table";
my @CAS_UN_table;

# Read CAS<=>UN table
open(my $table_fh, '<', $CAS_UN_table_file)
    or die "$CAS_UN_table_file: $!\n";
<$table_fh>;    # skip first line (comment)
while (my $entry = <$table_fh>) {
    chomp $entry;
    push @CAS_UN_table, $entry;
}
close($table_fh) or die "$CAS_UN_table_file: $!\n";

my @number_records = collect_key_records("NUMBER");

foreach my $line (@CAS_UN_table) {
    # The CAS number is the first field: everything up to whitespace or the
    # "<=>" separator.  Blank lines carry no number and are skipped.
    my ($cas) = $line =~ /^\s*([^\s<]+)/;
    next unless defined $cas;

    my $nummatch = find_number($cas, \@number_records);
    if ($nummatch) {
        print "NUMBER found:\t$nummatch\n" if ($debug);
    }
    else {
        print "Dangling CAS/UN pair: $line\n";
    }
}

# collect_key_records($key)
#
# Returns one "file:<TAB>line" string for every line of every $wmldir/*.xml
# file that contains $key (case-insensitive).  The file name is given
# relative to $wmldir.  Unreadable files are reported with warn and skipped.
sub collect_key_records {
    my $key = shift;
    my @records;
    foreach my $xml_file (glob("$wmldir/*.xml")) {
        my $fh;
        unless (open($fh, '<', $xml_file)) {
            warn "$xml_file: $!\n";
            next;
        }
        (my $short_name = $xml_file) =~ s/^\Q$wmldir\E\///;
        while (my $text = <$fh>) {
            next unless $text =~ /\Q$key\E/i;
            chomp $text;
            $text =~ s/^\s+//;
            push @records, "$short_name:\t$text";
        }
        close($fh) or warn "$xml_file: $!\n";
    }
    return @records;
}

# find_number($number, \@records)
#
# Returns the records that contain $number, joined with newlines, or "" when
# there is none.  As with the original grep over "file:line" output, the
# file name part of a record takes part in the match.  $number is matched
# literally and must not be embedded in a longer digit run, so "50-00-0" is
# not reported as found because of "150-00-0".
sub find_number {
    my ($number, $records) = @_;
    my $pattern = qr/(?<!\d)\Q$number\E(?!\d)/i;
    return join("\n", grep { $_ =~ $pattern } @$records);
}
|