From: <Ba...@us...> - 2010-06-08 05:46:50
|
Revision: 395
          http://ggnfs.svn.sourceforge.net/ggnfs/?rev=395&view=rev
Author:   Batalov
Date:     2010-06-08 05:46:44 +0000 (Tue, 08 Jun 2010)

Log Message:
-----------
minor

Modified Paths:
--------------
    trunk/contrib/remdups/remdups.c
    trunk/contrib/remdups/remdups4.c

Modified: trunk/contrib/remdups/remdups.c
===================================================================
--- trunk/contrib/remdups/remdups.c 2010-06-06 19:37:51 UTC (rev 394)
+++ trunk/contrib/remdups/remdups.c 2010-06-08 05:46:44 UTC (rev 395)
@@ -47,9 +47,8 @@
 
   int numbad=0, numdups=0, numuniq=0,numskip=0;
   int DIM=1000;
-
   if ((argc != 3)&&(argc != 4)) {
-    printf("\nusage: %s [table_size] <ggnfs_file> <out_file>\n\n", argv[0]);
+    printf("\nusage: %s [DIM] <ggnfs_file> <out_file>\n\n", argv[0]);
     exit(-1);
   }
 
@@ -65,7 +64,7 @@
       exit(-1);
     }
 
-    printf("Lookup table size (1000-3000 recommended): ");
+    printf("Lookup table size (5 per million relations recommended): ");
     scanf("%d",&DIM);
   } else {
     infile = fopen(argv[2], "r");
@@ -86,11 +85,13 @@
     printf("cannot open badfile\n");
     exit(-1);
   }
-
-  if ((DIM<50)||(DIM>100000)) {
-    printf("DIM should be between 50 and 100000!\n");
+  if (DIM<20) DIM=20;
+#if 0
+  if (DIM>100000) {
+    printf("DIM should be between 20 and 100000!\n");
     exit(1);
   }
+#endif
 
   /* initialize arrays */
   arra = (uint64**)malloc(MDVAL * sizeof(uint64 *));
@@ -112,7 +113,7 @@
     fgets(buf, sizeof(buf), infile);
     char *tmp, *field_end;
     uint64 a;
-    int32 i, j, p;
+    int32 i, j, p, cpos;
 
     if (buf[0] == '#') {
       fprintf(outfile, "%s", buf);
@@ -128,11 +129,12 @@
 
     /* Hash used to be in a and b bins; it worked well for SNFS */
     /* However, for gnfs, the bins were very shallow */
-    /* New hash value a is a nonsensical base-11 hybrid of both a and b -SB 2009 */
+    /* New hash value a is a hybrid of both a and b -SB 2009 */
+    cpos = 0;
     if(*tmp=='-') {a=10; tmp++;} else a=0;
     for( ; *tmp ; tmp++) {
-      if (isdigit(*tmp)) a=11*a+(*tmp-'0');
-      else if(*tmp==',') a=11*a+10;
+      if (isdigit(*tmp)) a=10*a+(*tmp-'0');
+      else if(*tmp==',' && !cpos) cpos = tmp-buf; /* must be only one comma between a,b */
       else {
         if(*tmp==':') {
           if ((tmp-2>buf && tmp[-2]==',' && tmp[-1]=='0') || strccnt(tmp+1,':')==1)
@@ -143,6 +145,7 @@
         goto skip_;
       }
     }
+    a=4*a+(cpos&3); /* the "comma position" */
     p=a%MDVAL;
     for (i=0;i<n[p];i++)
       if (a==arra[p][i]) { numdups++; goto skip_; }

Modified: trunk/contrib/remdups/remdups4.c
===================================================================
--- trunk/contrib/remdups/remdups4.c 2010-06-06 19:37:51 UTC (rev 394)
+++ trunk/contrib/remdups/remdups4.c 2010-06-08 05:46:44 UTC (rev 395)
@@ -55,8 +55,8 @@
   if (argc == 2) {
     DIM=atoi(argv[1]);
   } else {
-    fprintf(stderr,"\nusage: cat relations.file(s) | %s table_size > out_file \n"
-            "\t table_size is a number (1000-3000 recommended)\n\n", argv[0]);
+    fprintf(stderr,"\nusage: cat relations.file(s) | %s DIM > out_file \n"
+            "\t DIM is a number (5 per million relations recommended)\n\n", argv[0]);
     exit(-1);
   }
 
@@ -65,11 +65,13 @@
     fprintf(stderr,"cannot open badfile\n");
     exit(-1);
   }
-
-  if ((DIM<50)||(DIM>100000)) {
-    fprintf(stderr,"DIM should be between 50 and 100000!\n");
+  if (DIM<20) DIM=20;
+#if 0
+  if (DIM>100000) {
+    printf("DIM should be between 20 and 100000!\n");
     exit(1);
   }
+#endif
 
   /* initialize arrays */
   arra = (uint64**)malloc(MDVAL * sizeof(uint64 *));
@@ -90,7 +92,7 @@
   while (fgets(buf, sizeof(buf), stdin)) {
     char *tmp, *field_end;
     uint64 a;
-    int32 i, j, p;
+    int32 i, j, p, cpos;
 
     if (buf[0] == '#') {
       printf("%s", buf);
@@ -106,11 +108,12 @@
 
     /* Hash used to be in a and b bins; it worked well for SNFS */
     /* However, for gnfs, the bins were very shallow */
-    /* New hash value a is a nonsensical base-11 hybrid of both a and b -SB 2009 */
+    /* New hash value a is a hybrid of both a and b -SB 2009 */
+    cpos = 0;
     if(*tmp=='-') {a=10; tmp++;} else a=0;
     for( ; *tmp ; tmp++) {
-      if (isdigit(*tmp)) a=11*a+(*tmp-'0');
-      else if(*tmp==',') a=11*a+10;
+      if (isdigit(*tmp)) a=10*a+(*tmp-'0');
+      else if(*tmp==',' && !cpos) cpos = tmp-buf; /* must be only one comma between a,b */
       else {
         if(*tmp==':') {
           if ((tmp-2>buf && tmp[-2]==',' && tmp[-1]=='0') || strccnt(tmp+1,':')==1)
@@ -121,6 +124,7 @@
         goto skip_;
       }
     }
+    a=4*a+(cpos&3); /* the "comma position" */
     p=a%MDVAL;
     for (i=0;i<n[p];i++)
       if (a==arra[p][i]) { numdups++; goto skip_; }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|