From: <jj...@us...> - 2004-03-05 15:42:51
|
Update of /cvsroot/ngetsuite/ngetsuite/ngetsuite In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18152/ngetsuite Modified Files: articles.rb core.rb downloader.rb group.rb Log Message: added -T option (for test) 31337 release detection Index: articles.rb =================================================================== RCS file: /cvsroot/ngetsuite/ngetsuite/ngetsuite/articles.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** articles.rb 20 Feb 2004 16:44:39 -0000 1.6 --- articles.rb 5 Mar 2004 15:28:04 -0000 1.7 *************** *** 38,156 **** Db.autotable(Release, 'releases') - - def initialize(groupindex, artindex, fetch_from_db = false ) - @files = Array.new - @_artindex = artindex - @_groupindex = groupindex - @_size = 0 - @_nbexpected = 0 - @_nbactual = 0 - @lastnb = 0 - @_time = Time.gm('1980') - update_from_db if fetch_from_db - end ! def pushart(art, pos) ! art._rlsnb = pos ! art._rlsartindex = @_artindex ! @_size += art._size ! @_nbactual += 1 ! @_time = art._time if art._time > @_time ! @_completeness = @_nbactual.to_f / @_nbexpected.to_f * 100.to_f if @_nbexpected > 0 and @_nbactual <= @_nbexpected ! ! if @_rlsmap ! @_rlsmap[pos-1] = '#' if pos > 0 and pos <= @_rlsmap.size end - @files.push art - end - - # Returns the index of current file/nb of files - # if guessed from the subject (0 if not) - def Release.filenb_from_subject(subject) - subject =~ /[\[\(]?(\d+)(?:[\/]|\s*of\s*)(\d+)[\]\)]?/ - - return [$1.to_i, $2.to_i] end ! # Returns the common part between this subject and ! # the reference subject. We work with arrays of words ! def compare_subjects(subject) ! common = Array.new ! i = 0 ! while @refsubject[i] != nil and ! subject[i] != nil and ! @refsubject[i] == subject[i] ! common.push subject[i] ! i += 1 end ! return common ! end ! ! # Magic function to tell if an article belongs to a release ! # It adds it and return true if it does, return false if not ! def add?(art) ! fn = Release.filenb_from_subject(art._subject) ! ! if !@files.empty? ! # Deal with trivial cases ! # if the file sequence number is present and ! # seems correct, we can add the article without ! # further investigation ! return false if art._from != @poster ! if fn[1] > 0 and fn[0] > 0 ! return false if fn[1] != @_nbexpected ! return false if fn[0] < @lastnb ! pushart(art,fn[0]) ! return true end ! else ! # First article: add it, ! # and keep the subject's words ! @refsubject = art._subject.split(' ') ! @poster = art._from ! @_nbexpected = fn[1] if fn[1] > 0 ! @lastnb = fn[0] if fn[0] > 0 ! @_rlsmap = '_' * @_nbexpected ! pushart(art,fn[0]) ! return true end ! # If we're here, the file sequence number ! # ("[xx/xx]"-like thing) was not found in the subject ! # We have to compare it with the reference subject ! # This is ugly and *a little* error prone, but hell... ! ! if @files.size == 1 ! # Second article: update the reference by storing ! # only the common words between the 2 subjects, ! # and add the article if there are any of these ! @refsubject = compare_subjects(art._subject.split(' ')) ! if @refsubject.size > 0 ! pushart(art,fn[0]) ! return true else ! return false end else ! # Already 2 or more articles in the release ! # Add the article if the common words between ! # its subject and the reference *is* the reference ! ! if compare_subjects(art._subject.split(' ')) == @refsubject ! pushart(art,fn[0]) ! return true ! else ! return false ! end end end def insert_to_db(only_mandatory = false) # A release contains at least 3 articles ! return if @files.size < 3 super(only_mandatory) --- 38,177 ---- Db.autotable(Release, 'releases') ! @@rls = nil ! ! attr_accessor :poster, :noidxregex ! ! def Release.indexes(subject) ! if subject =~/[\[\(](\d{1,3})\/(\d{1,3})[\]\)]/ ! return $1.to_i, $2.to_i, $2.length ! elsif subject =~ /(\d{1,3}) *of *(\d{1,3})/ ! return $1.to_i, $2.to_i, $2.length end end ! # construct releases, returns number of releases inserted ! # keeps a list of releases 'in construction' indexed by the number of digits of the expected number of articles (to be able to merge '00'->'09' with '10'->'19' if we encounter '1' beetween) ! # cat 0 is for files with no index found ! def Release.mkreleases(art) ! nb_ins = 0 ! # group begin: initialize ! if art == true ! @@rls = [nil, nil, nil, nil] ! return 0 end ! # group end: terminate ! if art == false ! @@rls.compact.each { |r| ! nb_ins += 1 if r.insert_to_db ! } ! @@rls = nil ! return nb_ins ! end ! # parse subject (to get the article category) ! cur, total, cat = Release.indexes(art._subject) ! # index not found: abracadabra! ! if not cat ! if rls = @@rls[0] ! # if this is the second file, calculate the regex ! # otherwise match against it ! if rls.poster == art._from ! if rls._nbactual == 1 ! # create regex ! match = '' ! (0..(rls.noidxregex.length)).each { |i| ! break unless rls.noidxregex[i] == art._subject[i] ! match += rls.noidxregex[i].chr ! } ! match.sub!(/\d*$/, '') ! rls.noidxregex = Regexp.new '^'+Regexp.escape(match)+'(\d+)' ! end ! if art._subject =~ rls.noidxregex ! cur = $1.to_i ! rls.pushart(art, cur) ! else ! nb_ins += 1 if rls.insert_to_db ! @@rls[0] = nil ! end ! else ! nb_ins += 1 if rls.insert_to_db ! @@rls[0] = nil ! end end ! unless @@rls[0] ! rls = @@rls[0] = Release.new(art._groupindex, art._artindex) ! cur = 1 ! cur = $1.to_i if art._subject =~ /(1\d*)/ ! rls.poster = art._from ! rls.noidxregex = art._subject ! @@rls[0].pushart(art,1) ! end ! return nb_ins end ! # look for an existing release which could accept this article ! # (same poster, nbexpected, and file not already seen) ! if rls = @@rls[cat] ! if (rls.poster==art._from) and (rls._nbexpected==total)#and ((rls.lastnb<cur) or (rls.lastnb==1 and cur==1)) ! if rls._rlsmap[cur-1] == '#' ! NgetSuite::Utils.debug "#{cat}: part #{cur} already there {#{art._subject}}" ! end ! # another art for this release ! rls.pushart(art,cur) else ! # new release: commit this one ! nb_ins += 1 if rls.insert_to_db ! @@rls[cat] = nil end + end + + # if there is no release in this category (or if we just erased it), create a new one with the current article as reference + unless @@rls[cat] + @@rls[cat] = Release.new(art._groupindex, art._artindex) + @@rls[cat].poster = art._from + @@rls[cat]._nbexpected = total + @@rls[cat]._rlsmap = '_' * total + @@rls[cat].pushart(art,cur) + end + return nb_ins + end + + + def initialize(groupindex, artindex, fetch_from_db = false ) + @files = Array.new + @poster = nil + @noidxregex = nil + @_artindex = artindex + @_groupindex = groupindex + if fetch_from_db + update_from_db else ! @_size = 0 ! @_nbactual = 0 ! @_nbexpected = 0 ! @_rlsmap = nil ! @_time = Time.gm('1980') end end + def pushart(art, pos) + art._rlsnb = pos + art._rlsartindex = @_artindex + @_size += art._size + @_nbactual += 1 + @_time = art._time if art._time > @_time + @_completeness = @_nbactual.to_f / @_nbexpected.to_f * 100.to_f if @_nbexpected > 0 and @_nbactual <= @_nbexpected + + @_rlsmap[pos-1]='#' if pos>0 and @_rlsmap and pos<=@_rlsmap.size + @files.push art + end + def insert_to_db(only_mandatory = false) # A release contains at least 3 articles ! return false if @_nbactual < 3 super(only_mandatory) *************** *** 160,163 **** --- 181,185 ---- art.sync_to_db } + return true end *************** *** 180,184 **** sth.fetch do |row| subj, grpidx, rlsmap, nbact, nbtot = *row ! puts "[#{Utils.blue i}] #{Utils.yellow nbact}/#{Utils.yellow nbtot} [#{Utils.green rlsmap}] \"#{subj}\"" i += 1 end --- 202,206 ---- sth.fetch do |row| subj, grpidx, rlsmap, nbact, nbtot = *row ! puts "[#{Utils.blue i}] #{Utils.yellow nbact}/#{Utils.yellow nbtot} [#{Utils.green rlsmap if rlsmap}] \"#{subj}\"" i += 1 end *************** *** 188,192 **** def to_s ! "#{Utils.green @_artindex}: #{@_nbactual}/#{@_nbexpected} [#{Utils.yellow @_rlsmap}]" end end --- 210,214 ---- def to_s ! "#{Utils.green @_artindex}: #{@_nbactual}/#{@_nbexpected} [#{Utils.yellow @_rlsmap if @_rlsmap}]" end end Index: core.rb =================================================================== RCS file: /cvsroot/ngetsuite/ngetsuite/ngetsuite/core.rb,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** core.rb 25 Feb 2004 22:37:10 -0000 1.35 --- core.rb 5 Mar 2004 15:28:04 -0000 1.36 *************** *** 81,84 **** --- 81,88 ---- end + def test(gid) + Group.getgroup(gid).recognizereleases + end + def update_group(groupid = nil) if !groupid *************** *** 187,190 **** --- 191,196 ---- when 'write-config' $config.save(arg) + when 'test' + test(arg) when 'show-groups' Group.show Index: downloader.rb =================================================================== RCS file: /cvsroot/ngetsuite/ngetsuite/ngetsuite/downloader.rb,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** downloader.rb 27 Feb 2004 13:12:07 -0000 1.26 --- downloader.rb 5 Mar 2004 15:28:04 -0000 1.27 *************** *** 140,146 **** ldebug "Connection to #{$1}..", false when />> 200/ ! ldebug green('Connected') when />> 400/ ! ldebug red('Disconnected') when /<< QUIT/ ldebug blue('Quitted') --- 140,146 ---- ldebug "Connection to #{$1}..", false when />> 200/ ! ldebug green('Connected'), false when />> 400/ ! ldebug red('Disconnected'), false when /<< QUIT/ ldebug blue('Quitted') *************** *** 148,155 **** when 'ignored error (nntp_getline: connection closed unexpectedly)' ldebug "#{red 'Lost connection'}. Reconnecting..." ! when /close\(fast\)$/ when /^error, will try .* again \(server says byebye: (.*)\)/ ! ldebug red("Disconnected (#{$1})") + '. Reconnecting...', false when /^nntp_doarticle: try again./ when /^make_connection/ --- 148,155 ---- when 'ignored error (nntp_getline: connection closed unexpectedly)' ldebug "#{red 'Lost connection'}. Reconnecting..." ! when /close\(fast\)$/ when /^error, will try .* again \(server says byebye: (.*)\)/ ! ldebug red("Disconnected (#{$1})") + '. Reconnecting...' when /^nntp_doarticle: try again./ when /^make_connection/ Index: group.rb =================================================================== RCS file: /cvsroot/ngetsuite/ngetsuite/ngetsuite/group.rb,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** group.rb 27 Feb 2004 13:12:07 -0000 1.40 --- group.rb 5 Mar 2004 15:28:04 -0000 1.41 *************** *** 267,285 **** articles = ArticleList.new("where `groupindex`='#{@_groupindex}' order by `subject`") ! rls = nil ! nb_releases = 0 articles.each { |art| ! rls = Release.new(art._groupindex, art._artindex) if !rls ! if rls.add?(art) == false then ! # New release! Commit the current one to the db ! rls.insert_to_db ! nb_releases += 1 ! ! # And make another ! rls = NgetSuite::Release.new(art._groupindex, art._artindex) ! rls.add? art ! end } ! rls.insert_to_db debug "#{nb_releases} releases inserted in #{Utils.cooltime(Time.now-t)}" --- 267,275 ---- articles = ArticleList.new("where `groupindex`='#{@_groupindex}' order by `subject`") ! nb_releases = Release.mkreleases(true) articles.each { |art| ! nb_releases += Release.mkreleases(art) } ! nb_releases += Release.mkreleases(false) debug "#{nb_releases} releases inserted in #{Utils.cooltime(Time.now-t)}" |