#203 HUGE memory usage on htload 2GB wordlist

Include_in_3.2
open
nobody
httools (7)
5
2003-12-04
2003-12-04
Anonymous
No

Hi.
I have a 2GB word dump and I tried to load it with htload.
It used 3GB of RAM and crashed.

Here is part of the config:

wordlist_compress_zlib: false
wordlist_compress: false
compression_level: 9
wordlist_cache_size: 200000000
wordlist_page_size: 16384

michal.mihalik@pobox.sk

Discussion

  • Nobody/Anonymous

    Logged In: NO

    Solved. At the end of HtWordList.cc, Load() reads all of the
    words into memory at once. Add a counter to the loop there and
    call Flush() every X iterations ... it works ...

     
  • Nobody/Anonymous

    Logged In: NO

    Here is the repaired function... sorry, no diff. (The variable
    i and the if statement that tests it are what I added.)

    Michal

    //*****************************************************************************
    // int HtWordList::Load(const String& filename)
    //
    // Read in an ascii version of the word database in <filename>.
    //
    // The database must already be open (isopen). After the header has been
    // accepted by HtWordReference::LoadHeader(), every remaining line is parsed
    // into a freshly allocated HtWordReference and added to `words`; lines that
    // fail to parse are skipped. Flush() is called after each batch of input
    // lines and once more at the end, instead of only once after the whole file
    // has been read (assumes Flush() writes out and releases the words added so
    // far, which is what keeps memory use bounded -- TODO confirm).
    //
    // Returns OK on success, or NOTOK if the database is not open, the file
    // cannot be opened, or the header is not correct.
    //
    int HtWordList::Load(const String& filename)
    {
        // Number of input lines read between two intermediate Flush() calls.
        const int flush_interval = 10000;

        FILE *fl;
        String data;
        HtWordReference *next;
        int i;

        if (!isopen) {
            cerr << "WordList::Load: database must be opened first\n";
            return NOTOK;
        }

        if ((fl = fopen(filename, "r")) == 0) {
            perror(form("WordList::Load: opening %s for reading",
                        (const char*)filename));
            return NOTOK;
        }

        if (HtWordReference::LoadHeader(fl) != OK)
        {
            cerr << "WordList::Load: header is not correct\n";
            // Close the file on this error path too, so the handle is not leaked.
            fclose(fl);
            return NOTOK;
        }

        // i counts every line read since the last flush, including lines that
        // failed to parse.
        i = 0;
        while (data.readLine(fl))
        {
            i++;
            next = new HtWordReference;
            if (next->Load(data) != OK)
            {
                // Unparsable line: discard the reference and move on.
                delete next;
                continue;
            }

            // Presumably `words` takes ownership of `next`; it is not deleted here.
            words->Add(next);
            if (i > flush_interval)
            {
                i = 0;
                Flush();
            }
        }

        // Flush whatever is left from the last, partial batch.
        Flush();
        fclose(fl);

        return OK;
    }

     
  • Nobody/Anonymous

    Logged In: NO

    Hmmm

    //*****************************************************************************
    // int HtWordList::Load(const String& filename)
    //
    // Read in an ascii version of the word database in <filename>.
    //
    // The database must already be open (isopen). After the header has been
    // accepted by HtWordReference::LoadHeader(), every remaining line is parsed
    // into a freshly allocated HtWordReference and added to `words`; lines that
    // fail to parse are skipped. Flush() is called after each batch of input
    // lines and once more at the end, instead of only once after the whole file
    // has been read (assumes Flush() writes out and releases the words added so
    // far, which is what keeps memory use bounded -- TODO confirm).
    //
    // Returns OK on success, or NOTOK if the database is not open, the file
    // cannot be opened, or the header is not correct.
    //
    int HtWordList::Load(const String& filename)
    {
        // Number of input lines read between two intermediate Flush() calls.
        const int flush_interval = 10000;

        FILE *fl;
        String data;
        HtWordReference *next;
        int i;

        if (!isopen) {
            cerr << "WordList::Load: database must be opened first\n";
            return NOTOK;
        }

        if ((fl = fopen(filename, "r")) == 0) {
            perror(form("WordList::Load: opening %s for reading",
                        (const char*)filename));
            return NOTOK;
        }

        if (HtWordReference::LoadHeader(fl) != OK)
        {
            cerr << "WordList::Load: header is not correct\n";
            // Close the file on this error path too, so the handle is not leaked.
            fclose(fl);
            return NOTOK;
        }

        // i counts every line read since the last flush, including lines that
        // failed to parse.
        i = 0;
        while (data.readLine(fl))
        {
            i++;
            next = new HtWordReference;
            if (next->Load(data) != OK)
            {
                // Unparsable line: discard the reference and move on.
                delete next;
                continue;
            }

            // Presumably `words` takes ownership of `next`; it is not deleted here.
            words->Add(next);
            if (i > flush_interval)
            {
                i = 0;
                Flush();
            }
        }

        // Flush whatever is left from the last, partial batch.
        Flush();
        fclose(fl);

        return OK;
    }

     

Log in to post a comment.