[libimdb-commit] CVS: libimdb/demo/indexMovies Makefile,NONE,1.1 README,NONE,1.1 db.c,NONE,1.1 db.h,
Status: Pre-Alpha
Brought to you by:
jveldhuis
|
From: Jerry V. <jve...@us...> - 2003-06-16 04:18:48
|
Update of /cvsroot/libimdb/libimdb/demo/indexMovies
In directory sc8-pr-cvs1:/tmp/cvs-serv11812/demo/indexMovies
Added Files:
Makefile README db.c db.h dumpit-db.pl dumpit.pl main.c
Log Message:
initial checkin
--- NEW FILE: Makefile ---
#
# $Id: Makefile,v 1.1 2003/06/16 04:18:37 jveldhuis Exp $
#
# Shared build configuration from the top of the tree. BUILD_DIR is not set
# in this file, so it is presumably defined by one of these includes.
include ../../config.mk
include ../../constants.mk
# Add the top-level include directory to the preprocessor search path
# (presumably where the jlog/ and libimdb/ headers used by db.c live).
CPPFLAGS += -I../../include
# Link against the in-tree parser and jlog static libraries, plus the
# system Berkeley DB (-ldb) and pthread libraries.
LIBS= ../../parser/$(BUILD_DIR)/libimdbParser.a \
../../libjlog/$(BUILD_DIR)/libjlog.a \
-ldb \
-lpthread
# Path of the demo executable built by this directory.
MYPROGRAM=$(BUILD_DIR)/indexMovies
# "build" re-invokes make with PROGRAM set to the demo executable; the rule
# that actually builds $(PROGRAM) is presumably supplied by rules.mk below.
build:
$(MAKE) PROGRAM=$(MYPROGRAM)
# Nothing is installed for this demo: "install" is a deliberate no-op.
install:; @true;
include ../../rules.mk
--- NEW FILE: README ---
indexMovies
-----------
This program builds the following files:
movies.idx
title<tab>movieID,movieID,...\n
- where title is the title string from the list file (articles not moved).
- movieID can be used to lookup more information in movies.dat file.
- contents are sorted by title.
movies.dat
movieID<tab>movieTypeID<tab>yearDescription<tab>DirectorID,DirectorID,...<tab>ActorID(billing),..\n
- where movieID is a 7-digit number, zero-padded on the left.
- movieTypeID is one of:
isMovie=1,
isTVSeries=2,
isTVMovie=3,
isVideoMovie=4,
isVideoGame=5,
isTVMiniSeries=6
- yearDescription is one of: 4 digit year or 0000 for unknown and possibly followed by a '/' and
some roman numerals.
- where DirectorID is used to lookup the directors name in directors.dat
- the (billing) following the ActorID gives the billing number if known.
- ActorIDs are always sorted in billing order
- similar for ActorIDs in actors.dat
directors.dat
DirectorID<tab>name<tab>movieID,movieID...\n
actors.dat
ActorID<tab>genderID<tab>name<tab>movieID|billing|narrator|charName|charNameAka<tab>movieID...\n
- genderID is 0 for male and 1 for female (presumably some of these are guesses)
- billing
- narrator is 0 or 1 depending if (s)he was the narrator
- charName - character name
- charNameAka - some characters have more than one name
--- NEW FILE: db.c ---
/**
* @file
* @verbatim $Id: db.c,v 1.1 2003/06/16 04:18:37 jveldhuis Exp $ @endverbatim
*/
static char rcs_id[] = "$Id: db.c,v 1.1 2003/06/16 04:18:37 jveldhuis Exp $";
#define _USE_STDIO
#define _USE_STDLIB
#define _USE_ERRNO
#define _USE_IO
#define _USE_ASSERT
#define _USE_STDARG
#define _USE_STRINGS
#define _USE_TYPES
#define _USE_CTYPE
#define _USE_MALLOC
#define _USE_LIMITS
#define _USE_TIME
#include "sysincludes.h"
#include "jlog/dicerr.h"
#include "libimdb/parser.h"
#include "db.h"
#include "/usr/include/db.h"
/*#include "/usr/local/BerkeleyDB.4.1/include/db.h"*/
/**
 * State behind the opaque imdbDB_t handle declared in db.h.
 */
struct imdbDB_s
{
/* Berkeley DB handle; set to NULL by imdbDB_create() and again after each close. */
DB *dbp;
/* Heap copy (strdup) of the dbDir argument given to imdbDB_create(). */
char *dir;
/* Parser handle returned by imdbParser_open(); closed in imdbDB_destroy(). */
imdbParser_t *parser;
};
#if 0
/**
 * Write a string to a file descriptor, percent-encoding every byte that is
 * not alphanumeric, '_', '.' or '-'.
 *
 * Each encoded byte is emitted as '%' followed by two lowercase hex digits.
 * Bytes are handled as unsigned char: passing a negative char to isalnum()
 * is undefined, and converting a negative char to unsigned int would
 * sign-extend it (0xE9 would print as "%ffffffe9" instead of "%e9").
 *
 * Output is best-effort: results of write() are not checked.
 *
 * @param fd  descriptor to write to
 * @param in  NUL-terminated string to encode (not modified)
 */
static inline void
writeIt(int fd, char *in)
{
    const unsigned char *p = (const unsigned char *)in;

    for (; *p != '\0'; p++) {
        if (isalnum(*p) || *p == '_' || *p == '.' || *p == '-') {
            write(fd, p, 1);
        }
        else {
            char esc[4]; /* '%' + two hex digits + NUL */
            int n = snprintf(esc, sizeof esc, "%%%02x", (unsigned int)*p);

            if (n > 0) {
                write(fd, esc, (size_t)n);
            }
        }
    }
}
#endif
/**
 * Move a trailing ", <article>" suffix to the front of a title.
 *
 * Recognised articles: The, Une, Les, Das, A, L', Le, La, El.
 * For example "Matrix, The" becomes "The Matrix". The article is always
 * followed by a single space, so "Avventura, L'" becomes "L' Avventura".
 * Titles without such a suffix are copied unchanged.
 *
 * Each suffix is compared only when the title is at least as long as the
 * suffix, so short titles never cause a read before the start of the
 * string.
 *
 * @param title   NUL-terminated input title (not modified)
 * @param output  destination buffer; must hold at least strlen(title)+1
 *                bytes and must not overlap title
 */
static void
titleMoveArticlesToFront(const char *title, char *output)
{
    static const char *const articles[] = {
        "The", "Une", "Les", "Das", "A", "L'", "Le", "La", "El"
    };
    const size_t count = sizeof articles / sizeof articles[0];
    const size_t len = strlen(title);
    size_t i;

    for (i = 0; i < count; i++) {
        const size_t alen = strlen(articles[i]);
        const size_t slen = alen + 2; /* ", " plus the article itself */
        const char *suffix;

        if (len < slen) {
            continue; /* title too short to end with this suffix */
        }
        suffix = title + (len - slen);
        if (suffix[0] != ',' || suffix[1] != ' ' ||
            memcmp(suffix + 2, articles[i], alen) != 0) {
            continue;
        }

        /* article + ' ' + title without the suffix: len-1 characters */
        memcpy(output, articles[i], alen);
        output[alen] = ' ';
        memcpy(output + alen + 1, title, len - slen);
        output[len - 1] = '\0';
        return;
    }

    memcpy(output, title, len + 1);
}
/**
 * Parser callback: receives one parsed list-file entry at a time.
 *
 * For LIST_MOVIES entries the movie label is stored as the key in the
 * Berkeley DB handle of the imdbDB_t attached to the parser, with a
 * one-character movie type code ('1'..'6') as the value. Neither key nor
 * value includes a trailing NUL. Keys that already exist are left
 * untouched (DB_NOOVERWRITE). Director, actor and actress entries are
 * currently ignored.
 *
 * An entry whose type is not one of the six known movie types is reported
 * on stderr and skipped; previously it caused strlen() to be called on an
 * uninitialized buffer.
 *
 * @param logh   log handle
 * @param imdbh  parser that produced the entry
 * @param lf     which list file the entry came from
 * @param entry  entry data; its concrete struct type depends on lf
 * @return 0 to continue parsing, -1 to make the parser stop (on a database
 *         error, or once the entry number exceeds 1000)
 */
static int
listentry_cb(jlogHandle_t *logh,
             imdbParser_t *imdbh,
             const ListFile_e lf,
             const void *entry)
{
    imdbDB_t *dbh = imdbParser_getUserHandle(logh, imdbh);

    switch (lf) {
    case LIST_MOVIES:
        {
            const struct MovieInList_s *data = entry;
            char typeCode = '\0';
            DBT key, keydata;
            int ret;

            switch (data->type) {
            case isMovie:        typeCode = '1'; break;
            case isTVSeries:     typeCode = '2'; break;
            case isTVMovie:      typeCode = '3'; break;
            case isVideoMovie:   typeCode = '4'; break;
            case isVideoGame:    typeCode = '5'; break;
            case isTVMiniSeries: typeCode = '6'; break;
            default:             break;
            }

            if (typeCode == '\0') {
                fprintf(stderr, "db: %s: unknown movie type %d, entry skipped\n",
                        data->label, (int)data->type);
            }
            else {
                memset(&key, 0, sizeof(key));
                memset(&keydata, 0, sizeof(keydata));
                key.data = data->label;
                key.size = strlen(data->label);
                keydata.data = &typeCode;
                keydata.size = 1;

                ret = dbh->dbp->put(dbh->dbp, NULL, &key, &keydata, DB_NOOVERWRITE);
                /* DB_KEYEXIST only means the label was stored earlier. */
                if (ret != 0 && ret != DB_KEYEXIST) {
                    dbh->dbp->err(dbh->dbp, ret, "DB->put");
                    return(-1);
                }
            }

            /* Stop after the first ~1000 entries (limit kept from the
             * original code; looks like a development aid). */
            if (data->number > 1000) {
                return(-1);
            }
        }
        break;
    case LIST_DIRECTORS:
    case LIST_ACTORS:
    case LIST_ACTRESSES:
        /* not indexed yet */
        break;
    default:
        break;
    }
    return(0);
}
/**
 * Allocate an indexing handle and open the list-file parser.
 *
 * The handle keeps its own copy of dbDir and registers itself as the
 * parser's user handle, so that listentry_cb() can find it again. No
 * database is opened here; imdbDB_indexMovies() does that.
 *
 * NOTE(review): DICE_BAD_MEM_FAILURE and DIC_FAILURE are assumed to log and
 * then return their last argument from this function, as the original code
 * already relied on.
 *
 * @param logh    log handle
 * @param dbsDir  directory handed to imdbParser_open()
 * @param dbDir   directory remembered for the database file; copied
 * @return a new handle to be released with imdbDB_destroy(), or NULL when
 *         an allocation or opening the parser fails
 */
imdbDB_t *
imdbDB_create(jlogHandle_t *logh,
              const char *dbsDir,
              const char *dbDir)
{
    imdbDB_t *dbh = malloc(sizeof *dbh);

    if ( dbh == NULL ) {
        DICE_BAD_MEM_FAILURE(logh, imdbDB_t *, NULL);
    }
    dbh->dbp = NULL;
    dbh->parser = NULL;

    /* strdup() can fail too; a NULL dir would later be fed to "%s". */
    dbh->dir = strdup(dbDir);
    if ( dbh->dir == NULL ) {
        free(dbh);
        DICE_BAD_MEM_FAILURE(logh, imdbDB_t *, NULL);
    }

    dbh->parser = imdbParser_open(logh, dbsDir, listentry_cb);
    if ( dbh->parser == NULL ) {
        free(dbh->dir);
        free(dbh);
        DIC_FAILURE(logh, imdbDB_t *, NULL);
    }
    imdbParser_setUserHandle(logh, dbh->parser, dbh);
    return(dbh);
}
/**
 * Tear down a handle obtained from imdbDB_create().
 *
 * The parser is closed first, then the database handle if one is still
 * open, and finally the directory string and the handle itself are freed.
 *
 * @param logh  log handle
 * @param dbh   handle to release; must not be used afterwards
 * @return the value produced by DIC_SUCCESS(logh, int, 0)
 */
int
imdbDB_destroy(jlogHandle_t *logh,
               imdbDB_t *dbh)
{
    DB *openDb;

    imdbParser_close(logh, dbh->parser);

    openDb = dbh->dbp;
    if ( openDb ) {
        openDb->close(openDb, 0);
    }

    free(dbh->dir);
    free(dbh);

    DIC_SUCCESS(logh, int, 0);
}
/**
 * Build the movie index: create the Berkeley DB btree file
 * "<dir>/moviedb" and fill it by running the movies list through the
 * parser, which calls listentry_cb() for every entry.
 *
 * The same path is now used for opening, for error messages and for
 * removing the file after an aborted parse. Previously the database was
 * opened as "moviedb" in the current directory while "<dir>/moviedb.idx"
 * was reported and unlinked. With dir "." (the value main() passes) the
 * file location is unchanged.
 *
 * Failures are returned to the caller instead of terminating the process,
 * and a handle whose open failed is closed rather than leaked.
 *
 * NOTE(review): DB->open is called with the argument list used by the
 * original code (no transaction argument); this presumably matches the
 * Berkeley DB version behind /usr/include/db.h — confirm when upgrading.
 *
 * @param logh  log handle
 * @param dbh   handle from imdbDB_create()
 * @return 0 on success (including a parse the callback stopped early),
 *         -1 on failure
 */
int
imdbDB_indexMovies(jlogHandle_t *logh,
                   imdbDB_t *dbh)
{
    char file[1024];
    int ret;
    int n;

    n = snprintf(file, sizeof file, "%s/moviedb", dbh->dir);
    if ( n < 0 || (size_t)n >= sizeof file ) {
        fprintf(stderr, "%s/moviedb: path too long\n", dbh->dir);
        DIC_FAILURE(logh, int, -1);
    }

    if ((ret = db_create(&(dbh->dbp), NULL, 0)) != 0) {
        fprintf(stderr, "db_create: %s\n", db_strerror(ret));
        dbh->dbp = NULL;
        DIC_FAILURE(logh, int, -1);
    }

    if ((ret = dbh->dbp->open(dbh->dbp, file, NULL, DB_BTREE, DB_CREATE, 0664)) != 0) {
        dbh->dbp->err(dbh->dbp, ret, "%s", file);
        /* The handle must still be discarded after a failed open. */
        dbh->dbp->close(dbh->dbp, 0);
        dbh->dbp = NULL;
        DIC_FAILURE(logh, int, -1);
    }

    /* -1 means stop, -2 means abort */
    if ( imdbParser_moviesParse(logh, dbh->parser) < -1 ) {
        dbh->dbp->close(dbh->dbp, 0);
        dbh->dbp = NULL;
        unlink(file);
        DIC_FAILURE(logh, int, -1);
    }

    /* close() flushes cached data, so its result decides success. */
    ret = dbh->dbp->close(dbh->dbp, 0);
    dbh->dbp = NULL;
    if ( ret != 0 ) {
        fprintf(stderr, "%s: close: %s\n", file, db_strerror(ret));
        DIC_FAILURE(logh, int, -1);
    }
    return(0);
}
--- NEW FILE: db.h ---
/**
* @file
* @verbatim $Id: db.h,v 1.1 2003/06/16 04:18:37 jveldhuis Exp $ @endverbatim
*/
#ifndef IMDBDB_H
# define IMDBDB_H
/*
 * Public interface of the indexMovies demo database layer.
 * This header uses jlogHandle_t without declaring it, so the jlog header
 * that declares it must be included first (db.c and main.c include
 * "jlog/dicerr.h" before this file).
 */
/** Opaque indexing handle; the struct is defined in db.c. */
typedef struct imdbDB_s imdbDB_t;
/**
 * Create an indexing handle and open the list-file parser.
 * dbsDir is passed to the parser; dbDir is copied into the handle.
 * Returns NULL on failure; release the handle with imdbDB_destroy().
 */
imdbDB_t *
imdbDB_create(jlogHandle_t *logh,
const char *dbsDir,
const char *dbDir);
/**
 * Close the parser and any open database, then free the handle.
 */
int
imdbDB_destroy(jlogHandle_t *logh,
imdbDB_t *dbh);
/**
 * Run the movies list through the parser and store every movie label in a
 * Berkeley DB btree. Returns 0 on success.
 */
int
imdbDB_indexMovies(jlogHandle_t *logh,
imdbDB_t *dbh);
#endif /* IMDBDB_H */
--- NEW FILE: dumpit-db.pl ---
#!/usr/bin/perl -w
#
# Dump every key/value pair of the "moviedb" btree written by indexMovies,
# using the DB_File module.
#
# The original script loaded DB_File but tied the hash to
# 'BerkeleyDB::Btree' with the BerkeleyDB-only DB_RDONLY flag, so it could
# not run. DB_File takes positional arguments instead:
#   tie %h, 'DB_File', $file, $open_flags, $mode, $DB_BTREE
#
# NOTE: DB_File must be linked against a Berkeley DB release that can read
# the file format written by indexMovies; otherwise use dumpit.pl.
#
use strict;
use Fcntl;
use DB_File;

my %hash;
my $db = tie %hash, 'DB_File', "moviedb", O_RDONLY, 0644, $DB_BTREE
    or die "failed:$!";

# db.c stores keys and values without a trailing NUL (sizes come from
# strlen), so no DBM filters are needed to strip one.

print "size=".scalar(keys %hash)."\n";

# The database is opened read-only, so nothing is stored here; the stray
# test write ($hash{key}="data") from the original was dropped.
while ( my ($key, $value) = each %hash ) {
    print "$key: $value\n";
}

undef($db);
untie %hash;
exit(0);
--- NEW FILE: dumpit.pl ---
#!/usr/bin/perl -w
#
# Dump every key/value pair of the "moviedb" btree written by indexMovies,
# using the BerkeleyDB module.
#
use strict;
use BerkeleyDB;

my %hash;
# BerkeleyDB reports most failures through $BerkeleyDB::Error rather than
# $!, so both are included in the message.
my $db = tie %hash, 'BerkeleyDB::Btree',
             -Filename => "moviedb",
             -Flags    => DB_RDONLY
    or die "failed:$! $BerkeleyDB::Error\n";

# db.c stores keys and values without a trailing NUL (sizes come from
# strlen), so no DBM filters are needed to strip one.

print "size=".scalar(keys %hash)."\n";

# The database is opened read-only, so nothing is stored here; the stray
# test write ($hash{key}="data") from the original was dropped.
while ( my ($key, $value) = each %hash ) {
    print "$key: $value\n";
}

undef($db);
untie %hash;
exit(0);
--- NEW FILE: main.c ---
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "sysincludes.h"
#include "jlog/dicerr.h"
#include "jlog/jlog2File.h"
#include "db.h"
/* Process-wide handles used by main(). */
jlogHandle_t *logh;
imdbDB_t *dbh;
jlog2File_t *l2f;

/**
 * Demo driver: set up logging, index the movies list found under
 * "../dumpMovies/dbs" into a database in the current directory, then tear
 * everything down.
 *
 * The notification-level call used to sit inside assert(), so it was
 * compiled out entirely when NDEBUG was defined; it is now always executed
 * and its result checked explicitly. The exit status reflects whether
 * indexing succeeded.
 *
 * @return EXIT_SUCCESS when the movie index was built, EXIT_FAILURE otherwise
 */
int
main(int argc, char **argv)
{
    int status = EXIT_FAILURE;

    (void)argc;
    (void)argv;

    /* "-" presumably selects standard output as the log destination. */
    l2f = jlog2File_create("-");
    if ( l2f == NULL ) {
        fprintf(stderr, "indexMovies: cannot create log output\n");
        return EXIT_FAILURE;
    }
    if ( jlogCallback_setNotificationLevel(jlog2File_getJLogCallbackHandle(l2f),
                                           MOD_ALL,
                                           JLOGT_DEBUG) != 0 ) {
        fprintf(stderr, "indexMovies: cannot set log notification level\n");
        jlog2File_destroy(l2f);
        return EXIT_FAILURE;
    }

    logh = jlog_openNew();
    if ( logh == NULL ) {
        fprintf(stderr, "indexMovies: cannot open log handle\n");
        jlog2File_destroy(l2f);
        return EXIT_FAILURE;
    }
    jlog_registerEventCallback(logh, jlog2File_getJLogCallbackHandle(l2f));

    dbh = imdbDB_create(logh, "../dumpMovies/dbs", ".");
    if ( dbh != NULL ) {
        if ( imdbDB_indexMovies(logh, dbh) == 0 ) {
            status = EXIT_SUCCESS;
        }
        /* TODO: directors, actors and actresses are not indexed yet. */
        imdbDB_destroy(logh, dbh);
        dbh = NULL;
    }

    jlog_close(logh);
    jlog2File_destroy(l2f);
    return status;
}
|