From: Michal L. <lu...@us...> - 2007-03-28 13:05:54
|
Update of /cvsroot/mysqlfs/mysqlfs In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv452 Modified Files: ChangeLog configure.in mysqlfs.h query.c query.h schema.sql Log Message: 2007-03-29 Michal Ludvig <mi...@lo...> * Reworked data storage. Instead of one huge record for each file in table "data" we now store the data in 4kB chunks (a.k.a. "sectors" or "blocks"). That speeds up especially updates to files a _lot_ :-) As a side effect it allows for storing "sparse" files. CAUTION: database schema has changed in this version! Index: query.h =================================================================== RCS file: /cvsroot/mysqlfs/mysqlfs/query.h,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** query.h 23 Sep 2006 09:29:44 -0000 1.8 --- query.h 28 Mar 2007 13:05:48 -0000 1.9 *************** *** 8,11 **** --- 8,19 ---- */ + struct data_blocks_info { + unsigned long seq_first, /* Sequence ID of 1st and last block. */ + seq_last; + size_t length_first, /* Length of data for reading / writing. */ + length_last; + off_t offset_first; /* Offset in 1st block. */ + }; + long query_inode(MYSQL *mysql, const char* path); int query_inode_full(MYSQL *mysql, const char* path, char *name, size_t name_len, *************** *** 30,34 **** int query_chown(MYSQL *mysql, long inode, uid_t uid, gid_t gid); int query_utime(MYSQL *mysql, long inode, struct utimbuf *time); ! size_t query_size(MYSQL *mysql, long inode); int query_inuse_inc(MYSQL *mysql, long inode, int increment); --- 38,44 ---- int query_chown(MYSQL *mysql, long inode, uid_t uid, gid_t gid); int query_utime(MYSQL *mysql, long inode, struct utimbuf *time); ! ! ssize_t query_size(MYSQL *mysql, long inode); ! ssize_t query_size_block(MYSQL *mysql, long inode, unsigned long seq); int query_inuse_inc(MYSQL *mysql, long inode, int increment); Index: configure.in =================================================================== RCS file: /cvsroot/mysqlfs/mysqlfs/configure.in,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** configure.in 2 Oct 2006 22:34:03 -0000 1.8 --- configure.in 28 Mar 2007 13:05:48 -0000 1.9 *************** *** 34,43 **** FUSE_CFLAGS="-I${FUSE_INC_DIR}" LDFLAGS="$LDFLAGS $FUSE_LDFLAGS" ! CFLAGS="$LDFLAGS $FUSE_CFLAGS" MYSQL_CONFIG=$(which mysql_config) if test "$MYSQL_CONFIG"; then MYSQL_LDFLAGS=$(${MYSQL_CONFIG} --libs_r) ! MYSQL_CFLAGS=$(${MYSQL_CONFIG} --cflags) else for i in /usr/local/mysql /usr/local /usr; do --- 34,43 ---- FUSE_CFLAGS="-I${FUSE_INC_DIR}" LDFLAGS="$LDFLAGS $FUSE_LDFLAGS" ! CFLAGS="$CFLAGS $FUSE_CFLAGS" MYSQL_CONFIG=$(which mysql_config) if test "$MYSQL_CONFIG"; then MYSQL_LDFLAGS=$(${MYSQL_CONFIG} --libs_r) ! MYSQL_CFLAGS=$(${MYSQL_CONFIG} --include) else for i in /usr/local/mysql /usr/local /usr; do Index: schema.sql =================================================================== RCS file: /cvsroot/mysqlfs/mysqlfs/schema.sql,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** schema.sql 23 Sep 2006 09:29:44 -0000 1.5 --- schema.sql 28 Mar 2007 13:05:48 -0000 1.6 *************** *** 20,28 **** -- ! DROP TABLE IF EXISTS `data`; ! CREATE TABLE `data` ( `inode` bigint(20) NOT NULL, ! `data` longblob NOT NULL, ! PRIMARY KEY (`inode`) ) ENGINE=MyISAM DEFAULT CHARSET=binary; --- 20,29 ---- -- ! DROP TABLE IF EXISTS `data_blocks`; ! CREATE TABLE `data_blocks` ( `inode` bigint(20) NOT NULL, ! `seq` int unsigned not null, ! `data` blob , ! PRIMARY KEY (`inode`, `seq`) ) ENGINE=MyISAM DEFAULT CHARSET=binary; *************** *** 50,54 **** DELIMITER ;; /*!50003 SET SESSION SQL_MODE="" */;; ! /*!50003 CREATE */ /*!50017 DEFINER=`root`@`localhost` */ /*!50003 TRIGGER `drop_data` AFTER DELETE ON `inodes` FOR EACH ROW BEGIN DELETE FROM data WHERE inode=OLD.inode; END */;; DELIMITER ; --- 51,55 ---- DELIMITER ;; /*!50003 SET SESSION SQL_MODE="" */;; ! /*!50003 CREATE */ /*!50017 DEFINER=`root`@`localhost` */ /*!50003 TRIGGER `drop_data` AFTER DELETE ON `inodes` FOR EACH ROW BEGIN DELETE FROM data_blocks WHERE inode=OLD.inode; END */;; DELIMITER ; Index: query.c =================================================================== RCS file: /cvsroot/mysqlfs/mysqlfs/query.c,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** query.c 23 Sep 2006 09:29:44 -0000 1.15 --- query.c 28 Mar 2007 13:05:48 -0000 1.16 *************** *** 2,5 **** --- 2,6 ---- mysqlfs - MySQL Filesystem Copyright (C) 2006 Tsukasa Hamano <co...@cu...> + Copyright (C) 2006,2007 Michal Ludvig <mi...@lo...> $Id$ *************** *** 28,31 **** --- 29,61 ---- #define SQL_MAX 10240 + static inline int lock_inode(MYSQL *mysql, long inode) + { + // TODO + return 0; + } + + static inline int unlock_inode(MYSQL *mysql, long inode) + { + // TODO + return 0; + } + + static struct data_blocks_info * + fill_data_blocks_info(struct data_blocks_info *info, size_t size, off_t offset) + { + info->seq_first = offset / DATA_BLOCK_SIZE; + info->offset_first = offset % DATA_BLOCK_SIZE; + + unsigned long nr_following_blocks = ((info->offset_first + size) / DATA_BLOCK_SIZE); + info->length_first = nr_following_blocks > 0 ? DATA_BLOCK_SIZE - info->offset_first : size; + + info->seq_last = info->seq_first + nr_following_blocks; + info->length_last = (info->offset_first + size) % DATA_BLOCK_SIZE; + /* offset in last block (if it's a different one from the first block) + * is always 0 */ + + return info; + } + int query_getattr(MYSQL *mysql, const char *path, struct stat *stbuf) { *************** *** 122,126 **** depth+1, depth+1, depth, sql_from, sql_where); ! log_printf(LOG_D_OTHER, "SQL=%s\n", sql); ret = mysql_query(mysql, sql); if(ret){ --- 152,156 ---- depth+1, depth+1, depth, sql_from, sql_where); ! log_printf(LOG_D_SQL, "sql=%s\n", sql); ret = mysql_query(mysql, sql); if(ret){ *************** *** 178,181 **** --- 208,214 ---- int ret; char sql[SQL_MAX]; + struct data_blocks_info info; + + fill_data_blocks_info(&info, length, 0); long inode = query_inode(mysql, path); *************** *** 183,198 **** return inode; snprintf(sql, SQL_MAX, ! "UPDATE inodes LEFT JOIN data ON inodes.inode = data.inode SET data=RPAD(data, %lld, '\\0'), size=%lld WHERE inodes.inode=%ld", ! length, length, inode); log_printf(LOG_D_SQL, "sql=%s\n", sql); ! ret = mysql_query(mysql, sql); ! if(ret) ! goto err_out; return 0; err_out: log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); return ret; --- 216,246 ---- return inode; + lock_inode(mysql, inode); + snprintf(sql, SQL_MAX, ! "DELETE FROM data_blocks WHERE inode=%ld AND seq > %ld", ! inode, info.seq_last); log_printf(LOG_D_SQL, "sql=%s\n", sql); + if ((ret = mysql_query(mysql, sql))) goto err_out; ! snprintf(sql, SQL_MAX, ! "UPDATE data_blocks SET data=RPAD(data, %zu, '\\0') " ! "WHERE inode=%ld AND seq=%ld", ! info.length_last, inode, info.seq_last); ! log_printf(LOG_D_SQL, "sql=%s\n", sql); ! if ((ret = mysql_query(mysql, sql))) goto err_out; ! ! snprintf(sql, SQL_MAX, ! "UPDATE inodes SET size=%lld WHERE inode=%ld", ! length, inode); ! log_printf(LOG_D_SQL, "sql=%s\n", sql); ! if ((ret = mysql_query(mysql, sql))) goto err_out; ! ! unlock_inode(mysql, inode); return 0; err_out: + unlock_inode(mysql, inode); log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); return ret; *************** *** 287,299 **** goto err_out; - if (alloc_data) { - snprintf(sql, SQL_MAX, - "INSERT INTO data SET inode=%ld", new_inode_number); - - log_printf(LOG_D_SQL, "sql=%s\n", sql); - ret = mysql_query(mysql, sql); - if (ret) - goto err_out; - } return new_inode_number; --- 335,338 ---- *************** *** 419,427 **** MYSQL_RES* result; MYSQL_ROW row; ! unsigned long length; snprintf(sql, SQL_MAX, ! "SELECT SUBSTRING(data, %lld, %d) FROM data WHERE inode=%ld", ! offset + 1, size, inode); log_printf(LOG_D_SQL, "sql=%s\n", sql); --- 458,472 ---- MYSQL_RES* result; MYSQL_ROW row; ! unsigned long length = 0L, copy_len, seq; ! struct data_blocks_info info; ! char *dst = (char *)buf; ! char *src, *zeroes = alloca(DATA_BLOCK_SIZE); ! ! fill_data_blocks_info(&info, size, offset); + /* Read all required blocks */ snprintf(sql, SQL_MAX, ! "SELECT seq, data, LENGTH(data) FROM data_blocks WHERE inode=%ld AND seq>=%lu AND seq <=%lu ORDER BY seq ASC", ! inode, info.seq_first, info.seq_last); log_printf(LOG_D_SQL, "sql=%s\n", sql); *************** *** 441,458 **** } ! if(mysql_num_rows(result) != 1 && mysql_num_fields(result) != 1){ ! mysql_free_result(result); ! return -EIO; ! } ! row = mysql_fetch_row(result); ! if(!row){ ! mysql_free_result(result); ! return -EIO; } - - length = mysql_fetch_lengths(result)[0]; - memcpy((void*)buf, row[0], length); mysql_free_result(result); --- 486,530 ---- } ! /* This is a bit tricky as we support 'sparse' files now. ! * It means not all requested blocks must exist in the ! * database. For those that don't exist we'll return ! * a block of \0 instead. */ row = mysql_fetch_row(result); ! memset(zeroes, 0L, DATA_BLOCK_SIZE); ! for (seq = info.seq_first; seq<=info.seq_last; seq++) { ! off_t row_seq = -1; ! size_t row_len = DATA_BLOCK_SIZE; ! char *data = zeroes; ! ! if (row && (row_seq = atoll(row[0])) == seq) { ! data = row[1]; ! row_len = atoll(row[2]); ! } ! ! if (seq == info.seq_first) { ! if (row_len < info.offset_first) ! goto go_away; ! ! copy_len = MIN(row_len - info.offset_first, info.length_first); ! src = data + info.offset_first; ! } else if (seq == info.seq_last) { ! copy_len = MIN(info.length_last, row_len); ! src = data; ! } else { ! copy_len = MIN(DATA_BLOCK_SIZE, row_len); ! src = data; ! } ! ! memcpy(dst, src, copy_len); ! dst += copy_len; ! length += copy_len; ! ! if (row && row_seq == seq) ! row = mysql_fetch_row(result); } + go_away: + /* Read all remaining rows */ + while (mysql_fetch_row(result)); mysql_free_result(result); *************** *** 460,470 **** } ! int query_write(MYSQL *mysql, long inode, const char *data, size_t size, ! off_t offset) { MYSQL_STMT *stmt; MYSQL_BIND bind[1]; char sql[SQL_MAX]; ! size_t current_size = query_size(mysql, inode); stmt = mysql_stmt_init(mysql); --- 532,568 ---- } ! static int write_one_block(MYSQL *mysql, long inode, ! unsigned long seq, ! const char *data, size_t size, ! off_t offset) { MYSQL_STMT *stmt; MYSQL_BIND bind[1]; char sql[SQL_MAX]; ! size_t current_block_size = query_size_block(mysql, inode, seq); ! ! /* Shortcut */ ! if (size == 0) return 0; ! ! if (offset + size > DATA_BLOCK_SIZE) { ! log_printf(LOG_ERROR, "%s(): offset(%zu)+size(%zu)>max_block(%d)\n", ! __func__, offset, size, DATA_BLOCK_SIZE); ! return -EIO; ! } ! ! /* We expect the inode is already locked for this thread by caller! */ ! ! if (current_block_size == -ENXIO) { ! /* This data block has not yet been allocated */ ! snprintf(sql, SQL_MAX, ! "INSERT INTO data_blocks SET inode=%ld, seq=%lu, data=''", inode, seq); ! log_printf(LOG_D_SQL, "sql=%s\n", sql); ! if(mysql_query(mysql, sql)){ ! log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); ! return -EIO; ! } ! ! current_block_size = query_size_block(mysql, inode, seq); ! } stmt = mysql_stmt_init(mysql); *************** *** 476,500 **** memset(bind, 0, sizeof(bind)); ! if (offset == 0 && current_size == 0) { snprintf(sql, SQL_MAX, ! "UPDATE inodes LEFT JOIN data ON inodes.inode = data.inode SET data=?, size=%zu WHERE inodes.inode=%ld", size, inode); ! } else if (offset == current_size) { snprintf(sql, sizeof(sql), ! "UPDATE inodes LEFT JOIN data ON inodes.inode = data.inode SET data=CONCAT(data, ?), size=size+%zu WHERE inodes.inode=%ld", size, inode); } else { size_t pos, new_size; pos = snprintf(sql, sizeof(sql), ! "UPDATE inodes LEFT JOIN data ON inodes.inode = data.inode SET data=CONCAT("); if (offset > 0) pos += snprintf(sql + pos, sizeof(sql) - pos, "RPAD(IF(ISNULL(data),'', data), %llu, '\\0'),", offset); pos += snprintf(sql + pos, sizeof(sql) - pos, "?,"); new_size = offset + size; ! if (offset + size < current_size) { pos += snprintf(sql + pos, sizeof(sql) - pos, "SUBSTRING(data FROM %llu),", offset + size + 1); ! new_size = current_size; } sql[--pos] = '\0'; /* Remove the trailing comma. */ ! pos += snprintf(sql + pos, sizeof(sql) - pos, "), size=%zu WHERE inodes.inode=%ld", ! new_size, inode); } log_printf(LOG_D_SQL, "sql=%s\n", sql); --- 574,604 ---- memset(bind, 0, sizeof(bind)); ! if (offset == 0 && current_block_size == 0) { snprintf(sql, SQL_MAX, ! "UPDATE data_blocks " ! "SET data=? " ! "WHERE inode=%ld AND seq=%lu", ! inode, seq); ! } else if (offset == current_block_size) { snprintf(sql, sizeof(sql), ! "UPDATE data_blocks " ! "SET data=CONCAT(data, ?) " ! "WHERE inode=%ld AND seq=%lu", ! inode, seq); } else { size_t pos, new_size; pos = snprintf(sql, sizeof(sql), ! "UPDATE data_blocks SET data=CONCAT("); if (offset > 0) pos += snprintf(sql + pos, sizeof(sql) - pos, "RPAD(IF(ISNULL(data),'', data), %llu, '\\0'),", offset); pos += snprintf(sql + pos, sizeof(sql) - pos, "?,"); new_size = offset + size; ! if (offset + size < current_block_size) { pos += snprintf(sql + pos, sizeof(sql) - pos, "SUBSTRING(data FROM %llu),", offset + size + 1); ! new_size = current_block_size; } sql[--pos] = '\0'; /* Remove the trailing comma. */ ! pos += snprintf(sql + pos, sizeof(sql) - pos, ") WHERE inode=%ld AND seq=%lu", ! inode, seq); } log_printf(LOG_D_SQL, "sql=%s\n", sql); *************** *** 512,516 **** bind[0].buffer= (char *)data; bind[0].is_null= 0; ! bind[0].length= (unsigned long *)&size; if (mysql_stmt_bind_param(stmt, bind)) { --- 616,620 ---- bind[0].buffer= (char *)data; bind[0].is_null= 0; ! bind[0].length= (unsigned long *)(void *)&size; if (mysql_stmt_bind_param(stmt, bind)) { *************** *** 534,537 **** --- 638,656 ---- log_printf(LOG_ERROR, "failed closing the statement: %s\n", mysql_stmt_error(stmt)); + /* Update file size */ + snprintf(sql, SQL_MAX, + "UPDATE inodes SET size=(" + "SELECT seq*%d + LENGTH(data) FROM data_blocks WHERE inode=%ld AND seq=(" + "SELECT MAX(seq) FROM data_blocks WHERE inode=%ld" + ")" + ") " + "WHERE inode=%ld", + DATA_BLOCK_SIZE, inode, inode, inode); + log_printf(LOG_D_SQL, "sql=%s\n", sql); + if(mysql_query(mysql, sql)) { + log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); + return -EIO; + } + return size; *************** *** 543,547 **** } ! size_t query_size(MYSQL *mysql, long inode) { size_t ret; --- 662,715 ---- } ! int query_write(MYSQL *mysql, long inode, const char *data, size_t size, ! off_t offset) ! { ! struct data_blocks_info info; ! unsigned long seq; ! const char *ptr; ! int ret, ret_size = 0; ! ! fill_data_blocks_info(&info, size, offset); ! ! /* Handle first block */ ! lock_inode(mysql, inode); ! ret = write_one_block(mysql, inode, info.seq_first, data, ! info.length_first, info.offset_first); ! unlock_inode(mysql, inode); ! if (ret < 0) ! return ret; ! ret_size = ret; ! ! /* Shortcut - if last block seq is the same as first block ! * seq simply go away as it's the same block */ ! if (info.seq_first == info.seq_last) ! return ret_size; ! ! ptr = data + info.length_first; ! ! /* Handle all full-sized intermediate blocks */ ! for (seq = info.seq_first + 1; seq < info.seq_last; seq++) { ! lock_inode(mysql, inode); ! ret = write_one_block(mysql, inode, seq, ptr, DATA_BLOCK_SIZE, 0); ! unlock_inode(mysql, inode); ! if (ret < 0) ! return ret; ! ptr += DATA_BLOCK_SIZE; ! ret_size += ret; ! } ! ! /* Handle last block */ ! lock_inode(mysql, inode); ! ret = write_one_block(mysql, inode, info.seq_last, ptr, ! info.length_last, 0); ! unlock_inode(mysql, inode); ! if (ret < 0) ! return ret; ! ret_size += ret; ! ! return ret_size; ! } ! ! ssize_t query_size(MYSQL *mysql, long inode) { size_t ret; *************** *** 556,560 **** if(ret){ log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); ! return -1; } log_printf(LOG_D_SQL, "sql=%s\n", sql); --- 724,728 ---- if(ret){ log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); ! return -EIO; } log_printf(LOG_D_SQL, "sql=%s\n", sql); *************** *** 564,578 **** log_printf(LOG_ERROR, "ERROR: mysql_store_result()\n"); log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); ! return -1; } ! if(mysql_num_rows(result) != 1 && mysql_num_fields(result) != 1){ mysql_free_result(result); ! return -1; } row = mysql_fetch_row(result); if(!row){ ! return -1; } --- 732,790 ---- log_printf(LOG_ERROR, "ERROR: mysql_store_result()\n"); log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); ! return -EIO; } ! if(mysql_num_rows(result) != 1 || mysql_num_fields(result) != 1){ mysql_free_result(result); ! return -EIO; } row = mysql_fetch_row(result); if(!row){ ! return -EIO; ! } ! ! if(row[0]){ ! ret = atoll(row[0]); ! }else{ ! ret = 0; ! } ! mysql_free_result(result); ! ! return ret; ! } ! ! ssize_t query_size_block(MYSQL *mysql, long inode, unsigned long seq) ! { ! size_t ret; ! char sql[SQL_MAX]; ! MYSQL_RES *result; ! MYSQL_ROW row; ! ! snprintf(sql, SQL_MAX, "SELECT LENGTH(data) FROM data_blocks WHERE inode=%ld AND seq=%lu", ! inode, seq); ! ! ret = mysql_query(mysql, sql); ! if(ret){ ! log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); ! return -EIO; ! } ! log_printf(LOG_D_SQL, "sql=%s\n", sql); ! ! result = mysql_store_result(mysql); ! if(!result){ ! log_printf(LOG_ERROR, "ERROR: mysql_store_result()\n"); ! log_printf(LOG_ERROR, "mysql_error: %s\n", mysql_error(mysql)); ! return -EIO; ! } ! ! if(mysql_num_rows(result) == 0) { ! mysql_free_result(result); ! return -ENXIO; ! } ! ! row = mysql_fetch_row(result); ! if(!row){ ! return -EIO; } Index: ChangeLog =================================================================== RCS file: /cvsroot/mysqlfs/mysqlfs/ChangeLog,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** ChangeLog 2 Oct 2006 22:34:03 -0000 1.15 --- ChangeLog 28 Mar 2007 13:05:48 -0000 1.16 *************** *** 1,2 **** --- 1,11 ---- + 2007-03-29 Michal Ludvig <mi...@lo...> + + * Reworked data storage. Instead of one huge record + for each file in table "data" we now store the data + in 4kB chunks (a.k.a. "sectors" or "blocks"). That + speeds up especially updates to files a _lot_ :-) + As a side effect it allows for storing "sparse" files. + CAUTION: database schema has changed in this version! + 2006-10-03 Michal Ludvig <mi...@lo...> Index: mysqlfs.h =================================================================== RCS file: /cvsroot/mysqlfs/mysqlfs/mysqlfs.h,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** mysqlfs.h 4 Sep 2006 11:43:29 -0000 1.2 --- mysqlfs.h 28 Mar 2007 13:05:48 -0000 1.3 *************** *** 10,13 **** --- 10,15 ---- #define PATH_MAX 1024 + #define DATA_BLOCK_SIZE 4096 + #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) |