[lc-checkins] CVS: linux/mm/comp_cache adaptivity.c,1.44,1.45 free.c,1.48,1.49 main.c,1.67,1.68 proc
From: Rodrigo S. de C. <rc...@us...> - 2002-12-06 22:50:35
Update of /cvsroot/linuxcompressed/linux/mm/comp_cache
In directory sc8-pr-cvs1:/tmp/cvs-serv8451/mm/comp_cache
Modified Files:
adaptivity.c free.c main.c proc.c swapin.c swapout.c vswap.c
Log Message:
Some races remain to be fixed, but this set of changes fixes a bunch
of them, including one that would corrupt file systems when used with
the preempt patch.
Bug fixes
o Fixed a bug that might compress a page a second time if it was
swapped in while being written out using swap buffers. In that case, a
new swap cache page could be compressed even though we are not sure
the fragment being written out has actually been freed. The fix is to
make the swap buffer take a reference on this swap cache page,
releasing it when the swap buffer is freed (a minimal sketch of this
scheme follows this log message).
o Fixed a bug that could submit a read to the disk while the same
block was being written by a swap buffer. When writing out the swap
buffer, we now take a reference on the fragment to prevent it from
being released, even if it is swapped in in the meantime.
o Removed extra spin_lock()/spin_unlock() on comp_cache_lock in
grow_comp_cache()
o Fixed a race in compact_comp_cache() that we were triggering, which
would corrupt the fs or return wrong process data, likely to segfault.
It usually happened with the preempt patch. When a fragment is
relocated to another comp page, the process could be preempted after
the fragment is removed from the previous comp page, but before it is
added to the next one. If this happens, a read operation is submitted
to the disk, likely to read bogus data or, if vswap is used, to reach
a kernel BUG. To solve it, we add the new fragment to the hash table
before the old one is removed. So, if the process is preempted before
the old fragment is removed, we still have a fragment holding its
data. That fragment stays locked until it gets to a sane state, but it
surely prevents a read operation from being submitted (see the second
sketch below). We think it's SMP-safe too: if a reference to the old
fragment is taken after the new fragment has been added to the hash
table, the old fragment isn't freed and we remove the new fragment
from the hash table. If the new fragment is referenced, the behaviour
is the same as when the process is preempted.
o Added spin_lock()/spin_unlock() to the clean page adaptivity code to
provide concurrency control.
o Fixed a bug that allowed setting more than 50% of the memory size as
the maximum size of the compressed cache. For example, booting with
"mem=16M compsize=12M" would work. Simple fix.
o Fixed a bug that would duplicate a real swap entry (for compressed
swap) even when the duplication of the swap entry had failed.
o Although unlikely, nothing prevents a swap entry from being freed
while it is being written out by a swap buffer. Now, besides the
reference on the fragment, we also hold a reference on the swap entry
when writing out a page (see the third sketch below).
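
Below, a minimal userspace sketch of the swap buffer pinning scheme
from the first fix above -- not the kernel code: struct page, struct
swp_buffer and the page_cache_get()/page_cache_release() helpers are
simplified stand-ins for the real ones in mm/comp_cache.

#include <assert.h>
#include <stddef.h>

struct page { int count; };
struct swp_buffer { struct page *swap_cache_page; };

static void page_cache_get(struct page *p)     { p->count++; }
static void page_cache_release(struct page *p) { assert(p->count > 0); p->count--; }

/* Swap-in while the fragment is still under writeback: pin the new
 * swap cache page on behalf of the swap buffer, so it cannot be
 * compressed again before the write finishes. */
static void pin_swap_cache_page(struct swp_buffer *buf, struct page *page)
{
	buf->swap_cache_page = page;
	page_cache_get(page);
}

/* When the swap buffer is freed after the I/O, the pin is dropped. */
static void free_swp_buffer(struct swp_buffer *buf)
{
	if (buf->swap_cache_page) {
		page_cache_release(buf->swap_cache_page);
		buf->swap_cache_page = NULL;
	}
}

int main(void)
{
	struct page page = { 1 };
	struct swp_buffer buf = { NULL };

	pin_swap_cache_page(&buf, &page);	/* count == 2: recompression blocked */
	free_swp_buffer(&buf);			/* count == 1 again */
	return page.count != 1;
}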
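
The compact_comp_cache() fix boils down to an ordering rule: insert
the relocated fragment into the hash table before trying to drop the
old one, and back the insertion out if the drop fails. A sketch of
that ordering, with a hypothetical single-bucket "hash table" standing
in for the real one:

#include <stdio.h>
#include <stddef.h>

struct fragment {
	int count;		/* reference count */
	struct fragment *next;	/* hash chain */
};

static struct fragment *hash_bucket;	/* single-bucket "hash table" */

static void add_to_hash(struct fragment *f)
{
	f->next = hash_bucket;
	hash_bucket = f;
}

static void remove_from_hash(struct fragment *f)
{
	struct fragment **p = &hash_bucket;

	while (*p && *p != f)
		p = &(*p)->next;
	if (*p)
		*p = f->next;
}

/* dropping succeeds only if nobody else holds a reference */
static int drop_fragment(struct fragment *f)
{
	return f->count == 1;
}

static void relocate(struct fragment *old_frag, struct fragment *new_frag)
{
	add_to_hash(new_frag);		/* new copy reachable *first* */
	if (!drop_fragment(old_frag)) {
		/* old fragment was referenced in the meantime:
		 * keep it and back out the new copy */
		remove_from_hash(new_frag);
		return;
	}
	remove_from_hash(old_frag);	/* data stayed reachable throughout */
}

int main(void)
{
	struct fragment old_frag = { 1, NULL };
	struct fragment new_frag = { 1, NULL };

	add_to_hash(&old_frag);
	relocate(&old_frag, &new_frag);
	printf("bucket head holds the %s fragment\n",
	       hash_bucket == &new_frag ? "new" : "old");
	return 0;
}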
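
And a sketch of the pin/unpin protocol the writeout path now follows
(second and last fixes above): take a reference on the fragment and,
for swap cache pages, duplicate the swap entry before the I/O, then
release both when the write is done. get_fragment(), swap_duplicate()
and friends are simplified stand-ins using plain counters.

#include <stdbool.h>

struct fragment  { int count; };
struct swp_entry { int count; };

static void get_fragment(struct fragment *f) { f->count++; }
static void put_fragment(struct fragment *f) { f->count--; }

/* duplication fails if the entry has already been freed */
static bool swap_duplicate(struct swp_entry *e)
{
	if (e->count == 0)
		return false;
	e->count++;
	return true;
}

static void swap_free(struct swp_entry *e) { e->count--; }

static void write_buffer_page(struct fragment *f) { (void) f; /* the I/O */ }

static bool writeout(struct fragment *f, struct swp_entry *e,
		     bool swap_cache_page)
{
	get_fragment(f);	/* no concurrent read of bogus data */
	if (swap_cache_page && !swap_duplicate(e)) {
		put_fragment(f);	/* entry already freed: give up */
		return false;
	}

	write_buffer_page(f);	/* both pinned for the whole I/O */

	put_fragment(f);	/* releases happen after the I/O, as in */
	if (swap_cache_page)	/* refill_swp_buffer() in the diff below */
		swap_free(e);
	return true;
}

int main(void)
{
	struct fragment f = { 1 };
	struct swp_entry e = { 1 };

	return writeout(&f, &e, true) ? 0 : 1;
}
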
Index: adaptivity.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/comp_cache/adaptivity.c,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -r1.44 -r1.45
*** adaptivity.c 29 Nov 2002 21:23:03 -0000 1.44
--- adaptivity.c 6 Dec 2002 19:29:21 -0000 1.45
***************
*** 2,6 ****
* linux/mm/comp_cache/adaptivity.c
*
! * Time-stamp: <2002-11-29 12:05:01 rcastro>
*
* Linux Virtual Memory Compressed Cache
--- 2,6 ----
* linux/mm/comp_cache/adaptivity.c
*
! * Time-stamp: <2002-12-06 09:58:05 rcastro>
*
* Linux Virtual Memory Compressed Cache
***************
*** 587,592 ****
int retval = 0;
- spin_lock(&comp_cache_lock);
-
page = alloc_pages(GFP_ATOMIC, COMP_PAGE_ORDER);
--- 587,590 ----
***************
*** 594,603 ****
if (!page) {
failed_comp_page_allocs++;
! goto out_unlock;
}
if (!init_comp_page(&comp_page, page)) {
__free_pages(page, COMP_PAGE_ORDER);
! goto out_unlock;
}
--- 592,601 ----
if (!page) {
failed_comp_page_allocs++;
! goto out;
}
if (!init_comp_page(&comp_page, page)) {
__free_pages(page, COMP_PAGE_ORDER);
! goto out;
}
***************
*** 614,619 ****
grow_fragment_hash_table();
grow_vswap();
! out_unlock:
! spin_unlock(&comp_cache_lock);
return retval;
}
--- 612,616 ----
grow_fragment_hash_table();
grow_vswap();
! out:
return retval;
}
***************
*** 624,627 ****
--- 621,626 ----
* not yet reached the maximum size, we try to grow compressed cache
* by one new entry.
+ *
+ * caller must hold comp_cache_lock
*/
int
***************
*** 675,678 ****
--- 674,678 ----
while (1) {
fragment = list_entry(fragment_lh, struct comp_cache_fragment, list);
+ /* what about count == 2 && swp_buffer != NULL? */
if (fragment_count(fragment) != 1) {
fail = 1;
***************
*** 722,727 ****
--- 722,734 ----
new_fragment->flags = fragment->flags;
new_fragment->comp_page = new_comp_page;
+
+ /* By setting the new fragment's count to the count of the old
+ * fragment, we make sure that no reference will be lost -- in
+ * particular, the swap buffer's. */
set_fragment_count(new_fragment, fragment_count(fragment));
+ /* If we have a swap buffer, we just point it at this fragment
+ * (its reference has already been carried over above). As
+ * simple as that. */
if ((new_fragment->swp_buffer = fragment->swp_buffer))
new_fragment->swp_buffer->fragment = new_fragment;
***************
*** 731,736 ****
--- 738,746 ----
previous_comp_page = comp_page;
+ add_fragment_to_hash_table(new_fragment);
+
UnlockPage(comp_page->page);
if (!drop_fragment(fragment)) {
+ remove_fragment_from_hash_table(new_fragment);
if (fragment->swp_buffer)
fragment->swp_buffer->fragment = fragment;
***************
*** 748,752 ****
add_to_comp_page_list(new_comp_page, new_fragment);
add_fragment_vswap(new_fragment);
- add_fragment_to_hash_table(new_fragment);
if (CompFragmentActive(new_fragment))
--- 758,761 ----
***************
*** 812,815 ****
--- 821,826 ----
struct clean_page_data * clpage;
+ spin_lock(&comp_cache_lock);
+
clpage = clean_page_hash[clean_page_hashfn(page->mapping, page->index)];
***************
*** 820,824 ****
inside:
if (!clpage)
! return;
if (clpage->mapping != page->mapping)
continue;
--- 831,835 ----
inside:
if (!clpage)
! goto out_release;
if (clpage->mapping != page->mapping)
continue;
***************
*** 837,840 ****
--- 848,854 ----
nr_clean_page_hits = 0;
}
+
+ out_release:
+ spin_unlock(&comp_cache_lock);
}
***************
*** 845,853 ****
unsigned long hash_index;
/* allocate a new structure */
clpage = ((struct clean_page_data *) kmem_cache_alloc(clean_page_cachep, SLAB_ATOMIC));
if (unlikely(!clpage))
! return;
clpage->mapping = page->mapping;
--- 859,869 ----
unsigned long hash_index;
+ spin_lock(&comp_cache_lock);
+
/* allocate a new structure */
clpage = ((struct clean_page_data *) kmem_cache_alloc(clean_page_cachep, SLAB_ATOMIC));
if (unlikely(!clpage))
! goto out_release;
clpage->mapping = page->mapping;
***************
*** 901,904 ****
--- 917,922 ----
if (num_clean_fragments * 10 > num_fragments * 3)
compact_comp_cache();
+ out_release:
+ spin_unlock(&comp_cache_lock);
}
#endif
Index: free.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/comp_cache/free.c,v
retrieving revision 1.48
retrieving revision 1.49
diff -C2 -r1.48 -r1.49
*** free.c 22 Nov 2002 16:01:37 -0000 1.48
--- free.c 6 Dec 2002 19:29:22 -0000 1.49
***************
*** 2,6 ****
* linux/mm/comp_cache/free.c
*
! * Time-stamp: <2002-10-25 11:26:26 rcastro>
*
* Linux Virtual Memory Compressed Cache
--- 2,6 ----
* linux/mm/comp_cache/free.c
*
! * Time-stamp: <2002-12-05 19:38:23 rcastro>
*
* Linux Virtual Memory Compressed Cache
***************
*** 62,72 ****
if (!fragment->mapping)
BUG();
-
- /* fragments that have already been submitted to IO have a
- * non-null swp_buffer. Let's warn the swap buffer that this
- * page has been already removed by setting its fragment field
- * to NULL. */
- if (fragment->swp_buffer)
- fragment->swp_buffer->fragment = NULL;
/* compressed fragments of swap cache are accounted in
--- 62,65 ----
Index: main.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/comp_cache/main.c,v
retrieving revision 1.67
retrieving revision 1.68
diff -C2 -r1.67 -r1.68
*** main.c 26 Nov 2002 21:42:32 -0000 1.67
--- main.c 6 Dec 2002 19:29:22 -0000 1.68
***************
*** 2,6 ****
* linux/mm/comp_cache/main.c
*
! * Time-stamp: <2002-11-26 19:32:57 rcastro>
*
* Linux Virtual Memory Compressed Cache
--- 2,6 ----
* linux/mm/comp_cache/main.c
*
! * Time-stamp: <2002-12-05 11:35:39 rcastro>
*
* Linux Virtual Memory Compressed Cache
***************
*** 206,209 ****
--- 206,210 ----
LIST_HEAD(inactive_lru_queue);
+ /* caller must hold comp_cache_lock spinlock */
inline int
init_comp_page(struct comp_cache_page ** comp_page,struct page * page) {
***************
*** 234,238 ****
min_num_comp_pages = page_to_comp_page(48);
! if (!max_num_comp_pages || max_num_comp_pages < min_num_comp_pages || max_num_comp_pages > num_physpages * 0.5)
max_num_comp_pages = page_to_comp_page((unsigned long) (num_physpages * 0.5));
--- 235,239 ----
min_num_comp_pages = page_to_comp_page(48);
! if (!max_num_comp_pages || max_num_comp_pages < min_num_comp_pages || max_num_comp_pages > page_to_comp_page(num_physpages) * 0.5)
max_num_comp_pages = page_to_comp_page((unsigned long) (num_physpages * 0.5));
***************
*** 243,247 ****
max_used_num_comp_pages = min_num_comp_pages = num_comp_pages = page_to_comp_page(48);
! if (!max_num_comp_pages || max_num_comp_pages < min_num_comp_pages || max_num_comp_pages > num_physpages * 0.5)
max_num_comp_pages = page_to_comp_page((unsigned long) (num_physpages * 0.5));
--- 244,248 ----
max_used_num_comp_pages = min_num_comp_pages = num_comp_pages = page_to_comp_page(48);
! if (!max_num_comp_pages || max_num_comp_pages < min_num_comp_pages || max_num_comp_pages > page_to_comp_page(num_physpages) * 0.5)
max_num_comp_pages = page_to_comp_page((unsigned long) (num_physpages * 0.5));
Index: proc.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/comp_cache/proc.c,v
retrieving revision 1.29
retrieving revision 1.30
diff -C2 -r1.29 -r1.30
*** proc.c 22 Nov 2002 16:01:41 -0000 1.29
--- proc.c 6 Dec 2002 19:29:22 -0000 1.30
***************
*** 2,6 ****
* linux/mm/comp_cache/proc.c
*
! * Time-stamp: <2002-10-21 16:26:52 rcastro>
*
* Linux Virtual Memory Compressed Cache
--- 2,6 ----
* linux/mm/comp_cache/proc.c
*
! * Time-stamp: <2002-12-01 17:35:44 rcastro>
*
* Linux Virtual Memory Compressed Cache
***************
*** 217,220 ****
--- 217,225 ----
fragment_index = 0;
+ if (!counter)
+ BUG();
+ if (metadata_offset > COMP_PAGE_SIZE)
+ BUG();
+
while (counter-- && fragment_index != page->index) {
fragment_index = *((unsigned long *) (page_address(page) + metadata_offset + 4));
Index: swapin.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/comp_cache/swapin.c,v
retrieving revision 1.55
retrieving revision 1.56
diff -C2 -r1.55 -r1.56
*** swapin.c 22 Nov 2002 16:01:42 -0000 1.55
--- swapin.c 6 Dec 2002 19:29:23 -0000 1.56
***************
*** 2,6 ****
* linux/mm/comp_cache/swapin.c
*
! * Time-stamp: <2002-11-21 15:23:30 rcastro>
*
* Linux Virtual Memory Compressed Cache
--- 2,6 ----
* linux/mm/comp_cache/swapin.c
*
! * Time-stamp: <2002-12-06 17:15:44 rcastro>
*
* Linux Virtual Memory Compressed Cache
***************
*** 156,161 ****
last_accessed = ACTIVE_FRAGMENT;
! /* See alair1 */
! /* compact_comp_cache(); */
}
else {
--- 156,161 ----
last_accessed = ACTIVE_FRAGMENT;
! /* -- version alair1 -- */
! /* compact_comp_cache(); */
}
else {
***************
*** 181,187 ****
decompress_fragment_to_page(fragment, page);
- comp_cache_update_read_stats(fragment);
spin_lock(&comp_cache_lock);
if (CompFragmentTestandClearDirty(fragment)) {
--- 181,187 ----
decompress_fragment_to_page(fragment, page);
spin_lock(&comp_cache_lock);
+ comp_cache_update_read_stats(fragment);
if (CompFragmentTestandClearDirty(fragment)) {
***************
*** 189,192 ****
--- 189,203 ----
__set_page_dirty(page);
}
+
+ /* The swap buffer must know if this fragment was reclaimed. In
+ * this case, we take a reference on this page for the swap
+ * buffer, since we want to make sure this page will not get
+ * compressed until the I/O operation is finished. This
+ * reference will be released when the swap buffer is
+ * freed. */
+ if (fragment->swp_buffer) {
+ fragment->swp_buffer->swap_cache_page = page;
+ page_cache_get(page);
+ }
UnlockPage(fragment->comp_page->page);
Index: swapout.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/comp_cache/swapout.c,v
retrieving revision 1.75
retrieving revision 1.76
diff -C2 -r1.75 -r1.76
*** swapout.c 29 Nov 2002 21:23:03 -0000 1.75
--- swapout.c 6 Dec 2002 19:29:23 -0000 1.76
***************
*** 2,6 ****
* /mm/comp_cache/swapout.c
*
! * Time-stamp: <2002-11-29 18:09:53 rcastro>
*
* Linux Virtual Memory Compressed Cache
--- 2,6 ----
* /mm/comp_cache/swapout.c
*
! * Time-stamp: <2002-12-05 17:11:02 rcastro>
*
* Linux Virtual Memory Compressed Cache
***************
*** 26,29 ****
--- 26,31 ----
unsigned long index;
} grouped_fragments[255];
+
+ static spinlock_t comp_swap_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
#endif
***************
*** 36,42 ****
{
struct list_head * swp_buffer_lh;
! struct page * buffer_page;
struct swp_buffer * swp_buffer;
struct comp_cache_fragment * fragment;
int wait, maxscan;
--- 38,45 ----
{
struct list_head * swp_buffer_lh;
! struct page * buffer_page, * swap_cache_page;
struct swp_buffer * swp_buffer;
struct comp_cache_fragment * fragment;
+ swp_entry_t entry;
int wait, maxscan;
***************
*** 72,75 ****
--- 75,79 ----
fragment = swp_buffer->fragment;
+ swap_cache_page = swp_buffer->swap_cache_page;
/* A swap buffer page that has been set to dirty means
***************
*** 78,90 ****
if (PageDirty(buffer_page)) {
spin_lock(&comp_cache_lock);
! if (fragment) {
! fragment->swp_buffer = NULL;
! spin_lock(&pagecache_lock);
! list_del(&fragment->mapping_list);
! list_add(&fragment->mapping_list, &fragment->mapping->dirty_comp_pages);
! spin_unlock(&pagecache_lock);
!
! CompFragmentSetDirty(fragment);
! }
ClearPageDirty(buffer_page);
spin_unlock(&comp_cache_lock);
--- 82,100 ----
if (PageDirty(buffer_page)) {
spin_lock(&comp_cache_lock);
!
! fragment->swp_buffer = NULL;
! put_fragment(fragment);
!
! spin_lock(&pagemap_lru_lock);
! if (swap_cache_page)
! page_cache_release(swap_cache_page);
! spin_unlock(&pagemap_lru_lock);
!
! spin_lock(&pagecache_lock);
! list_del(&fragment->mapping_list);
! list_add(&fragment->mapping_list, &fragment->mapping->dirty_comp_pages);
! spin_unlock(&pagecache_lock);
!
! CompFragmentSetDirty(fragment);
ClearPageDirty(buffer_page);
spin_unlock(&comp_cache_lock);
***************
*** 96,103 ****
* (if still needed). */
spin_lock(&comp_cache_lock);
! if (fragment) {
! fragment->swp_buffer = NULL;
! drop_fragment(fragment);
! }
spin_unlock(&comp_cache_lock);
add_to_free:
--- 106,124 ----
* (if still needed). */
spin_lock(&comp_cache_lock);
!
! /* now that the data is actually in the backing store, release
! * our references on the fragment... */
! fragment->swp_buffer = NULL;
! put_fragment(fragment);
! drop_fragment(fragment);
! spin_lock(&pagemap_lru_lock);
! if (swap_cache_page)
! page_cache_release(swap_cache_page);
! spin_unlock(&pagemap_lru_lock);
!
! /* and on the swap entry. */
! entry.val = buffer_page->index;
! swap_free(entry);
!
spin_unlock(&comp_cache_lock);
add_to_free:
***************
*** 125,129 ****
}
!
/**
* find_free_swp_buffer - gets a swap buffer page
--- 146,162 ----
}
! /**
! * sync_all_swp_buffers - syncs all pending swap buffers. This is
! * done in order to release references on swap entries for the
! * swapoff operation. It is a first implementation and I know it
! * can be smarter.
! */
! void
! sync_all_swp_buffers()
! {
! while (!list_empty(&swp_used_buffer_head))
! refill_swp_buffer(GFP_KERNEL, 1);
! }
!
/**
* find_free_swp_buffer - gets a swap buffer page
***************
*** 172,175 ****
--- 205,210 ----
spin_lock(&comp_cache_lock);
swp_buffer->fragment = fragment;
+ swp_buffer->swap_cache_page = NULL;
+
fragment->swp_buffer = swp_buffer;
***************
*** 201,204 ****
--- 236,241 ----
unsigned short counter, next_offset, metadata_size;
+ spin_lock(&comp_swap_lock);
+
entry.val = fragment->index;
real_entry = get_real_swap_page(entry);
***************
*** 229,233 ****
set_swap_compressed(entry, 0);
decompress_fragment_to_page(fragment, page);
! return;
}
--- 266,270 ----
set_swap_compressed(entry, 0);
decompress_fragment_to_page(fragment, page);
! goto out_release;
}
***************
*** 288,291 ****
--- 325,331 ----
next_offset += 4;
}
+
+ out_release:
+ spin_unlock(&comp_swap_lock);
}
#else
***************
*** 420,434 ****
remove_fragment_from_lru_queue(fragment);
! /* avoid to free this entry if we sleep below */
if (swap_cache_page && !swap_duplicate(entry))
! BUG();
- get_fragment(fragment);
spin_unlock(&comp_cache_lock);
swp_buffer = prepare_swp_buffer(fragment, gfp_mask);
! if (!swp_buffer)
! goto out;
spin_lock(&pagecache_lock);
list_del(&fragment->mapping_list);
--- 460,506 ----
remove_fragment_from_lru_queue(fragment);
! get_fragment(fragment);
!
! /* we need a reference on the swap count to perform
! * the I/O (in order to avoid freeing this swap
! * entry). If we can't duplicate the swap entry, the
! * entry has already been freed and this fragment will
! * probably be freed as soon as we release our
! * reference on it */
if (swap_cache_page && !swap_duplicate(entry))
! goto add_back;
spin_unlock(&comp_cache_lock);
+ /* so far, we have:
+ *
+ * (a) a reference on the fragment, so it won't be
+ * freed until the end of the I/O. This reference
+ * makes sure that any access to this page will read
+ * sane data from the fragment. Without it, the system
+ * could free the fragment in the meantime and submit
+ * a concurrent read operation, returning bogus
+ * data. The fragment can still be freed if reclaimed
+ * by the system.
+ *
+ * (b) and, for a swap cache page, a reference on the
+ * swap entry, so it won't be freed until the end of
+ * the I/O either. This reference is necessary since we
+ * want to keep the fragment alive in a safe way until
+ * we finish our writeout. If the swap entry is freed,
+ * we are no longer safe keeping a fragment set to
+ * this entry.
+ *
+ * thus we can go on and prepare swap buffers.
+ */
swp_buffer = prepare_swp_buffer(fragment, gfp_mask);
! if (!swp_buffer) {
! if (swap_cache_page)
! swap_free(entry);
! spin_lock(&comp_cache_lock);
! goto add_back;
! }
+ spin_lock(&comp_cache_lock);
spin_lock(&pagecache_lock);
list_del(&fragment->mapping_list);
***************
*** 439,465 ****
num_clean_fragments++;
! writepage = fragment->mapping->a_ops->writepage;
!
if (!writepage)
BUG();
writepage(swp_buffer->page);
nrpages--;
- out:
- if (swap_cache_page)
- swap_free(entry);
spin_lock(&comp_cache_lock);
- if (!swp_buffer) {
- if (likely(list == &inactive_lru_queue))
- add_fragment_to_inactive_lru_queue(fragment);
- else
- add_fragment_to_active_lru_queue(fragment);
- put_fragment(fragment);
- goto try_again;
- }
-
- put_fragment(fragment);
-
if (!nrpages)
break;
--- 511,525 ----
num_clean_fragments++;
! writepage = fragment->mapping->a_ops->writepage;
! spin_unlock(&comp_cache_lock);
!
if (!writepage)
BUG();
writepage(swp_buffer->page);
+
nrpages--;
spin_lock(&comp_cache_lock);
if (!nrpages)
break;
***************
*** 472,475 ****
--- 532,544 ----
spin_lock(&comp_cache_lock);
}
+ continue;
+
+ add_back:
+ if (likely(list == &inactive_lru_queue))
+ add_fragment_to_inactive_lru_queue(fragment);
+ else
+ add_fragment_to_active_lru_queue(fragment);
+ put_fragment(fragment);
+ goto try_again;
}
Index: vswap.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/comp_cache/vswap.c,v
retrieving revision 1.46
retrieving revision 1.47
diff -C2 -r1.46 -r1.47
*** vswap.c 29 Nov 2002 21:23:03 -0000 1.46
--- vswap.c 6 Dec 2002 19:29:23 -0000 1.47
***************
*** 2,6 ****
* linux/mm/comp_cache/vswap.c
*
! * Time-stamp: <2002-11-29 12:05:38 rcastro>
*
* Linux Virtual Memory Compressed Cache
--- 2,6 ----
* linux/mm/comp_cache/vswap.c
*
! * Time-stamp: <2002-12-03 14:27:24 rcastro>
*
* Linux Virtual Memory Compressed Cache
***************
*** 171,175 ****
comp_cache_available_space(void) {
int ret = 1;
!
spin_lock(&virtual_swap_list);
--- 171,175 ----
comp_cache_available_space(void) {
int ret = 1;
!
spin_lock(&virtual_swap_list);
|