#51 kswapd/kupdated deadlock

closed-works-for-me
Filesystem (49)
3
2005-09-29
2004-04-26
No

The kupdated process has an inode's i_sem locked and
then needs memory (wakes up kswapd). kswapd then runs,
needs the i_sem for the same inode, and hangs.

Stack traceback for pid 6
0xc7ff6000 6 2 0 0 D 0xc7ff6420
kswapd
EBP EIP Function (args)
0xc7ff7cb0 0xc0122881 schedule+0x1b1 (0x1, 0xc7ff6000,
0xc7186880, 0xc7186880, 0x1000)
kernel .text 0xc0100000
0xc01226d0 0xc0122a20
0xc7ff7cd4 0xc01089f3 __down+0x73 (0xc7186870,
0xfffffff2, 0xc7186800)
kernel .text 0xc0100000
0xc0108980 0xc0108a50
0xc7ff7ce8 0xc0108bcf __down_failed+0xb (0x0,
0xc30146ac, 0xc7ff7d1c, 0xc0241cf7, 0xc7ff7d3c)
kernel .text 0xc0100000
0xc0108bc4 0xc0108bd8
0xc0146eac .text.lock.filemap+0x27e
kernel .text 0xc0100000
0xc0146c2e 0xc0146ed0
0xc7ff7d5c 0xc0145f86 generic_file_write+0x76
(0xc7ff7dd4, 0xc0ad4000, 0x1000, 0xc7ff7df4, 0xc7ff6000)
kernel .text 0xc0100000
0xc0145f10 0xc01467c0
0xc7ff7d80 0xc0193825 ext3_file_write+0x35 (0xc7ff7dd4,
0xc0ad4000, 0x1000, 0xc7ff7df4, 0x1)
kernel .text 0xc0100000
0xc01937f0 0xc01938c0
0xc7ff7e50 0xc024de5e cfsd_write+0xde (0xc5ef6380,
0xff000, 0x0, 0xc0ad4000, 0x1000)
kernel .text 0xc0100000
0xc024dd80 0xc024ded0
0xc7ff7eb0 0xc02494ff cfs_proc_write+0x17f (0xc7ff7f1c,
0xc7ff6000, 0xff000, 0x0)
kernel .text 0xc0100000
0xc0249380 0xc0249540
0xc7ff7ecc 0xc023e477 msgsend+0x157 (0xc7186c00,
0xff000, 0x0, 0x1000, 0xc0ad400
kernel .text 0xc0100000 0xc023e320 0xc023e4d0
0xc7ff7eec 0xc02463e7 cfs_writepage_sync+0xb7 (0x0,
0xc7186c00, 0xc102e060, 0x0,
kernel .text 0xc0100000
0xc0246330 0xc02464e0
0xc7ff7f1c 0xc02465a5 cfs_writepage+0xc5 (0xc102e060,
0x292, 0xc102e074, 0x1, 0x
kernel .text 0xc0100000 0xc02464e0 0xc0246620
0xc7ff7f40 0xc014d63f launder_page+0x66f (0xc0540480,
0x1d0, 0xc102e060, 0x47, 0
kernel .text 0xc0100000 0xc014cfd0 0xc014d960
0xc7ff7f64 0xc014edbb rebalance_dirty_zone+0x9b
(0xc0540480, 0x100, 0x1d0, 0xc05
kernel .text 0xc0100000 0xc014ed20 0xc014ee50
0xc7ff7f84 0xc014eef6 rebalance_inactive_zone+0xa6
(0xc0540480, 0x100, 0x1d0, 0x
kernel .text 0xc0100000 0xc014ee50 0xc014ef40
0xc7ff7fa8 0xc014ef8a rebalance_inactive+0x4a (0x1d0,
0xf, 0xc7ff7fcc, 0x0, 0xc7
kernel .text 0xc0100000 0xc014ef40 0xc014efd0
0xc7ff7fcc 0xc014f0e5 do_try_to_free_pages_kswapd+0x65
(0x1d0, 0x2, 0x1d0, 0xc01
kernel .text 0xc0100000 0xc014f080 0xc014f260
0xc7ff7fec 0xc014f442 kswapd+0xe2
kernel .text 0xc0100000
0xc014f360 0xc014f5b0
0xc0107995 kernel_thread_helper+0x5
kernel .text 0xc0100000
0xc0107990 0xc01079a0

Stack traceback for pid 11
0xc7e64000 11 2 0 0 D 0xc7e64420
kupdated
EBP EIP Function (args)
0xc7e65cb0 0xc0122881 schedule+0x1b1 (0x0, 0x1, 0x0,
0xc7e64000, 0x0)
kernel .text 0xc0100000
0xc01226d0 0xc0122a20
0xc7e65ce8 0xc014f6a1 wakeup_kswapd+0xf1 (0x1d2, 0x0,
0x2, 0x1, 0xc0199f42)
kernel .text 0xc0100000
0xc014f5b0 0xc014f6b0
0xc7e65d2c 0xc0151263 __alloc_pages+0xe3
kernel .text 0xc0100000
0xc0151180 0xc01514f0
0xc7e65d34 0xc0151086 _alloc_pages+0x16 (0xc71868c0,
0x140, 0xc7e238ec, 0x1000,
kernel .text 0xc0100000
0xc0151070 0xc0151090
0xc7e65da8 0xc01462c0 generic_file_write+0x3b0
(0xc7e65e20, 0xc16f7000, 0x1000,
kernel .text 0xc0100000
0xc0145f10 0xc01467c0
0xc7e65dcc 0xc0193825 ext3_file_write+0x35 (0xc7e65e20,
0xc16f7000, 0x1000, 0xc7
kernel .text 0xc0100000 0xc01937f0 0xc01938c0
0xc7e65e9c 0xc024de5e cfsd_write+0xde (0xc5ef6380,
0x140000, 0x0, 0xc16f7000, 0x
kernel .text 0xc0100000 0xc024dd80 0xc024ded0
0xc7e65efc 0xc02494ff cfs_proc_write+0x17f (0xc7186c00,
0x140000, 0x0, 0x1000, 0
kernel .text 0xc0100000 0xc0249380 0xc0249540
0xc7e65f38 0xc02463e7 cfs_writepage_sync+0xb7 (0x0,
0xc7186c00, 0xc10619ac, 0x0,
kernel .text 0xc0100000 0xc0246330 0xc02464e0
0xc7e65f68 0xc02465a5 cfs_writepage+0xc5 (0xc10619ac,
0xc7186cd0, 0xc7186cc8, 0x[0]more> 0x0)
kernel .text 0xc0100000
0xc02464e0 0xc0246620

0xc7e65f90 0xc0142eeb filemap_fdatasync+0xcb
(0xc7186cc0, 0xc7e65fb8, 0xc0161252
kernel .text 0xc0100000 0xc0142e20 0xc0142f20
0xc7e65fb8 0xc017672b sync_unlocked_inodes+0xdb (0x0,
0xc7e64000, 0xc7e64000)
kernel .text 0xc0100000
0xc0176650 0xc01769c0
0xc7e65fcc 0xc015ffcb sync_old_buffers+0x2b
(0xc05532a0, 0x1, 0x0, 0xc0160390, 0
kernel .text 0xc0100000 0xc015ffa0 0xc0160050
0xc7e65fec 0xc0160488 kupdate+0xf8
kernel .text 0xc0100000
0xc0160390 0xc01604e0
0xc0107995 kernel_thread_helper+0x5
kernel .text 0xc0100000
0xc0107990 0xc01079a0

Discussion

  • David Zafman

    David Zafman - 2004-04-29

    Logged In: YES
    user_id=297844

    I put in a workaround for this problem:
    openssi/kernel/mm/vmscan.c 1.2.2.7

     
  • David Zafman

    David Zafman - 2004-06-17
    • priority: 5 --> 3
     
  • David Zafman

    David Zafman - 2004-06-30

    Logged In: YES
    user_id=297844

    A similar problem occurs without kupdated involvement. Here a cfs_async
    process can't get a page to complete a write operation, so it waits for
    kswapd. But kswapd is waiting for some pages to get to disk (I assume it
    is looping because it does have a timeout). Lots of other processes are
    in generic_file_write() because the cfs_async process holds the lock. I
    was running iozone which is writing to a single file.

    0xc1292000 14 2 0 0 D 0xc1292420 cfs_async
    schedule+0x1b6 (0x0, 0xc1292000, 0x0, 0x0, 0x0)
    wakeup_kswapd+0xe8 (0x1d2, 0xc0187fcd, 0xc1d73440, 0x1,
    0xc1530000)
    __alloc_pages+0xc0 (0xc1293e34)
    _alloc_pages+0x16 (0x0, 0x1000, 0xc1092e40, 0x1, 0x0)
    generic_file_write+0x383 (0xc1293ea8, 0xc2d39000, 0x1000,
    0xc1293ec8, 0xc1292000)
    ext3_file_write+0x21 (0xc1293ea8, 0xc2d39000, 0x1000,
    0xc1293ec8, 0xc1530000)
    cfsd_write+0xae (0xc15786a0, 0x181000, 0x0, 0xc2d39000, 0x1000)
    cfs_proc_write+0x179 (0xc2e0d000, 0x181000, 0x0, 0x1000,
    0xc2d39000)
    cfs_async_handler_write+0xd0 (0xc1d73a34, 0xc127ac74, 0xc1292000,
    0xc026fed0, 0x0)
    kernel_thread_helper+0x5

    0xc129e000 8 2 0 0 D 0xc129e420 kswapd
    schedule+0x1b6 (0xc129feec, 0xc0583f8c, 0xc0583f8c, 0x1f4f1,
    0xc129e000)
    schedule_timeout+0x52 (0xc1100668, 0x0, 0xc129e000, 0xc110066c,
    0xc110066c)
    wait_on_page_timeout+0xb9 (0xc107eb80, 0x1f4, 0x129ff70,
    0xc04e0de0, 0xc10793dc)
    rebalance_laundry_zone+0x23a (0xc04e0820, 0x100, 0x1d0,
    0xc04e0820, 0x100)
    rebalance_inactive_zone+0x72 (0xc04e0820, 0x100, 0x1d0, 0xf,
    0x0)
    rebalance_inactive+0x3f (0x1d0, 0xf, 0x0, 0xc129e000, 0x0)
    do_try_to_free_pages_kswapd+0x48 (0x1d0, 0xc0148430, 0x0,
    0x0)
    kswapd+0xd5
    kernel_thread_helper+0x5

     
  • David Zafman

    David Zafman - 2004-06-30

    Logged In: YES
    user_id=297844

    What appeared to be a hang in my last post eventually completed. This
    did cause the machine to thrash badly enough that I thought it was
    deadlocked.

     
  • David Zafman

    David Zafman - 2004-12-11
    • assigned_to: dzafman --> jlbyrne
     
  • Roger Tsang

    Roger Tsang - 2005-09-29

    Logged In: YES
    user_id=1246761

    Closing bug according to last post.

     
  • Roger Tsang

    Roger Tsang - 2005-09-29
    • status: open --> closed-works-for-me
     

Log in to post a comment.