[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-kernel
Subject:    XFS memory allocation deadlock in 2.6.38
From:       Sean Noonan <Sean.Noonan () twosigma ! com>
Date:       2011-03-21 16:19:44
Message-ID: 081DDE43F61F3D43929A181B477DCA95639B52FD () MSXAOA6 ! twosigma ! com
[Download message RAW]

This message was originally posted to the XFS mailing list, but received no \
responses.  Thus, I am sending it to LKML on the advice of Martin.

Using the attached program, we are able to reproduce this bug reliably.
$ make vmtest
$ ./vmtest /xfs/hugefile.dat $(( 16 * 1024 * 1024 * 1024 )) # vmtest <path_to_file> \
                <size_in_bytes>
/xfs/hugefile.dat: mapped 17179869184 bytes in 33822066943 ticks
749660: avg 13339 max 234667 ticks
371945: avg 26885 max 281616 ticks
---
At this point, we see the following on the console:
[593492.694806] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593506.724367] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593524.837717] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593556.742386] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)

This is the same message presented in
http://oss.sgi.com/bugzilla/show_bug.cgi?id=410

We started testing with 2.6.38-rc7 and have seen this bug through to the .0 release.  \
This does not appear to be present in 2.6.33, but we have not done testing in \
between.  We have tested with ext4 and do not encounter this bug. CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
# CONFIG_XFS_DEBUG is not set
# CONFIG_VXFS_FS is not set

Here is the stack from the process:
[<ffffffff81357553>] call_rwsem_down_write_failed+0x13/0x20
[<ffffffff812ddf1e>] xfs_ilock+0x7e/0x110
[<ffffffff8130132f>] __xfs_get_blocks+0x8f/0x4e0
[<ffffffff813017b1>] xfs_get_blocks+0x11/0x20
[<ffffffff8114ba3e>] __block_write_begin+0x1ee/0x5b0
[<ffffffff8114be9d>] block_page_mkwrite+0x9d/0xf0
[<ffffffff81307e05>] xfs_vm_page_mkwrite+0x15/0x20
[<ffffffff810f2ddb>] do_wp_page+0x54b/0x820
[<ffffffff810f347c>] handle_pte_fault+0x3cc/0x820
[<ffffffff810f5145>] handle_mm_fault+0x175/0x2f0
[<ffffffff8102e399>] do_page_fault+0x159/0x470
[<ffffffff816cf6cf>] page_fault+0x1f/0x30
[<ffffffffffffffff>] 0xffffffffffffffff

# uname -a
Linux testhost 2.6.38 #2 SMP PREEMPT Fri Mar 18 15:00:59 GMT 2011 x86_64 GNU/Linux

Please let me know if additional information is required.

Thanks!

Sean


["vmtest.c" (text/plain)]

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdint.h>
#include <inttypes.h>
#include <errno.h>
#include <fcntl.h>
#include <err.h>


static inline uint64_t
rdtsc(void)
{
    uint32_t low, high;
    __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
    return low | ((uint64_t) high) << 32;
}


void *
mmapfile(
    const char * filename,
    uint64_t len
)
{
    int perms = 0666;
    int open_flag = O_RDWR | O_CREAT;
    int mmap_flags = PROT_READ | PROT_WRITE;

    const int fd = open(filename, open_flag, perms);
    if (fd < 0)
	goto fail;

    // Ensure that the file is empty and the right size
    if (ftruncate(fd, 0) < 0)
	goto fail;

    if (ftruncate(fd, len) < 0)
	goto fail;

    // Map the entire actual length of the file
    void * const base = mmap(
	NULL,
	len,
	mmap_flags,
	MAP_SHARED | MAP_POPULATE,
	fd,
	0
    );
    if (base == MAP_FAILED)
	goto fail;

    close(fd);
    return base;

fail:
    err(1, "%s: Unable to map %"PRIu64" bytes", filename, len);
}


int main(
    int argc,
    char ** argv
)
{
    const char * filename = argv[1];
    const uint64_t len = argc > 2 ? strtoul(argv[2], NULL, 0) : (5ul << 30);
    const uint64_t max_index = len / sizeof(uint64_t);

    uint64_t mmap_time = -rdtsc();
    uint64_t * const buf = mmapfile(filename, len);
    mmap_time += rdtsc();
    fprintf(stderr, "%s: mapped %"PRIu64" bytes in %"PRIu64" ticks\n",
	filename,
	len,
	mmap_time
    );

    while (1)
    {
	uint64_t max = 0;
	uint64_t sum = 0;
	uint64_t i;
	const uint64_t loop_start = rdtsc();
	const uint64_t iters = 1 << 30;

	uint64_t start = loop_start;
	for (i = 0 ; i < iters ; i++)
	{
	    uint64_t i = lrand48() % max_index;
	    buf[i] += start;

	    uint64_t end = rdtsc();
	    const uint64_t delta = end - start;
	    start = end;

	    sum += delta;
	    if (delta > max)
		max = delta;

	    // Force a report every 10 billion ticks ~= 3 seconds
	    if (end - loop_start > 10e9)
		break;
	}

	printf("%"PRIu64": avg %"PRIu64" max %"PRIu64" ticks\n",
	    i,
	    i ? sum / i : 0,
	    max
	);
    }

    return 0;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic