/* mm->page_table_lock is held. mmap_sem is not held */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone)
{
pte_t pte;
swp_entry_t entry;
/* Don't look at this pte if it's been accessed recently. */
if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
mark_page_accessed(page);
return 0;
}
/* Don't bother unmapping pages that are active */
if (PageActive(page))
return 0;
/* Don't bother replenishing zones not under pressure.. */
if (!memclass(page_zone(page), classzone))
return 0;
if (TryLockPage(page))
return 0;
/* From this point on, the odds are that we're going to
* nuke this pte, so read and clear the pte. This hook
* is needed on CPUs which update the accessed and dirty
* bits in hardware.
*/
flush_cache_page(vma, address);
pte = ptep_get_and_clear(page_table);
flush_tlb_page(vma, address);
if (pte_dirty(pte))
set_page_dirty(page);
/*
* Is the page already in the swap cache? If so, then
* we can just drop our reference to it without doing
* any IO - it's already up-to-date on disk.
*/
if (PageSwapCache(page)) {
entry.val = page->index;
swap_duplicate(entry);
set_swap_pte:
set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
mm->rss--;
UnlockPage(page);
{
int freeable = page_count(page) - !!page->buffers <= 2;
page_cache_release(page);
return freeable;
}
}
/*
* Is it a clean page? Then it must be recoverable
* by just paging it in again, and we can just drop
* it.. or if it's dirty but has backing store,
* just mark the page dirty and drop it.
*
* However, this won't actually free any real
* memory, as the page will just be in the page cache
* somewhere, and as such we should just continue
* our scan.
*
* Basically, this just makes it possible for us to do
* some real work in the future in "refill_inactive()".
*/
if (page->mapping)
goto drop_pte;
if (!PageDirty(page))
goto drop_pte;
/*
* Anonymous buffercache pages can be left behind by
* concurrent truncate and pagefault.
*/
if (page->buffers)
goto preserve;
/*
* This is a dirty, swappable page. First of all,
* get a suitable swap entry for it, and make sure
* we have the swap cache set up to associate the
* page with that swap entry.
*/
for (;;) {
entry = get_swap_page();
if (!entry.val)
break;
/* Add it to the swap cache and mark it dirty
* (adding to the page cache will clear the dirty
* and uptodate bits, so we need to do it again)
*/
if (add_to_swap_cache(page, entry) == 0) {
SetPageUptodate(page);
set_page_dirty(page);
goto set_swap_pte;
}
//......... part of the code omitted here .........
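A minimal userspace sketch of the reference-count test behind "freeable" above; the struct and its fields are illustrative stand-ins for struct page, not kernel code:
#include <stdio.h>

/* Illustrative stand-in for the two struct page fields the test looks at. */
struct fake_page {
	int count;       /* what page_count() would return   */
	int has_buffers; /* non-zero if page->buffers is set */
};

/*
 * try_to_swap_out() is about to drop its own reference.  The page counts
 * as freeable if, apart from that reference and the swap cache's (and,
 * optionally, the buffer heads counted via !!buffers), nobody else holds
 * it: count - !!buffers <= 2.
 */
static int would_be_freeable(const struct fake_page *p)
{
	return p->count - !!p->has_buffers <= 2;
}

int main(void)
{
	struct fake_page only_us_and_swap_cache = { .count = 2, .has_buffers = 0 };
	struct fake_page mapped_elsewhere       = { .count = 3, .has_buffers = 0 };

	printf("%d\n", would_be_freeable(&only_us_and_swap_cache)); /* 1 */
	printf("%d\n", would_be_freeable(&mapped_elsewhere));       /* 0 */
	return 0;
}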
static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
struct page *dir_page = NULL;
struct nilfs_dir_entry *dir_de = NULL;
struct page *old_page;
struct nilfs_dir_entry *old_de;
struct nilfs_transaction_info ti;
int err;
err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
if (unlikely(err))
return err;
err = -ENOENT;
old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_de)
goto out;
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
dir_de = nilfs_dotdot(old_inode, &dir_page);
if (!dir_de)
goto out_old;
}
if (new_inode) {
struct page *new_page;
struct nilfs_dir_entry *new_de;
err = -ENOTEMPTY;
if (dir_de && !nilfs_empty_dir(new_inode))
goto out_dir;
err = -ENOENT;
new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
new_inode->i_ctime = CURRENT_TIME;
if (dir_de)
drop_nlink(new_inode);
drop_nlink(new_inode);
nilfs_mark_inode_dirty(new_inode);
} else {
err = nilfs_add_link(new_dentry, old_inode);
if (err)
goto out_dir;
if (dir_de) {
inc_nlink(new_dir);
nilfs_mark_inode_dirty(new_dir);
}
}
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
old_inode->i_ctime = CURRENT_TIME;
nilfs_delete_entry(old_de, old_page);
if (dir_de) {
nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
drop_nlink(old_dir);
}
nilfs_mark_inode_dirty(old_dir);
nilfs_mark_inode_dirty(old_inode);
err = nilfs_transaction_commit(old_dir->i_sb);
return err;
out_dir:
if (dir_de) {
kunmap(dir_page);
page_cache_release(dir_page);
}
out_old:
kunmap(old_page);
page_cache_release(old_page);
out:
nilfs_transaction_abort(old_dir->i_sb);
return err;
}
Developer: DenisLug, Project: mptcp, Lines: 87, Source: namei.c
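The link-count bookkeeping in nilfs_rename() follows the usual Unix rename rules. Below is a small model of the net nlink changes it applies; the struct and function names are made up for illustration and are not NILFS code:
#include <stdbool.h>
#include <stdio.h>

/* Net nlink changes, as read from the nilfs_rename() code above. */
struct nlink_delta {
	int old_dir;   /* directory the entry is moved out of       */
	int new_dir;   /* directory the entry is moved into         */
	int new_inode; /* inode replaced at the target name, if any */
};

static struct nlink_delta rename_deltas(bool moving_a_dir, bool target_exists)
{
	struct nlink_delta d = { 0, 0, 0 };

	if (target_exists) {
		/* the target entry is overwritten: it loses one link ...   */
		d.new_inode -= 1;
		/* ... plus its ".." link when a directory is moved over it */
		if (moving_a_dir)
			d.new_inode -= 1;
	} else if (moving_a_dir) {
		/* new_dir gains a ".." reference from the moved directory  */
		d.new_dir += 1;
	}

	/* old_dir loses the moved directory's ".." reference */
	if (moving_a_dir)
		d.old_dir -= 1;

	return d;
}

int main(void)
{
	struct nlink_delta d = rename_deltas(true, false);
	printf("dir rename into a free slot: old_dir %+d, new_dir %+d, new_inode %+d\n",
	       d.old_dir, d.new_dir, d.new_inode);
	return 0;
}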
Example 3: zswap_writeback_entry
/*
* Attempts to free an entry by adding a page to the swap cache,
* decompressing the entry data into the page, and issuing a
* bio write to write the page back to the swap device.
*
* This can be thought of as a "resumed writeback" of the page
* to the swap device. We are basically resuming the same swap
* writeback path that was intercepted with the frontswap_store()
* in the first place. After the page has been decompressed into
* the swap cache, the compressed version stored by zswap can be
* freed.
*/
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
struct zswap_header *zhdr;
swp_entry_t swpentry;
struct zswap_tree *tree;
pgoff_t offset;
struct zswap_entry *entry;
struct page *page;
u8 *src, *dst;
unsigned int dlen;
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
};
/* extract swpentry from data */
zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
swpentry = zhdr->swpentry; /* here */
zpool_unmap_handle(pool, handle);
tree = zswap_trees[swp_type(swpentry)];
offset = swp_offset(swpentry);
/* find and ref zswap entry */
spin_lock(&tree->lock);
entry = zswap_entry_find_get(&tree->rbroot, offset);
if (!entry) {
/* entry was invalidated */
spin_unlock(&tree->lock);
return 0;
}
spin_unlock(&tree->lock);
BUG_ON(offset != entry->offset);
/* try to allocate swap cache page */
switch (zswap_get_swap_cache_page(swpentry, &page)) {
case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
ret = -ENOMEM;
goto fail;
case ZSWAP_SWAPCACHE_EXIST:
/* page is already in the swap cache, ignore for now */
page_cache_release(page);
ret = -EEXIST;
goto fail;
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
/* decompress */
dlen = PAGE_SIZE;
src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
entry->length, dst, &dlen);
kunmap_atomic(dst);
zpool_unmap_handle(zswap_pool, entry->handle);
BUG_ON(ret);
BUG_ON(dlen != PAGE_SIZE);
/* page is up to date */
SetPageUptodate(page);
}
/* move it to the tail of the inactive list after end_writeback */
SetPageReclaim(page);
/* start writeback */
__swap_writepage(page, &wbc, end_swap_bio_write);
page_cache_release(page);
zswap_written_back_pages++;
spin_lock(&tree->lock);
/* drop local reference */
zswap_entry_put(tree, entry);
/*
* There are two possible situations for entry here:
* (1) refcount is 1 (normal case), entry is valid and on the tree
* (2) refcount is 0, entry is freed and not on the tree
* because invalidate happened during writeback
* search the tree and free the entry if it is found
*/
if (entry == zswap_rb_search(&tree->rbroot, offset))
zswap_entry_put(tree, entry);
spin_unlock(&tree->lock);
goto end;
//......... part of the code omitted here .........
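The two zswap_entry_put() calls near the end of the function reflect a common refcounting pattern: the rbtree holds one long-lived reference and the writeback path takes a temporary one. A simplified userspace sketch of that discipline (the struct and helpers are illustrative, not the zswap implementation, and the tree bookkeeping is omitted):
#include <stdio.h>
#include <stdlib.h>

/* Simplified refcounted entry; the real code also erases it from an rbtree
 * and frees the compressed data when the last reference goes away. */
struct entry {
	int refcount;
};

static void entry_get(struct entry *e)
{
	e->refcount++;
}

static void entry_put(struct entry *e)
{
	if (--e->refcount == 0) {
		printf("entry freed\n");
		free(e);
	}
}

int main(void)
{
	/* creation: the lookup structure (the rbtree in zswap) owns one reference */
	struct entry *e = calloc(1, sizeof(*e));
	if (!e)
		return 1;
	e->refcount = 1;

	/* writeback path: take a temporary local reference ... */
	entry_get(e);

	/* ... do the work, then drop the local reference ... */
	entry_put(e);

	/* ... and, if the entry is still published, drop the owner's reference
	 * as well so it is finally freed (mirrors the second zswap_entry_put()). */
	entry_put(e);
	return 0;
}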
/*
* Prepare the write for the inline data.
* If the data can be written into the inode, we just read
* the page and make it uptodate, and start the journal.
* Otherwise read the page, mark it dirty so that it can be
* handled in writepages() (the i_disksize update is left to the
* normal ext4_da_write_end).
*/
int ext4_da_write_inline_data_begin(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
unsigned flags,
struct page **pagep,
void **fsdata)
{
int ret, inline_size;
handle_t *handle;
struct page *page;
struct ext4_iloc iloc;
ret = ext4_get_inode_loc(inode, &iloc);
if (ret)
return ret;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
goto out;
}
inline_size = ext4_get_max_inline_size(inode);
ret = -ENOSPC;
if (inline_size >= pos + len) {
ret = ext4_prepare_inline_data(handle, inode, pos + len);
if (ret && ret != -ENOSPC)
goto out;
}
if (ret == -ENOSPC) {
ret = ext4_da_convert_inline_data_to_extent(mapping,
inode,
flags,
fsdata);
goto out;
}
/*
* We cannot recurse into the filesystem as the transaction
* is already started.
*/
flags |= AOP_FLAG_NOFS;
page = grab_cache_page_write_begin(mapping, 0, flags);
if (!page) {
ret = -ENOMEM;
goto out;
}
down_read(&EXT4_I(inode)->xattr_sem);
if (!ext4_has_inline_data(inode)) {
ret = 0;
goto out_release_page;
}
if (!PageUptodate(page)) {
ret = ext4_read_inline_page(inode, page);
if (ret < 0)
goto out_release_page;
}
up_read(&EXT4_I(inode)->xattr_sem);
*pagep = page;
handle = NULL;
brelse(iloc.bh);
return 1;
out_release_page:
up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page);
page_cache_release(page);
out:
if (handle)
ext4_journal_stop(handle);
brelse(iloc.bh);
return ret;
}
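Both ext4_da_write_inline_data_begin() above and ext4_try_to_write_inline_data() further below make the same initial decision: if the write still fits in the inode's inline area, reserve that space; otherwise convert the inline data to a regular extent-backed page. A hedged sketch of just that decision follows; the size returned below is a made-up placeholder for ext4_get_max_inline_size(), which in reality depends on the inode and its xattr space:
#include <stdio.h>

/* Placeholder for ext4_get_max_inline_size(); the value is illustrative. */
static unsigned long long max_inline_size(void)
{
	return 188;
}

/* Returns 1 if a write of len bytes at pos can stay inline,
 * 0 if the inline data must first be converted to an extent. */
static int write_fits_inline(unsigned long long pos, unsigned long long len)
{
	return pos + len <= max_inline_size();
}

int main(void)
{
	printf("small write: %s\n",
	       write_fits_inline(0, 100) ? "stay inline" : "convert to extent");
	printf("page-sized write: %s\n",
	       write_fits_inline(0, 4096) ? "stay inline" : "convert to extent");
	return 0;
}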
int rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess, RTR0PROCESS R0Process)
{
const int cPages = cb >> PAGE_SHIFT;
struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
struct vm_area_struct **papVMAs;
PRTR0MEMOBJLNX pMemLnx;
int rc = VERR_NO_MEMORY;
NOREF(fAccess);
/*
* Check for valid task and size overflows.
*/
if (!pTask)
return VERR_NOT_SUPPORTED;
if (((size_t)cPages << PAGE_SHIFT) != cb)
return VERR_OUT_OF_RANGE;
/*
* Allocate the memory object and a temporary buffer for the VMAs.
*/
pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
if (!pMemLnx)
return VERR_NO_MEMORY;
papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
if (papVMAs)
{
down_read(&pTask->mm->mmap_sem);
/*
* Get user pages.
*/
rc = get_user_pages(pTask, /* Task for fault accounting. */
pTask->mm, /* Whose pages. */
R3Ptr, /* Where from. */
cPages, /* How many pages. */
1, /* Write to memory. */
0, /* force. */
&pMemLnx->apPages[0], /* Page array. */
papVMAs); /* vmas */
if (rc == cPages)
{
/*
* Flush dcache (required?), protect against fork and _really_ pin the page
* table entries. get_user_pages() will protect against swapping out the
* pages but it will NOT protect against removing page table entries. This
* can be achieved with
* - using mlock / mmap(..., MAP_LOCKED, ...) from userland. This requires
* an appropriate limit set up with setrlimit(..., RLIMIT_MEMLOCK, ...).
* Usual Linux distributions support only a limited size of locked pages
* (e.g. 32KB).
* - setting the PageReserved bit (as we do in rtR0MemObjLinuxAllocPages()),
* or by
* - setting the VM_LOCKED flag. This is the same as doing mlock() without
* a range check.
*/
/** @todo The Linux fork() protection will require more work if this API
* is to be used for anything but locking VM pages. */
while (rc-- > 0)
{
flush_dcache_page(pMemLnx->apPages[rc]);
papVMAs[rc]->vm_flags |= (VM_DONTCOPY | VM_LOCKED);
}
up_read(&pTask->mm->mmap_sem);
RTMemFree(papVMAs);
pMemLnx->Core.u.Lock.R0Process = R0Process;
pMemLnx->cPages = cPages;
Assert(!pMemLnx->fMappedToRing0);
*ppMem = &pMemLnx->Core;
return VINF_SUCCESS;
}
/*
* Failed - we need to unlock any pages that we succeeded in locking.
*/
while (rc-- > 0)
{
if (!PageReserved(pMemLnx->apPages[rc]))
SetPageDirty(pMemLnx->apPages[rc]);
page_cache_release(pMemLnx->apPages[rc]);
}
up_read(&pTask->mm->mmap_sem);
RTMemFree(papVMAs);
rc = VERR_LOCK_FAILED;
}
rtR0MemObjDelete(&pMemLnx->Core);
return rc;
}
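The comment in the success path above notes that the userland way of keeping pages resident is mlock()/MAP_LOCKED, subject to RLIMIT_MEMLOCK. A small standalone sketch of that path (error handling trimmed to the essentials):
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
	const size_t len = 4096;
	struct rlimit rl;

	/* The locked-pages budget the comment refers to. */
	if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0)
		printf("RLIMIT_MEMLOCK: soft %llu, hard %llu\n",
		       (unsigned long long)rl.rlim_cur,
		       (unsigned long long)rl.rlim_max);

	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* Lock the range so its pages stay resident; this fails (ENOMEM/EPERM)
	 * if the range would exceed the RLIMIT_MEMLOCK budget. */
	if (mlock(buf, len) != 0) {
		perror("mlock");
	} else {
		memset(buf, 0, len);
		munlock(buf, len);
	}

	munmap(buf, len);
	return 0;
}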
/*
* Locate a page of swap in physical memory, reserving swap cache space
* and reading the disk if it is not already cached.
* A failure return means that either the page allocation failed or that
* the swap entry is no longer in use.
*/
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma, unsigned long addr)
{
struct page *found_page, *new_page = NULL;
int err;
do {
/*
* First check the swap cache. Since this is normally
* called after lookup_swap_cache() failed, re-calling
* that would confuse statistics.
*/
found_page = find_get_page(&swapper_space, entry.val);
if (found_page)
break;
/*
* Get a new page to read into from swap.
*/
if (!new_page) {
new_page = alloc_page_vma(gfp_mask, vma, addr);
if (!new_page)
break; /* Out of memory */
}
/*
* call radix_tree_preload() while we can wait.
*/
err = radix_tree_preload(gfp_mask & GFP_KERNEL);
if (err)
break;
/*
* Swap entry may have been freed since our caller observed it.
*/
err = swapcache_prepare(entry);
if (err == -EEXIST) {
radix_tree_preload_end();
/*
* We might race against get_swap_page() and stumble
* across a SWAP_HAS_CACHE swap_map entry whose page
* has not been brought into the swapcache yet, while
* the other end is scheduled away waiting on discard
* I/O completion at scan_swap_map().
*
* In order to avoid turning this transitory state
* into a permanent loop around this -EEXIST case
* if !CONFIG_PREEMPT and the I/O completion happens
* to be waiting on the CPU waitqueue where we are now
* busy looping, we just conditionally invoke the
* scheduler here, if there are some more important
* tasks to run.
*/
cond_resched();
continue;
}
if (err) { /* swp entry is obsolete ? */
radix_tree_preload_end();
break;
}
/* May fail (-ENOMEM) if radix-tree node allocation failed. */
__set_page_locked(new_page);
SetPageSwapBacked(new_page);
err = __add_to_swap_cache(new_page, entry);
if (likely(!err)) {
radix_tree_preload_end();
/*
* Initiate read into locked page and return.
*/
lru_cache_add_anon(new_page);
swap_readpage(new_page);
return new_page;
}
radix_tree_preload_end();
ClearPageSwapBacked(new_page);
__clear_page_locked(new_page);
/*
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
swapcache_free(entry, NULL);
} while (err != -ENOMEM);
if (new_page)
page_cache_release(new_page);
return found_page;
}
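The -EEXIST branch in read_swap_cache_async() handles a transient state: another CPU has claimed the swap slot but has not inserted its page yet, so the loop yields and retries instead of failing or busy-spinning. A minimal userspace sketch of that retry discipline, with a fake claim_slot() standing in for swapcache_prepare():
#include <errno.h>
#include <sched.h>
#include <stdio.h>

/* Stand-in for swapcache_prepare(): pretend the slot is transiently busy
 * for the first few attempts, then becomes available. */
static int claim_slot(int attempt)
{
	return attempt < 3 ? -EEXIST : 0;
}

int main(void)
{
	int attempt = 0;
	int err;

	for (;;) {
		err = claim_slot(attempt++);
		if (err == -EEXIST) {
			/* Transient: the other side has not published its page
			 * yet; yield and retry (mirrors the cond_resched()). */
			sched_yield();
			continue;
		}
		break;
	}

	printf("claimed after %d attempts, err=%d\n", attempt, err);
	return 0;
}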
/*
* Perform a free_page(), also freeing any swap cache associated with
* this page if it is the last user of the page.
*/
void free_page_and_swap_cache(struct page *page)
{
free_swap_cache(page);
page_cache_release(page);
}
/*
* Try to write data in the inode.
* If the inode has inline data, check whether the new write can be
* in the inode as well. If not, create the page and the handle, move the data
* to the page, make it uptodate and let the later code create an extent for it.
*/
int ext4_try_to_write_inline_data(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
unsigned flags,
struct page **pagep)
{
int ret;
handle_t *handle;
struct page *page;
struct ext4_iloc iloc;
if (pos + len > ext4_get_max_inline_size(inode))
goto convert;
ret = ext4_get_inode_loc(inode, &iloc);
if (ret)
return ret;
/*
* The possible write could happen in the inode,
* so try to reserve the space in inode first.
*/
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
goto out;
}
ret = ext4_prepare_inline_data(handle, inode, pos + len);
if (ret && ret != -ENOSPC)
goto out;
/* We don't have space in inline inode, so convert it to extent. */
if (ret == -ENOSPC) {
ext4_journal_stop(handle);
brelse(iloc.bh);
goto convert;
}
flags |= AOP_FLAG_NOFS;
page = grab_cache_page_write_begin(mapping, 0, flags);
if (!page) {
ret = -ENOMEM;
goto out;
}
*pagep = page;
down_read(&EXT4_I(inode)->xattr_sem);
if (!ext4_has_inline_data(inode)) {
ret = 0;
unlock_page(page);
page_cache_release(page);
goto out_up_read;
}
if (!PageUptodate(page)) {
ret = ext4_read_inline_page(inode, page);
if (ret < 0)
goto out_up_read;
}
ret = 1;
handle = NULL;
out_up_read:
up_read(&EXT4_I(inode)->xattr_sem);
out:
if (handle)
ext4_journal_stop(handle);
brelse(iloc.bh);
return ret;
convert:
return ext4_convert_inline_data_to_extent(mapping,
inode, flags);
}