aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c588
1 files changed, 166 insertions, 422 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5715279..755ee81 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -37,7 +37,6 @@
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
@@ -52,7 +51,7 @@ kmem_zone_t *xfs_ifork_zone;
kmem_zone_t *xfs_inode_zone;
/*
- * Used in xfs_itruncate(). This is the maximum number of extents
+ * Used in xfs_itruncate_extents(). This is the maximum number of extents
* freed from a file in a single transaction.
*/
#define XFS_ITRUNC_MAX_EXTENTS 2
@@ -167,7 +166,7 @@ xfs_imap_to_bp(
dip = (xfs_dinode_t *)xfs_buf_offset(bp,
(i << mp->m_sb.sb_inodelog));
- di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
+ di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
XFS_DINODE_GOOD_VERSION(dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
@@ -191,12 +190,6 @@ xfs_imap_to_bp(
}
xfs_inobp_check(mp, bp);
-
- /*
- * Mark the buffer as an inode buffer now that it looks good
- */
- XFS_BUF_SET_VTYPE(bp, B_FS_INO);
-
*bpp = bp;
return 0;
}
@@ -369,7 +362,7 @@ xfs_iformat(
/*
* no local regular files yet
*/
- if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
+ if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
xfs_warn(ip->i_mount,
"corrupt inode %Lu (local format for regular file).",
(unsigned long long) ip->i_ino);
@@ -802,7 +795,7 @@ xfs_iread(
* If we got something that isn't an inode it means someone
* (nfs or dmi) has a stale handle.
*/
- if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
+ if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
#ifdef DEBUG
xfs_alert(mp,
"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
@@ -1041,7 +1034,7 @@ xfs_ialloc(
if (pip && XFS_INHERIT_GID(pip)) {
ip->i_d.di_gid = pip->i_d.di_gid;
- if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
+ if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
ip->i_d.di_mode |= S_ISGID;
}
}
@@ -1098,14 +1091,14 @@ xfs_ialloc(
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
uint di_flags = 0;
- if ((mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
ip->i_d.di_extsize = pip->i_d.di_extsize;
}
- } else if ((mode & S_IFMT) == S_IFREG) {
+ } else if (S_ISREG(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_REALTIME;
if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
@@ -1153,7 +1146,7 @@ xfs_ialloc(
/*
* Log the new values stuffed into the inode.
*/
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, flags);
/* now that we have an i_mode we can setup inode ops and unlock */
@@ -1179,17 +1172,18 @@ xfs_ialloc(
* at least do it for regular files.
*/
#ifdef DEBUG
-void
+STATIC void
xfs_isize_check(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- xfs_fsize_t isize)
+ struct xfs_inode *ip,
+ xfs_fsize_t isize)
{
- xfs_fileoff_t map_first;
- int nimaps;
- xfs_bmbt_irec_t imaps[2];
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t map_first;
+ int nimaps;
+ xfs_bmbt_irec_t imaps[2];
+ int error;
- if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
+ if (!S_ISREG(ip->i_d.di_mode))
return;
if (XFS_IS_REALTIME_INODE(ip))
@@ -1204,178 +1198,23 @@ xfs_isize_check(
* The filesystem could be shutting down, so bmapi may return
* an error.
*/
- if (xfs_bmapi(NULL, ip, map_first,
+ error = xfs_bmapi_read(ip, map_first,
(XFS_B_TO_FSB(mp,
- (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
- map_first),
- XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
- NULL))
- return;
+ (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first),
+ imaps, &nimaps, XFS_BMAPI_ENTIRE);
+ if (error)
+ return;
ASSERT(nimaps == 1);
ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
}
+#else /* DEBUG */
+#define xfs_isize_check(ip, isize)
#endif /* DEBUG */
/*
- * Calculate the last possible buffered byte in a file. This must
- * include data that was buffered beyond the EOF by the write code.
- * This also needs to deal with overflowing the xfs_fsize_t type
- * which can happen for sizes near the limit.
- *
- * We also need to take into account any blocks beyond the EOF. It
- * may be the case that they were buffered by a write which failed.
- * In that case the pages will still be in memory, but the inode size
- * will never have been updated.
- */
-STATIC xfs_fsize_t
-xfs_file_last_byte(
- xfs_inode_t *ip)
-{
- xfs_mount_t *mp;
- xfs_fsize_t last_byte;
- xfs_fileoff_t last_block;
- xfs_fileoff_t size_last_block;
- int error;
-
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
-
- mp = ip->i_mount;
- /*
- * Only check for blocks beyond the EOF if the extents have
- * been read in. This eliminates the need for the inode lock,
- * and it also saves us from looking when it really isn't
- * necessary.
- */
- if (ip->i_df.if_flags & XFS_IFEXTENTS) {
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_bmap_last_offset(NULL, ip, &last_block,
- XFS_DATA_FORK);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (error) {
- last_block = 0;
- }
- } else {
- last_block = 0;
- }
- size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
- last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
-
- last_byte = XFS_FSB_TO_B(mp, last_block);
- if (last_byte < 0) {
- return XFS_MAXIOFFSET(mp);
- }
- last_byte += (1 << mp->m_writeio_log);
- if (last_byte < 0) {
- return XFS_MAXIOFFSET(mp);
- }
- return last_byte;
-}
-
-/*
- * Start the truncation of the file to new_size. The new size
- * must be smaller than the current size. This routine will
- * clear the buffer and page caches of file data in the removed
- * range, and xfs_itruncate_finish() will remove the underlying
- * disk blocks.
- *
- * The inode must have its I/O lock locked EXCLUSIVELY, and it
- * must NOT have the inode lock held at all. This is because we're
- * calling into the buffer/page cache code and we can't hold the
- * inode lock when we do so.
- *
- * We need to wait for any direct I/Os in flight to complete before we
- * proceed with the truncate. This is needed to prevent the extents
- * being read or written by the direct I/Os from being removed while the
- * I/O is in flight as there is no other method of synchronising
- * direct I/O with the truncate operation. Also, because we hold
- * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
- * started until the truncate completes and drops the lock. Essentially,
- * the xfs_ioend_wait() call forms an I/O barrier that provides strict
- * ordering between direct I/Os and the truncate operation.
- *
- * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
- * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used
- * in the case that the caller is locking things out of order and
- * may not be able to call xfs_itruncate_finish() with the inode lock
- * held without dropping the I/O lock. If the caller must drop the
- * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
- * must be called again with all the same restrictions as the initial
- * call.
- */
-int
-xfs_itruncate_start(
- xfs_inode_t *ip,
- uint flags,
- xfs_fsize_t new_size)
-{
- xfs_fsize_t last_byte;
- xfs_off_t toss_start;
- xfs_mount_t *mp;
- int error = 0;
-
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
- ASSERT((new_size == 0) || (new_size <= ip->i_size));
- ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
- (flags == XFS_ITRUNC_MAYBE));
-
- mp = ip->i_mount;
-
- /* wait for the completion of any pending DIOs */
- if (new_size == 0 || new_size < ip->i_size)
- xfs_ioend_wait(ip);
-
- /*
- * Call toss_pages or flushinval_pages to get rid of pages
- * overlapping the region being removed. We have to use
- * the less efficient flushinval_pages in the case that the
- * caller may not be able to finish the truncate without
- * dropping the inode's I/O lock. Make sure
- * to catch any pages brought in by buffers overlapping
- * the EOF by searching out beyond the isize by our
- * block size. We round new_size up to a block boundary
- * so that we don't toss things on the same block as
- * new_size but before it.
- *
- * Before calling toss_page or flushinval_pages, make sure to
- * call remapf() over the same region if the file is mapped.
- * This frees up mapped file references to the pages in the
- * given range and for the flushinval_pages case it ensures
- * that we get the latest mapped changes flushed out.
- */
- toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- toss_start = XFS_FSB_TO_B(mp, toss_start);
- if (toss_start < 0) {
- /*
- * The place to start tossing is beyond our maximum
- * file size, so there is no way that the data extended
- * out there.
- */
- return 0;
- }
- last_byte = xfs_file_last_byte(ip);
- trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
- if (last_byte > toss_start) {
- if (flags & XFS_ITRUNC_DEFINITE) {
- xfs_tosspages(ip, toss_start,
- -1, FI_REMAPF_LOCKED);
- } else {
- error = xfs_flushinval_pages(ip, toss_start,
- -1, FI_REMAPF_LOCKED);
- }
- }
-
-#ifdef DEBUG
- if (new_size == 0) {
- ASSERT(VN_CACHED(VFS_I(ip)) == 0);
- }
-#endif
- return error;
-}
-
-/*
- * Shrink the file to the given new_size. The new size must be smaller than
- * the current size. This will free up the underlying blocks in the removed
- * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
+ * Free up the underlying blocks past new_size. The new size must be smaller
+ * than the current size. This routine can be used both for the attribute and
+ * data fork, and does not modify the inode size, which is left to the caller.
*
* The transaction passed to this routine must have made a permanent log
* reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
@@ -1387,31 +1226,6 @@ xfs_itruncate_start(
* will be "held" within the returned transaction. This routine does NOT
* require any disk space to be reserved for it within the transaction.
*
- * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
- * indicates the fork which is to be truncated. For the attribute fork we only
- * support truncation to size 0.
- *
- * We use the sync parameter to indicate whether or not the first transaction
- * we perform might have to be synchronous. For the attr fork, it needs to be
- * so if the unlink of the inode is not yet known to be permanent in the log.
- * This keeps us from freeing and reusing the blocks of the attribute fork
- * before the unlink of the inode becomes permanent.
- *
- * For the data fork, we normally have to run synchronously if we're being
- * called out of the inactive path or we're being called out of the create path
- * where we're truncating an existing file. Either way, the truncate needs to
- * be sync so blocks don't reappear in the file with altered data in case of a
- * crash. wsync filesystems can run the first case async because anything that
- * shrinks the inode has to run sync so by the time we're called here from
- * inactive, the inode size is permanently set to 0.
- *
- * Calls from the truncate path always need to be sync unless we're in a wsync
- * filesystem and the file has already been unlinked.
- *
- * The caller is responsible for correctly setting the sync parameter. It gets
- * too hard for us to guess here which path we're being called out of just
- * based on inode state.
- *
* If we get an error, we must return with the inode locked and linked into the
* current transaction. This keeps things simple for the higher level code,
* because it always knows that the inode is locked and held in the transaction
@@ -1419,116 +1233,30 @@ xfs_itruncate_start(
* dirty on error so that transactions can be easily aborted if possible.
*/
int
-xfs_itruncate_finish(
- xfs_trans_t **tp,
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- int fork,
- int sync)
+xfs_itruncate_extents(
+ struct xfs_trans **tpp,
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_fsize_t new_size)
{
- xfs_fsblock_t first_block;
- xfs_fileoff_t first_unmap_block;
- xfs_fileoff_t last_block;
- xfs_filblks_t unmap_len=0;
- xfs_mount_t *mp;
- xfs_trans_t *ntp;
- int done;
- int committed;
- xfs_bmap_free_t free_list;
- int error;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp = *tpp;
+ struct xfs_trans *ntp;
+ xfs_bmap_free_t free_list;
+ xfs_fsblock_t first_block;
+ xfs_fileoff_t first_unmap_block;
+ xfs_fileoff_t last_block;
+ xfs_filblks_t unmap_len;
+ int committed;
+ int error = 0;
+ int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT((new_size == 0) || (new_size <= ip->i_size));
- ASSERT(*tp != NULL);
- ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
- ASSERT(ip->i_transp == *tp);
+ ASSERT(new_size <= ip->i_size);
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0);
-
-
- ntp = *tp;
- mp = (ntp)->t_mountp;
- ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
-
- /*
- * We only support truncating the entire attribute fork.
- */
- if (fork == XFS_ATTR_FORK) {
- new_size = 0LL;
- }
- first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- trace_xfs_itruncate_finish_start(ip, new_size);
-
- /*
- * The first thing we do is set the size to new_size permanently
- * on disk. This way we don't have to worry about anyone ever
- * being able to look at the data being freed even in the face
- * of a crash. What we're getting around here is the case where
- * we free a block, it is allocated to another file, it is written
- * to, and then we crash. If the new data gets written to the
- * file but the log buffers containing the free and reallocation
- * don't, then we'd end up with garbage in the blocks being freed.
- * As long as we make the new_size permanent before actually
- * freeing any blocks it doesn't matter if they get written to.
- *
- * The callers must signal into us whether or not the size
- * setting here must be synchronous. There are a few cases
- * where it doesn't have to be synchronous. Those cases
- * occur if the file is unlinked and we know the unlink is
- * permanent or if the blocks being truncated are guaranteed
- * to be beyond the inode eof (regardless of the link count)
- * and the eof value is permanent. Both of these cases occur
- * only on wsync-mounted filesystems. In those cases, we're
- * guaranteed that no user will ever see the data in the blocks
- * that are being truncated so the truncate can run async.
- * In the free beyond eof case, the file may wind up with
- * more blocks allocated to it than it needs if we crash
- * and that won't get fixed until the next time the file
- * is re-opened and closed but that's ok as that shouldn't
- * be too many blocks.
- *
- * However, we can't just make all wsync xactions run async
- * because there's one call out of the create path that needs
- * to run sync where it's truncating an existing file to size
- * 0 whose size is > 0.
- *
- * It's probably possible to come up with a test in this
- * routine that would correctly distinguish all the above
- * cases from the values of the function parameters and the
- * inode state but for sanity's sake, I've decided to let the
- * layers above just tell us. It's simpler to correctly figure
- * out in the layer above exactly under what conditions we
- * can run async and I think it's easier for others read and
- * follow the logic in case something has to be changed.
- * cscope is your friend -- rcc.
- *
- * The attribute fork is much simpler.
- *
- * For the attribute fork we allow the caller to tell us whether
- * the unlink of the inode that led to this call is yet permanent
- * in the on disk log. If it is not and we will be freeing extents
- * in this inode then we make the first transaction synchronous
- * to make sure that the unlink is permanent by the time we free
- * the blocks.
- */
- if (fork == XFS_DATA_FORK) {
- if (ip->i_d.di_nextents > 0) {
- /*
- * If we are not changing the file size then do
- * not update the on-disk file size - we may be
- * called from xfs_inactive_free_eofblocks(). If we
- * update the on-disk file size and then the system
- * crashes before the contents of the file are
- * flushed to disk then the files may be full of
- * holes (ie NULL files bug).
- */
- if (ip->i_size != new_size) {
- ip->i_d.di_size = new_size;
- ip->i_size = new_size;
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
- }
- }
- }
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
/*
* Since it is possible for space to become allocated beyond
@@ -1539,128 +1267,142 @@ xfs_itruncate_finish(
* beyond the maximum file size (ie it is the same as last_block),
* then there is nothing to do.
*/
+ first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
- ASSERT(first_unmap_block <= last_block);
- done = 0;
- if (last_block == first_unmap_block) {
- done = 1;
- } else {
- unmap_len = last_block - first_unmap_block + 1;
- }
+ if (first_unmap_block == last_block)
+ return 0;
+
+ ASSERT(first_unmap_block < last_block);
+ unmap_len = last_block - first_unmap_block + 1;
while (!done) {
- /*
- * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi()
- * will tell us whether it freed the entire range or
- * not. If this is a synchronous mount (wsync),
- * then we can tell bunmapi to keep all the
- * transactions asynchronous since the unlink
- * transaction that made this inode inactive has
- * already hit the disk. There's no danger of
- * the freed blocks being reused, there being a
- * crash, and the reused blocks suddenly reappearing
- * in this file with garbage in them once recovery
- * runs.
- */
xfs_bmap_init(&free_list, &first_block);
- error = xfs_bunmapi(ntp, ip,
+ error = xfs_bunmapi(tp, ip,
first_unmap_block, unmap_len,
- xfs_bmapi_aflag(fork),
+ xfs_bmapi_aflag(whichfork),
XFS_ITRUNC_MAX_EXTENTS,
&first_block, &free_list,
&done);
- if (error) {
- /*
- * If the bunmapi call encounters an error,
- * return to the caller where the transaction
- * can be properly aborted. We just need to
- * make sure we're not holding any resources
- * that we were not when we came in.
- */
- xfs_bmap_cancel(&free_list);
- return error;
- }
+ if (error)
+ goto out_bmap_cancel;
/*
* Duplicate the transaction that has the permanent
* reservation and commit the old transaction.
*/
- error = xfs_bmap_finish(tp, &free_list, &committed);
- ntp = *tp;
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (committed)
- xfs_trans_ijoin(ntp, ip);
-
- if (error) {
- /*
- * If the bmap finish call encounters an error, return
- * to the caller where the transaction can be properly
- * aborted. We just need to make sure we're not
- * holding any resources that we were not when we came
- * in.
- *
- * Aborting from this point might lose some blocks in
- * the file system, but oh well.
- */
- xfs_bmap_cancel(&free_list);
- return error;
- }
+ xfs_trans_ijoin(tp, ip, 0);
+ if (error)
+ goto out_bmap_cancel;
if (committed) {
/*
* Mark the inode dirty so it will be logged and
* moved forward in the log as part of every commit.
*/
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
- ntp = xfs_trans_dup(ntp);
- error = xfs_trans_commit(*tp, 0);
- *tp = ntp;
+ ntp = xfs_trans_dup(tp);
+ error = xfs_trans_commit(tp, 0);
+ tp = ntp;
- xfs_trans_ijoin(ntp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
if (error)
- return error;
+ goto out;
+
/*
- * transaction commit worked ok so we can drop the extra ticket
+ * Transaction commit worked ok so we can drop the extra ticket
* reference that we gained in xfs_trans_dup()
*/
- xfs_log_ticket_put(ntp->t_ticket);
- error = xfs_trans_reserve(ntp, 0,
+ xfs_log_ticket_put(tp->t_ticket);
+ error = xfs_trans_reserve(tp, 0,
XFS_ITRUNCATE_LOG_RES(mp), 0,
XFS_TRANS_PERM_LOG_RES,
XFS_ITRUNCATE_LOG_COUNT);
if (error)
- return error;
+ goto out;
}
+
+out:
+ *tpp = tp;
+ return error;
+out_bmap_cancel:
/*
- * Only update the size in the case of the data fork, but
- * always re-log the inode so that our permanent transaction
- * can keep on rolling it forward in the log.
+ * If the bunmapi call encounters an error, return to the caller where
+ * the transaction can be properly aborted. We just need to make sure
+ * we're not holding any resources that we were not when we came in.
*/
- if (fork == XFS_DATA_FORK) {
- xfs_isize_check(mp, ip, new_size);
+ xfs_bmap_cancel(&free_list);
+ goto out;
+}
+
+int
+xfs_itruncate_data(
+ struct xfs_trans **tpp,
+ struct xfs_inode *ip,
+ xfs_fsize_t new_size)
+{
+ int error;
+
+ trace_xfs_itruncate_data_start(ip, new_size);
+
+ /*
+ * The first thing we do is set the size to new_size permanently on
+ * disk. This way we don't have to worry about anyone ever being able
+ * to look at the data being freed even in the face of a crash.
+ * What we're getting around here is the case where we free a block, it
+ * is allocated to another file, it is written to, and then we crash.
+ * If the new data gets written to the file but the log buffers
+ * containing the free and reallocation don't, then we'd end up with
+ * garbage in the blocks being freed. As long as we make the new_size
+ * permanent before actually freeing any blocks it doesn't matter if
+ * they get written to.
+ */
+ if (ip->i_d.di_nextents > 0) {
/*
- * If we are not changing the file size then do
- * not update the on-disk file size - we may be
- * called from xfs_inactive_free_eofblocks(). If we
- * update the on-disk file size and then the system
- * crashes before the contents of the file are
- * flushed to disk then the files may be full of
- * holes (ie NULL files bug).
+ * If we are not changing the file size then do not update
+ * the on-disk file size - we may be called from
+ * xfs_inactive_free_eofblocks(). If we update the on-disk
+ * file size and then the system crashes before the contents
+ * of the file are flushed to disk then the files may be
+ * full of holes (ie NULL files bug).
*/
if (ip->i_size != new_size) {
ip->i_d.di_size = new_size;
ip->i_size = new_size;
+ xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
}
}
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
- ASSERT((new_size != 0) ||
- (fork == XFS_ATTR_FORK) ||
- (ip->i_delayed_blks == 0));
- ASSERT((new_size != 0) ||
- (fork == XFS_ATTR_FORK) ||
- (ip->i_d.di_nextents == 0));
- trace_xfs_itruncate_finish_end(ip, new_size);
+
+ error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
+ if (error)
+ return error;
+
+ /*
+ * If we are not changing the file size then do not update the on-disk
+ * file size - we may be called from xfs_inactive_free_eofblocks().
+ * If we update the on-disk file size and then the system crashes
+ * before the contents of the file are flushed to disk then the files
+ * may be full of holes (ie NULL files bug).
+ */
+ xfs_isize_check(ip, new_size);
+ if (ip->i_size != new_size) {
+ ip->i_d.di_size = new_size;
+ ip->i_size = new_size;
+ }
+
+ ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
+ ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
+
+ /*
+ * Always re-log the inode so that our permanent transaction can keep
+ * on rolling it forward in the log.
+ */
+ xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+
+ trace_xfs_itruncate_data_end(ip, new_size);
return 0;
}
@@ -1686,7 +1428,6 @@ xfs_iunlink(
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_mode != 0);
- ASSERT(ip->i_transp == tp);
mp = tp->t_mountp;
@@ -1709,7 +1450,7 @@ xfs_iunlink(
ASSERT(agi->agi_unlinked[bucket_index]);
ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
- if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
+ if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
/*
* There is already another inode in the bucket we need
* to add ourselves to. Add us at the front of the list.
@@ -1720,8 +1461,7 @@ xfs_iunlink(
if (error)
return error;
- ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
- /* both on-disk, don't endian flip twice */
+ ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
@@ -1786,7 +1526,7 @@ xfs_iunlink_remove(
agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
ASSERT(agino != 0);
bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
+ ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
ASSERT(agi->agi_unlinked[bucket_index]);
if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -1898,7 +1638,7 @@ xfs_iunlink_remove(
* inodes that are in memory - they all must be marked stale and attached to
* the cluster buffer.
*/
-STATIC void
+STATIC int
xfs_ifree_cluster(
xfs_inode_t *free_ip,
xfs_trans_t *tp,
@@ -1944,6 +1684,8 @@ xfs_ifree_cluster(
mp->m_bsize * blks_per_cluster,
XBF_LOCK);
+ if (!bp)
+ return ENOMEM;
/*
* Walk the inodes already attached to the buffer and mark them
* stale. These will all have the flush locks held, so an
@@ -1951,7 +1693,7 @@ xfs_ifree_cluster(
* stale first, we will not attempt to lock them in the loop
* below as the XFS_ISTALE flag will be set.
*/
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ lip = bp->b_fspriv;
while (lip) {
if (lip->li_type == XFS_LI_INODE) {
iip = (xfs_inode_log_item_t *)lip;
@@ -2053,6 +1795,7 @@ retry:
}
xfs_perag_put(pag);
+ return 0;
}
/*
@@ -2078,12 +1821,11 @@ xfs_ifree(
xfs_buf_t *ibp;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_transp == tp);
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0);
ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
- ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
+ (!S_ISREG(ip->i_d.di_mode)));
ASSERT(ip->i_d.di_nblocks == 0);
/*
@@ -2133,10 +1875,10 @@ xfs_ifree(
dip->di_mode = 0;
if (delete) {
- xfs_ifree_cluster(ip, tp, first_ino);
+ error = xfs_ifree_cluster(ip, tp, first_ino);
}
- return 0;
+ return error;
}
/*
@@ -2725,13 +2467,13 @@ cluster_corrupt_out:
* mark the buffer as an error and call them. Otherwise
* mark it as stale and brelse.
*/
- if (XFS_BUF_IODONE_FUNC(bp)) {
+ if (bp->b_iodone) {
XFS_BUF_UNDONE(bp);
- XFS_BUF_STALE(bp);
- XFS_BUF_ERROR(bp,EIO);
+ xfs_buf_stale(bp);
+ xfs_buf_ioerror(bp, EIO);
xfs_buf_ioend(bp, 0);
} else {
- XFS_BUF_STALE(bp);
+ xfs_buf_stale(bp);
xfs_buf_relse(bp);
}
}
@@ -2840,7 +2582,7 @@ xfs_iflush(
* If the buffer is pinned then push on the log now so we won't
* get stuck waiting in the write for too long.
*/
- if (XFS_BUF_ISPINNED(bp))
+ if (xfs_buf_ispinned(bp))
xfs_log_force(mp, 0);
/*
@@ -2852,9 +2594,11 @@ xfs_iflush(
goto cluster_corrupt_out;
if (flags & SYNC_WAIT)
- error = xfs_bwrite(mp, bp);
+ error = xfs_bwrite(bp);
else
- xfs_bdwrite(mp, bp);
+ xfs_buf_delwri_queue(bp);
+
+ xfs_buf_relse(bp);
return error;
corrupt_out:
@@ -2912,7 +2656,7 @@ xfs_iflush_int(
*/
xfs_synchronize_times(ip);
- if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
+ if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
@@ -2926,7 +2670,7 @@ xfs_iflush_int(
__func__, ip->i_ino, ip, ip->i_d.di_magic);
goto corrupt_out;
}
- if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+ if (S_ISREG(ip->i_d.di_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -2936,7 +2680,7 @@ xfs_iflush_int(
__func__, ip->i_ino, ip);
goto corrupt_out;
}
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ } else if (S_ISDIR(ip->i_d.di_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
@@ -3065,8 +2809,8 @@ xfs_iflush_int(
*/
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
- ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
+ ASSERT(bp->b_fspriv != NULL);
+ ASSERT(bp->b_iodone != NULL);
} else {
/*
* We're flushing an inode which is not in the AIL and has