diff options
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r-- | fs/xfs/xfs_log.c | 451 |
1 files changed, 218 insertions, 233 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 41d5b8f..10ca5e5 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -150,6 +150,117 @@ xlog_grant_add_space( } while (head_val != old); } +STATIC bool +xlog_reserveq_wake( + struct log *log, + int *free_bytes) +{ + struct xlog_ticket *tic; + int need_bytes; + + list_for_each_entry(tic, &log->l_reserveq, t_queue) { + if (tic->t_flags & XLOG_TIC_PERM_RESERV) + need_bytes = tic->t_unit_res * tic->t_cnt; + else + need_bytes = tic->t_unit_res; + + if (*free_bytes < need_bytes) + return false; + *free_bytes -= need_bytes; + + trace_xfs_log_grant_wake_up(log, tic); + wake_up(&tic->t_wait); + } + + return true; +} + +STATIC bool +xlog_writeq_wake( + struct log *log, + int *free_bytes) +{ + struct xlog_ticket *tic; + int need_bytes; + + list_for_each_entry(tic, &log->l_writeq, t_queue) { + ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); + + need_bytes = tic->t_unit_res; + + if (*free_bytes < need_bytes) + return false; + *free_bytes -= need_bytes; + + trace_xfs_log_regrant_write_wake_up(log, tic); + wake_up(&tic->t_wait); + } + + return true; +} + +STATIC int +xlog_reserveq_wait( + struct log *log, + struct xlog_ticket *tic, + int need_bytes) +{ + list_add_tail(&tic->t_queue, &log->l_reserveq); + + do { + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + xlog_grant_push_ail(log, need_bytes); + + XFS_STATS_INC(xs_sleep_logspace); + trace_xfs_log_grant_sleep(log, tic); + + xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); + trace_xfs_log_grant_wake(log, tic); + + spin_lock(&log->l_grant_reserve_lock); + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes); + + list_del_init(&tic->t_queue); + return 0; +shutdown: + list_del_init(&tic->t_queue); + return XFS_ERROR(EIO); +} + +STATIC int +xlog_writeq_wait( + struct log *log, + struct xlog_ticket *tic, + int need_bytes) +{ + list_add_tail(&tic->t_queue, &log->l_writeq); + + do { + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + xlog_grant_push_ail(log, need_bytes); + + XFS_STATS_INC(xs_sleep_logspace); + trace_xfs_log_regrant_write_sleep(log, tic); + + xlog_wait(&tic->t_wait, &log->l_grant_write_lock); + trace_xfs_log_regrant_write_wake(log, tic); + + spin_lock(&log->l_grant_write_lock); + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes); + + list_del_init(&tic->t_queue); + return 0; +shutdown: + list_del_init(&tic->t_queue); + return XFS_ERROR(EIO); +} + static void xlog_tic_reset_res(xlog_ticket_t *tic) { @@ -350,8 +461,19 @@ xfs_log_reserve( retval = xlog_grant_log_space(log, internal_ticket); } + if (unlikely(retval)) { + /* + * If we are failing, make sure the ticket doesn't have any + * current reservations. We don't want to add this back + * when the ticket/ transaction gets cancelled. + */ + internal_ticket->t_curr_res = 0; + /* ungrant will give back unit_res * t_cnt. */ + internal_ticket->t_cnt = 0; + } + return retval; -} /* xfs_log_reserve */ +} /* @@ -531,8 +653,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) .lv_iovecp = ®, }; - /* remove inited flag */ + /* remove inited flag, and account for space used */ tic->t_flags = 0; + tic->t_curr_res -= sizeof(magic); error = xlog_write(log, &vec, tic, &lsn, NULL, XLOG_UNMOUNT_TRANS); /* @@ -626,7 +749,7 @@ xfs_log_item_init( struct xfs_mount *mp, struct xfs_log_item *item, int type, - struct xfs_item_ops *ops) + const struct xfs_item_ops *ops) { item->li_mountp = mp; item->li_ailp = mp->m_ail; @@ -871,23 +994,17 @@ xlog_space_left( void xlog_iodone(xfs_buf_t *bp) { - xlog_in_core_t *iclog; - xlog_t *l; - int aborted; - - iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); - ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); - aborted = 0; - l = iclog->ic_log; + xlog_in_core_t *iclog = bp->b_fspriv; + xlog_t *l = iclog->ic_log; + int aborted = 0; /* * Race to shutdown the filesystem if we see an error. */ - if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, + if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { - xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp)); - XFS_BUF_STALE(bp); + xfs_buf_ioerror_alert(bp, __func__); + xfs_buf_stale(bp); xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); /* * This flag will be propagated to the trans-committed @@ -1053,13 +1170,11 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_get_iclog_buffer_size(mp, log); error = ENOMEM; - bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); + bp = xfs_buf_alloc(mp->m_logdev_targp, 0, log->l_iclog_size, 0); if (!bp) goto out_free_log; - XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); - ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + bp->b_iodone = xlog_iodone; + ASSERT(xfs_buf_islocked(bp)); log->l_xbuf = bp; spin_lock_init(&log->l_icloglock); @@ -1090,10 +1205,8 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog_size, 0); if (!bp) goto out_free_iclog; - if (!XFS_BUF_CPSEMA(bp)) - ASSERT(0); - XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); + + bp->b_iodone = xlog_iodone; iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; #ifdef DEBUG @@ -1117,8 +1230,7 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; - ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); - ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); + ASSERT(xfs_buf_islocked(iclog->ic_bp)); init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); @@ -1254,12 +1366,11 @@ STATIC int xlog_bdstrat( struct xfs_buf *bp) { - struct xlog_in_core *iclog; + struct xlog_in_core *iclog = bp->b_fspriv; - iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); if (iclog->ic_state & XLOG_STATE_IOERROR) { - XFS_BUF_ERROR(bp, EIO); - XFS_BUF_STALE(bp); + xfs_buf_ioerror(bp, EIO); + xfs_buf_stale(bp); xfs_buf_ioend(bp, 0); /* * It would seem logical to return EIO here, but we rely on @@ -1269,7 +1380,6 @@ xlog_bdstrat( return 0; } - bp->b_flags |= _XBF_RUN_QUEUES; xfs_buf_iorequest(bp); return 0; } @@ -1351,8 +1461,6 @@ xlog_sync(xlog_t *log, } bp = iclog->ic_bp; - ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); @@ -1366,22 +1474,27 @@ xlog_sync(xlog_t *log, iclog->ic_bwritecnt = 1; } XFS_BUF_SET_COUNT(bp, count); - XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ + bp->b_fspriv = iclog; XFS_BUF_ZEROFLAGS(bp); - XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); - bp->b_flags |= XBF_LOG_BUFFER; + bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { + bp->b_flags |= XBF_FUA; + /* - * If we have an external log device, flush the data device - * before flushing the log to make sure all meta data - * written back from the AIL actually made it to disk - * before writing out the new log tail LSN in the log buffer. + * Flush the data device before flushing the log to make + * sure all meta data written back from the AIL actually made + * it to disk before stamping the new log tail LSN into the + * log buffer. For an external log we need to issue the + * flush explicitly, and unfortunately synchronously here; + * for an internal log we can simply use the block layer + * state machine for preflushes. */ if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); - XFS_BUF_ORDERED(bp); + else + bp->b_flags |= XBF_FLUSH; } ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); @@ -1397,27 +1510,23 @@ xlog_sync(xlog_t *log, */ XFS_BUF_WRITE(bp); - if ((error = xlog_bdstrat(bp))) { - xfs_ioerror_alert("xlog_sync", log->l_mp, bp, - XFS_BUF_ADDR(bp)); + error = xlog_bdstrat(bp); + if (error) { + xfs_buf_ioerror_alert(bp, "xlog_sync"); return error; } if (split) { bp = iclog->ic_log->l_xbuf; - ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == - (unsigned long)1); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ - XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ - (__psint_t)count), split); - XFS_BUF_SET_FSPRIVATE(bp, iclog); + xfs_buf_associate_memory(bp, + (char *)&iclog->ic_header + count, split); + bp->b_fspriv = iclog; XFS_BUF_ZEROFLAGS(bp); - XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); - bp->b_flags |= XBF_LOG_BUFFER; + bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) - XFS_BUF_ORDERED(bp); - dptr = XFS_BUF_PTR(bp); + bp->b_flags |= XBF_FUA; + dptr = bp->b_addr; /* * Bump the cycle numbers at the start of each block * since this part of the buffer is at the start of @@ -1437,9 +1546,9 @@ xlog_sync(xlog_t *log, /* account for internal log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); XFS_BUF_WRITE(bp); - if ((error = xlog_bdstrat(bp))) { - xfs_ioerror_alert("xlog_sync (split)", log->l_mp, - bp, XFS_BUF_ADDR(bp)); + error = xlog_bdstrat(bp); + if (error) { + xfs_buf_ioerror_alert(bp, "xlog_sync (split)"); return error; } } @@ -2495,8 +2604,8 @@ restart: /* * Atomically get the log space required for a log ticket. * - * Once a ticket gets put onto the reserveq, it will only return after - * the needed reservation is satisfied. + * Once a ticket gets put onto the reserveq, it will only return after the + * needed reservation is satisfied. * * This function is structured so that it has a lock free fast path. This is * necessary because every new transaction reservation will come through this @@ -2504,113 +2613,53 @@ restart: * every pass. * * As tickets are only ever moved on and off the reserveq under the - * l_grant_reserve_lock, we only need to take that lock if we are going - * to add the ticket to the queue and sleep. We can avoid taking the lock if the - * ticket was never added to the reserveq because the t_queue list head will be - * empty and we hold the only reference to it so it can safely be checked - * unlocked. + * l_grant_reserve_lock, we only need to take that lock if we are going to add + * the ticket to the queue and sleep. We can avoid taking the lock if the ticket + * was never added to the reserveq because the t_queue list head will be empty + * and we hold the only reference to it so it can safely be checked unlocked. */ STATIC int -xlog_grant_log_space(xlog_t *log, - xlog_ticket_t *tic) +xlog_grant_log_space( + struct log *log, + struct xlog_ticket *tic) { - int free_bytes; - int need_bytes; + int free_bytes, need_bytes; + int error = 0; -#ifdef DEBUG - if (log->l_flags & XLOG_ACTIVE_RECOVERY) - panic("grant Recovery problem"); -#endif + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); trace_xfs_log_grant_enter(log, tic); + /* + * If there are other waiters on the queue then give them a chance at + * logspace before us. Wake up the first waiters, if we do not wake + * up all the waiters then go to sleep waiting for more free space, + * otherwise try to get some space for this transaction. + */ need_bytes = tic->t_unit_res; if (tic->t_flags & XFS_LOG_PERM_RESERV) need_bytes *= tic->t_ocnt; - - /* something is already sleeping; insert new transaction at end */ - if (!list_empty_careful(&log->l_reserveq)) { - spin_lock(&log->l_grant_reserve_lock); - /* recheck the queue now we are locked */ - if (list_empty(&log->l_reserveq)) { - spin_unlock(&log->l_grant_reserve_lock); - goto redo; - } - list_add_tail(&tic->t_queue, &log->l_reserveq); - - trace_xfs_log_grant_sleep1(log, tic); - - /* - * Gotta check this before going to sleep, while we're - * holding the grant lock. - */ - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - - XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); - - /* - * If we got an error, and the filesystem is shutting down, - * we'll catch it down below. So just continue... - */ - trace_xfs_log_grant_wake1(log, tic); - } - -redo: - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); - if (free_bytes < need_bytes) { + if (!list_empty_careful(&log->l_reserveq)) { spin_lock(&log->l_grant_reserve_lock); - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_reserveq); - - trace_xfs_log_grant_sleep2(log, tic); - - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - - xlog_grant_push_ail(log, need_bytes); - - XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); - - trace_xfs_log_grant_wake2(log, tic); - goto redo; - } - - if (!list_empty(&tic->t_queue)) { + if (!xlog_reserveq_wake(log, &free_bytes) || + free_bytes < need_bytes) + error = xlog_reserveq_wait(log, tic, need_bytes); + spin_unlock(&log->l_grant_reserve_lock); + } else if (free_bytes < need_bytes) { spin_lock(&log->l_grant_reserve_lock); - list_del_init(&tic->t_queue); + error = xlog_reserveq_wait(log, tic, need_bytes); spin_unlock(&log->l_grant_reserve_lock); } + if (error) + return error; - /* we've got enough space */ xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_grant_exit(log, tic); xlog_verify_grant_tail(log); return 0; - -error_return_unlocked: - spin_lock(&log->l_grant_reserve_lock); -error_return: - list_del_init(&tic->t_queue); - spin_unlock(&log->l_grant_reserve_lock); - trace_xfs_log_grant_error(log, tic); - - /* - * If we are failing, make sure the ticket doesn't have any - * current reservations. We don't want to add this back when - * the ticket/transaction gets cancelled. - */ - tic->t_curr_res = 0; - tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - return XFS_ERROR(EIO); -} /* xlog_grant_log_space */ - +} /* * Replenish the byte reservation required by moving the grant write head. @@ -2619,10 +2668,12 @@ error_return: * free fast path. */ STATIC int -xlog_regrant_write_log_space(xlog_t *log, - xlog_ticket_t *tic) +xlog_regrant_write_log_space( + struct log *log, + struct xlog_ticket *tic) { - int free_bytes, need_bytes; + int free_bytes, need_bytes; + int error = 0; tic->t_curr_res = tic->t_unit_res; xlog_tic_reset_res(tic); @@ -2630,104 +2681,38 @@ xlog_regrant_write_log_space(xlog_t *log, if (tic->t_cnt > 0) return 0; -#ifdef DEBUG - if (log->l_flags & XLOG_ACTIVE_RECOVERY) - panic("regrant Recovery problem"); -#endif + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); trace_xfs_log_regrant_write_enter(log, tic); - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; - /* If there are other waiters on the queue then give them a - * chance at logspace before us. Wake up the first waiters, - * if we do not wake up all the waiters then go to sleep waiting - * for more free space, otherwise try to get some space for - * this transaction. + /* + * If there are other waiters on the queue then give them a chance at + * logspace before us. Wake up the first waiters, if we do not wake + * up all the waiters then go to sleep waiting for more free space, + * otherwise try to get some space for this transaction. */ need_bytes = tic->t_unit_res; - if (!list_empty_careful(&log->l_writeq)) { - struct xlog_ticket *ntic; - - spin_lock(&log->l_grant_write_lock); - free_bytes = xlog_space_left(log, &log->l_grant_write_head); - list_for_each_entry(ntic, &log->l_writeq, t_queue) { - ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); - - if (free_bytes < ntic->t_unit_res) - break; - free_bytes -= ntic->t_unit_res; - wake_up(&ntic->t_wait); - } - - if (ntic != list_first_entry(&log->l_writeq, - struct xlog_ticket, t_queue)) { - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_writeq); - trace_xfs_log_regrant_write_sleep1(log, tic); - - xlog_grant_push_ail(log, need_bytes); - - XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_write_lock); - trace_xfs_log_regrant_write_wake1(log, tic); - } else - spin_unlock(&log->l_grant_write_lock); - } - -redo: - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; - free_bytes = xlog_space_left(log, &log->l_grant_write_head); - if (free_bytes < need_bytes) { + if (!list_empty_careful(&log->l_writeq)) { spin_lock(&log->l_grant_write_lock); - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_writeq); - - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - - xlog_grant_push_ail(log, need_bytes); - - XFS_STATS_INC(xs_sleep_logspace); - trace_xfs_log_regrant_write_sleep2(log, tic); - xlog_wait(&tic->t_wait, &log->l_grant_write_lock); - - trace_xfs_log_regrant_write_wake2(log, tic); - goto redo; - } - - if (!list_empty(&tic->t_queue)) { + if (!xlog_writeq_wake(log, &free_bytes) || + free_bytes < need_bytes) + error = xlog_writeq_wait(log, tic, need_bytes); + spin_unlock(&log->l_grant_write_lock); + } else if (free_bytes < need_bytes) { spin_lock(&log->l_grant_write_lock); - list_del_init(&tic->t_queue); + error = xlog_writeq_wait(log, tic, need_bytes); spin_unlock(&log->l_grant_write_lock); } - /* we've got enough space */ + if (error) + return error; + xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_regrant_write_exit(log, tic); xlog_verify_grant_tail(log); return 0; - - - error_return_unlocked: - spin_lock(&log->l_grant_write_lock); - error_return: - list_del_init(&tic->t_queue); - spin_unlock(&log->l_grant_write_lock); - trace_xfs_log_regrant_write_error(log, tic); - - /* - * If we are failing, make sure the ticket doesn't have any - * current reservations. We don't want to add this back when - * the ticket/transaction gets cancelled. - */ - tic->t_curr_res = 0; - tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - return XFS_ERROR(EIO); -} /* xlog_regrant_write_log_space */ - +} /* The first cnt-1 times through here we don't need to * move the grant write head because the permanent @@ -3521,13 +3506,13 @@ xlog_verify_iclog(xlog_t *log, spin_unlock(&log->l_icloglock); /* check log magic numbers */ - if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) + if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); ptr = (xfs_caddr_t) &iclog->ic_header; for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; ptr += BBSIZE) { - if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) + if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) xfs_emerg(log->l_mp, "%s: unexpected magic num", __func__); } |