The log item flags contain a field that is protected by the AIL
lock - the XFS_LI_IN_AIL flag. We use non-atomic read-modify-write
(RMW) operations to set and clear these flags, but most of the
updates and checks are not done with the AIL lock held, so they are
susceptible to update races.
Fix this by changing the log item flags to use atomic bitops rather
than relying on the AIL lock for update serialisation.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
19 files changed:
xfs_bui_item_unlock(
struct xfs_log_item *lip)
{
xfs_bui_item_unlock(
struct xfs_log_item *lip)
{
- if (lip->li_flags & XFS_LI_ABORTED)
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
xfs_bui_release(BUI_ITEM(lip));
}
xfs_bui_release(BUI_ITEM(lip));
}
{
struct xfs_bud_log_item *budp = BUD_ITEM(lip);
{
struct xfs_bud_log_item *budp = BUD_ITEM(lip);
- if (lip->li_flags & XFS_LI_ABORTED) {
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
xfs_bui_release(budp->bud_buip);
kmem_zone_free(xfs_bud_zone, budp);
}
xfs_bui_release(budp->bud_buip);
kmem_zone_free(xfs_bud_zone, budp);
}
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- bool aborted = !!(lip->li_flags & XFS_LI_ABORTED);
bool hold = !!(bip->bli_flags & XFS_BLI_HOLD);
bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY);
#if defined(DEBUG) || defined(XFS_WARN)
bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED);
#endif
bool hold = !!(bip->bli_flags & XFS_BLI_HOLD);
bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY);
#if defined(DEBUG) || defined(XFS_WARN)
bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED);
#endif
+ aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags);
+
/* Clear the buffer's association with this transaction. */
bp->b_transp = NULL;
/* Clear the buffer's association with this transaction. */
bp->b_transp = NULL;
* since it's cheaper, and then we recheck while
* holding the lock before removing the dquot from the AIL.
*/
* since it's cheaper, and then we recheck while
* holding the lock before removing the dquot from the AIL.
*/
- if ((lip->li_flags & XFS_LI_IN_AIL) &&
+ if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
((lip->li_lsn == qip->qli_flush_lsn) ||
((lip->li_lsn == qip->qli_flush_lsn) ||
- (lip->li_flags & XFS_LI_FAILED))) {
+ test_bit(XFS_LI_FAILED, &lip->li_flags))) {
/* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->ail_lock);
/* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->ail_lock);
* Clear the failed state since we are about to drop the
* flush lock
*/
* Clear the failed state since we are about to drop the
* flush lock
*/
- if (lip->li_flags & XFS_LI_FAILED)
- xfs_clear_li_failed(lip);
+ xfs_clear_li_failed(lip);
spin_unlock(&ailp->ail_lock);
}
}
spin_unlock(&ailp->ail_lock);
}
}
* The buffer containing this item failed to be written back
* previously. Resubmit the buffer for IO
*/
* The buffer containing this item failed to be written back
* previously. Resubmit the buffer for IO
*/
- if (lip->li_flags & XFS_LI_FAILED) {
+ if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
xfs_efi_item_unlock(
struct xfs_log_item *lip)
{
xfs_efi_item_unlock(
struct xfs_log_item *lip)
{
- if (lip->li_flags & XFS_LI_ABORTED)
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
xfs_efi_release(EFI_ITEM(lip));
}
xfs_efi_release(EFI_ITEM(lip));
}
{
struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
{
struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
- if (lip->li_flags & XFS_LI_ABORTED) {
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
xfs_efi_release(efdp->efd_efip);
xfs_efd_item_free(efdp);
}
xfs_efi_release(efdp->efd_efip);
xfs_efd_item_free(efdp);
}
xfs_idestroy_fork(ip, XFS_COW_FORK);
if (ip->i_itemp) {
xfs_idestroy_fork(ip, XFS_COW_FORK);
if (ip->i_itemp) {
- ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
+ ASSERT(!test_bit(XFS_LI_IN_AIL,
+ &ip->i_itemp->ili_item.li_flags));
xfs_inode_item_destroy(ip);
ip->i_itemp = NULL;
}
xfs_inode_item_destroy(ip);
ip->i_itemp = NULL;
}
{
struct xfs_icreate_item *icp = ICR_ITEM(lip);
{
struct xfs_icreate_item *icp = ICR_ITEM(lip);
- if (icp->ic_item.li_flags & XFS_LI_ABORTED)
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
kmem_zone_free(xfs_icreate_zone, icp);
return;
}
kmem_zone_free(xfs_icreate_zone, icp);
return;
}
if (!try_lock) {
for (j = (i - 1); j >= 0 && !try_lock; j--) {
lp = (xfs_log_item_t *)ips[j]->i_itemp;
if (!try_lock) {
for (j = (i - 1); j >= 0 && !try_lock; j--) {
lp = (xfs_log_item_t *)ips[j]->i_itemp;
- if (lp && (lp->li_flags & XFS_LI_IN_AIL))
+ if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
* and try again.
*/
lp = (xfs_log_item_t *)ip0->i_itemp;
* and try again.
*/
lp = (xfs_log_item_t *)ip0->i_itemp;
- if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+ if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
xfs_iunlock(ip0, ip0_mode);
if ((++attempts % 5) == 0)
if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
xfs_iunlock(ip0, ip0_mode);
if ((++attempts % 5) == 0)
* The buffer containing this item failed to be written back
* previously. Resubmit the buffer for IO.
*/
* The buffer containing this item failed to be written back
* previously. Resubmit the buffer for IO.
*/
- if (lip->li_flags & XFS_LI_FAILED) {
+ if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
*/
iip = INODE_ITEM(blip);
if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
*/
iip = INODE_ITEM(blip);
if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
- (blip->li_flags & XFS_LI_FAILED))
+ test_bit(XFS_LI_FAILED, &blip->li_flags))
need_ail++;
}
/* make sure we capture the state of the initial inode. */
iip = INODE_ITEM(lip);
if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
need_ail++;
}
/* make sure we capture the state of the initial inode. */
iip = INODE_ITEM(lip);
if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
- lip->li_flags & XFS_LI_FAILED)
+ test_bit(XFS_LI_FAILED, &lip->li_flags))
xfs_inode_log_item_t *iip = ip->i_itemp;
if (iip) {
xfs_inode_log_item_t *iip = ip->i_itemp;
if (iip) {
- if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
+ if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) {
xfs_trans_ail_remove(&iip->ili_item,
stale ? SHUTDOWN_LOG_IO_ERROR :
SHUTDOWN_CORRUPT_INCORE);
xfs_trans_ail_remove(&iip->ili_item,
stale ? SHUTDOWN_LOG_IO_ERROR :
SHUTDOWN_CORRUPT_INCORE);
xfs_warn(mp, "log item: ");
xfs_warn(mp, " type = 0x%x", lip->li_type);
xfs_warn(mp, "log item: ");
xfs_warn(mp, " type = 0x%x", lip->li_type);
- xfs_warn(mp, " flags = 0x%x", lip->li_flags);
+ xfs_warn(mp, " flags = 0x%lx", lip->li_flags);
if (!lv)
continue;
xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
if (!lv)
continue;
xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
- !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+ !test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
xfs_dqfunlock(dqp);
xfs_dqunlock(dqp);
xfs_dqfunlock(dqp);
xfs_dqunlock(dqp);
xfs_cui_item_unlock(
struct xfs_log_item *lip)
{
xfs_cui_item_unlock(
struct xfs_log_item *lip)
{
- if (lip->li_flags & XFS_LI_ABORTED)
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
xfs_cui_release(CUI_ITEM(lip));
}
xfs_cui_release(CUI_ITEM(lip));
}
{
struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
{
struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
- if (lip->li_flags & XFS_LI_ABORTED) {
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
xfs_cui_release(cudp->cud_cuip);
kmem_zone_free(xfs_cud_zone, cudp);
}
xfs_cui_release(cudp->cud_cuip);
kmem_zone_free(xfs_cud_zone, cudp);
}
xfs_rui_item_unlock(
struct xfs_log_item *lip)
{
xfs_rui_item_unlock(
struct xfs_log_item *lip)
{
- if (lip->li_flags & XFS_LI_ABORTED)
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
xfs_rui_release(RUI_ITEM(lip));
}
xfs_rui_release(RUI_ITEM(lip));
}
{
struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
{
struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
- if (lip->li_flags & XFS_LI_ABORTED) {
+ if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
xfs_rui_release(rudp->rud_ruip);
kmem_zone_free(xfs_rud_zone, rudp);
}
xfs_rui_release(rudp->rud_ruip);
kmem_zone_free(xfs_rud_zone, rudp);
}
__field(int, bli_refcount)
__field(unsigned, bli_flags)
__field(void *, li_desc)
__field(int, bli_refcount)
__field(unsigned, bli_flags)
__field(void *, li_desc)
- __field(unsigned, li_flags)
+ __field(unsigned long, li_flags)
),
TP_fast_assign(
__entry->dev = bip->bli_buf->b_target->bt_dev;
),
TP_fast_assign(
__entry->dev = bip->bli_buf->b_target->bt_dev;
__field(dev_t, dev)
__field(void *, lip)
__field(uint, type)
__field(dev_t, dev)
__field(void *, lip)
__field(uint, type)
+ __field(unsigned long, flags)
__field(xfs_lsn_t, lsn)
),
TP_fast_assign(
__field(xfs_lsn_t, lsn)
),
TP_fast_assign(
__field(dev_t, dev)
__field(void *, lip)
__field(uint, type)
__field(dev_t, dev)
__field(void *, lip)
__field(uint, type)
+ __field(unsigned long, flags)
__field(xfs_lsn_t, old_lsn)
__field(xfs_lsn_t, new_lsn)
),
__field(xfs_lsn_t, old_lsn)
__field(xfs_lsn_t, new_lsn)
),
if (commit_lsn != NULLCOMMITLSN)
lip->li_ops->iop_committing(lip, commit_lsn);
if (abort)
if (commit_lsn != NULLCOMMITLSN)
lip->li_ops->iop_committing(lip, commit_lsn);
if (abort)
- lip->li_flags |= XFS_LI_ABORTED;
+ set_bit(XFS_LI_ABORTED, &lip->li_flags);
lip->li_ops->iop_unlock(lip);
xfs_trans_free_item_desc(lidp);
lip->li_ops->iop_unlock(lip);
xfs_trans_free_item_desc(lidp);
xfs_lsn_t item_lsn;
if (aborted)
xfs_lsn_t item_lsn;
if (aborted)
- lip->li_flags |= XFS_LI_ABORTED;
+ set_bit(XFS_LI_ABORTED, &lip->li_flags);
item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
/* item_lsn of -1 means the item needs no further processing */
item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
/* item_lsn of -1 means the item needs no further processing */
struct xfs_mount *li_mountp; /* ptr to fs mount */
struct xfs_ail *li_ailp; /* ptr to AIL */
uint li_type; /* item type */
struct xfs_mount *li_mountp; /* ptr to fs mount */
struct xfs_ail *li_ailp; /* ptr to AIL */
uint li_type; /* item type */
- uint li_flags; /* misc flags */
+ unsigned long li_flags; /* misc flags */
struct xfs_buf *li_buf; /* real buffer pointer */
struct list_head li_bio_list; /* buffer item list */
void (*li_cb)(struct xfs_buf *,
struct xfs_buf *li_buf; /* real buffer pointer */
struct list_head li_bio_list; /* buffer item list */
void (*li_cb)(struct xfs_buf *,
xfs_lsn_t li_seq; /* CIL commit seq */
} xfs_log_item_t;
xfs_lsn_t li_seq; /* CIL commit seq */
} xfs_log_item_t;
-#define XFS_LI_IN_AIL 0x1
-#define XFS_LI_ABORTED 0x2
-#define XFS_LI_FAILED 0x4
+/*
+ * li_flags use the (set/test/clear)_bit atomic interfaces because updates can
+ * race with each other and we don't want to have to use the AIL lock to
+ * serialise all updates.
+ */
+#define XFS_LI_IN_AIL 0
+#define XFS_LI_ABORTED 1
+#define XFS_LI_FAILED 2
- { XFS_LI_IN_AIL, "IN_AIL" }, \
- { XFS_LI_ABORTED, "ABORTED" }, \
- { XFS_LI_FAILED, "FAILED" }
+ { (1 << XFS_LI_IN_AIL), "IN_AIL" }, \
+ { (1 << XFS_LI_ABORTED), "ABORTED" }, \
+ { (1 << XFS_LI_FAILED), "FAILED" }
struct xfs_item_ops {
void (*iop_size)(xfs_log_item_t *, int *, int *);
struct xfs_item_ops {
void (*iop_size)(xfs_log_item_t *, int *, int *);
/*
* Check the next and previous entries are valid.
*/
/*
* Check the next and previous entries are valid.
*/
- ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
+ ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
if (&prev_lip->li_ail != &ailp->ail_head)
ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
if (&prev_lip->li_ail != &ailp->ail_head)
ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
for (i = 0; i < nr_items; i++) {
struct xfs_log_item *lip = log_items[i];
for (i = 0; i < nr_items; i++) {
struct xfs_log_item *lip = log_items[i];
- if (lip->li_flags & XFS_LI_IN_AIL) {
+ if (test_and_set_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
/* check if we really need to move the item */
if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
continue;
/* check if we really need to move the item */
if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
continue;
if (mlip == lip)
mlip_changed = 1;
} else {
if (mlip == lip)
mlip_changed = 1;
} else {
- lip->li_flags |= XFS_LI_IN_AIL;
trace_xfs_ail_insert(lip, 0, lsn);
}
lip->li_lsn = lsn;
trace_xfs_ail_insert(lip, 0, lsn);
}
lip->li_lsn = lsn;
trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
xfs_ail_delete(ailp, lip);
xfs_clear_li_failed(lip);
trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
xfs_ail_delete(ailp, lip);
xfs_clear_li_failed(lip);
- lip->li_flags &= ~XFS_LI_IN_AIL;
+ clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
lip->li_lsn = 0;
return mlip == lip;
lip->li_lsn = 0;
return mlip == lip;
struct xfs_mount *mp = ailp->ail_mount;
bool mlip_changed;
struct xfs_mount *mp = ailp->ail_mount;
bool mlip_changed;
- if (!(lip->li_flags & XFS_LI_IN_AIL)) {
+ if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
spin_unlock(&ailp->ail_lock);
if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
spin_unlock(&ailp->ail_lock);
if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
ASSERT(bp->b_pincount == 0);
***/
ASSERT(atomic_read(&bip->bli_refcount) == 0);
ASSERT(bp->b_pincount == 0);
***/
ASSERT(atomic_read(&bip->bli_refcount) == 0);
- ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
xfs_buf_item_relse(bp);
}
ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
xfs_buf_item_relse(bp);
}
spin_lock(&ailp->ail_lock);
/* xfs_trans_ail_delete() drops the AIL lock */
spin_lock(&ailp->ail_lock);
/* xfs_trans_ail_delete() drops the AIL lock */
- if (lip->li_flags & XFS_LI_IN_AIL)
+ if (test_bit(XFS_LI_IN_AIL, &lip->li_flags))
xfs_trans_ail_delete(ailp, lip, shutdown_type);
else
spin_unlock(&ailp->ail_lock);
xfs_trans_ail_delete(ailp, lip, shutdown_type);
else
spin_unlock(&ailp->ail_lock);
{
struct xfs_buf *bp = lip->li_buf;
{
struct xfs_buf *bp = lip->li_buf;
- ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+ ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
lockdep_assert_held(&lip->li_ailp->ail_lock);
lockdep_assert_held(&lip->li_ailp->ail_lock);
- if (lip->li_flags & XFS_LI_FAILED) {
- lip->li_flags &= ~XFS_LI_FAILED;
+ if (test_and_clear_bit(XFS_LI_FAILED, &lip->li_flags)) {
lip->li_buf = NULL;
xfs_buf_rele(bp);
}
lip->li_buf = NULL;
xfs_buf_rele(bp);
}
{
lockdep_assert_held(&lip->li_ailp->ail_lock);
{
lockdep_assert_held(&lip->li_ailp->ail_lock);
- if (!(lip->li_flags & XFS_LI_FAILED)) {
+ if (!test_and_set_bit(XFS_LI_FAILED, &lip->li_flags)) {
- lip->li_flags |= XFS_LI_FAILED;