]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge branch 'for-3.2/drivers' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
* 'for-3.2/drivers' of git://git.kernel.dk/linux-block: (30 commits)
  virtio-blk: use ida to allocate disk index
  hpsa: add small delay when using PCI Power Management to reset for kdump
  cciss: add small delay when using PCI Power Management to reset for kdump
  xen/blkback: Fix two races in the handling of barrier requests.
  xen/blkback: Check for proper operation.
  xen/blkback: Fix the inhibition to map pages when discarding sector ranges.
  xen/blkback: Report VBD_WSECT (wr_sect) properly.
  xen/blkback: Support 'feature-barrier' aka old-style BARRIER requests.
  xen-blkfront: plug device number leak in xlblk_init() error path
  xen-blkfront: If no barrier or flush is supported, use invalid operation.
  xen-blkback: use kzalloc() in favor of kmalloc()+memset()
  xen-blkback: fixed indentation and comments
  xen-blkfront: fix a deadlock while handling discard response
  xen-blkfront: Handle discard requests.
  xen-blkback: Implement discard requests ('feature-discard')
  xen-blkfront: add BLKIF_OP_DISCARD and discard request struct
  drivers/block/loop.c: remove unnecessary bdev argument from loop_clr_fd()
  drivers/block/loop.c: emit uevent on auto release
  drivers/block/cpqarray.c: use pci_dev->revision
  loop: always allow userspace partitions and optionally support automatic scanning
  ...

Fix up trivial header file inclusion conflict in drivers/block/loop.c

1  2 
block/genhd.c
drivers/block/loop.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/scsi/hpsa.c
fs/block_dev.c
include/linux/genhd.h
include/linux/loop.h

diff --combined block/genhd.c
index 024fc3944fb5b0a40311d5fde7d2c0cc950928a8,2429ecbbd97d07c8dd224c7bd9b3f554d14efa82..9253839714ff95b4acc6da413bd87f610ddce44c
@@@ -19,7 -19,6 +19,7 @@@
  #include <linux/mutex.h>
  #include <linux/idr.h>
  #include <linux/log2.h>
 +#include <linux/ctype.h>
  
  #include "blk.h"
  
@@@ -537,7 -536,7 +537,7 @@@ void register_disk(struct gendisk *disk
        disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
  
        /* No minors to use for partitions */
-       if (!disk_partitionable(disk))
+       if (!disk_part_scan_enabled(disk))
                goto exit;
  
        /* No such device (e.g., media were just removed) */
@@@ -612,12 -611,6 +612,12 @@@ void add_disk(struct gendisk *disk
        register_disk(disk);
        blk_register_queue(disk);
  
 +      /*
 +       * Take an extra ref on queue which will be put on disk_release()
 +       * so that it sticks around as long as @disk is there.
 +       */
 +      WARN_ON_ONCE(blk_get_queue(disk->queue));
 +
        retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
                                   "bdi");
        WARN_ON(retval);
@@@ -848,7 -841,7 +848,7 @@@ static int show_partition(struct seq_fi
        char buf[BDEVNAME_SIZE];
  
        /* Don't show non-partitionable removable devices or empty devices */
-       if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+       if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
                                   (sgp->flags & GENHD_FL_REMOVABLE)))
                return 0;
        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
@@@ -916,74 -909,6 +916,74 @@@ static int __init genhd_device_init(voi
  
  subsys_initcall(genhd_device_init);
  
 +static ssize_t alias_show(struct device *dev,
 +                             struct device_attribute *attr, char *buf)
 +{
 +      struct gendisk *disk = dev_to_disk(dev);
 +      ssize_t ret = 0;
 +
 +      if (disk->alias)
 +              ret = snprintf(buf, ALIAS_LEN, "%s\n", disk->alias);
 +      return ret;
 +}
 +
 +static ssize_t alias_store(struct device *dev, struct device_attribute *attr,
 +                         const char *buf, size_t count)
 +{
 +      struct gendisk *disk = dev_to_disk(dev);
 +      char *alias;
 +      char *envp[] = { NULL, NULL };
 +      unsigned char c;
 +      int i;
 +      ssize_t ret = count;
 +
 +      if (!count)
 +              return -EINVAL;
 +
 +      if (count >= ALIAS_LEN) {
 +              printk(KERN_ERR "alias: alias is too long\n");
 +              return -EINVAL;
 +      }
 +
 +      /* Validation check */
 +      for (i = 0; i < count; i++) {
 +              c = buf[i];
 +              if (i == count - 1 && c == '\n')
 +                      break;
 +              if (!isalnum(c) && c != '_' && c != '-') {
 +                      printk(KERN_ERR "alias: invalid alias\n");
 +                      return -EINVAL;
 +              }
 +      }
 +
 +      if (disk->alias) {
 +              printk(KERN_INFO "alias: %s is already assigned (%s)\n",
 +                     disk->disk_name, disk->alias);
 +              return -EINVAL;
 +      }
 +
 +      alias = kasprintf(GFP_KERNEL, "%s", buf);
 +      if (!alias)
 +              return -ENOMEM;
 +
 +      if (alias[count - 1] == '\n')
 +              alias[count - 1] = '\0';
 +
 +      envp[0] = kasprintf(GFP_KERNEL, "ALIAS=%s", alias);
 +      if (!envp[0]) {
 +              kfree(alias);
 +              return -ENOMEM;
 +      }
 +
 +      disk->alias = alias;
 +      printk(KERN_INFO "alias: assigned %s to %s\n", alias, disk->disk_name);
 +
 +      kobject_uevent_env(&dev->kobj, KOBJ_ADD, envp);
 +
 +      kfree(envp[0]);
 +      return ret;
 +}
 +
  static ssize_t disk_range_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
  {
@@@ -1043,7 -968,6 +1043,7 @@@ static ssize_t disk_discard_alignment_s
        return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
  }
  
 +static DEVICE_ATTR(alias, S_IRUGO|S_IWUSR, alias_show, alias_store);
  static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
  static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
  static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@@ -1066,7 -990,6 +1066,7 @@@ static struct device_attribute dev_attr
  #endif
  
  static struct attribute *disk_attrs[] = {
 +      &dev_attr_alias.attr,
        &dev_attr_range.attr,
        &dev_attr_ext_range.attr,
        &dev_attr_removable.attr,
@@@ -1172,8 -1095,6 +1172,8 @@@ static void disk_release(struct device 
        disk_replace_part_tbl(disk, NULL);
        free_part_stats(&disk->part0);
        free_part_info(&disk->part0);
 +      if (disk->queue)
 +              blk_put_queue(disk->queue);
        kfree(disk);
  }
  struct class block_class = {
@@@ -1225,17 -1146,17 +1225,17 @@@ static int diskstats_show(struct seq_fi
                cpu = part_stat_lock();
                part_round_stats(cpu, hd);
                part_stat_unlock();
 -              seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
 -                         "%u %lu %lu %llu %u %u %u %u\n",
 +              seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
 +                         "%u %lu %lu %lu %u %u %u %u\n",
                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                           disk_name(gp, hd->partno, buf),
                           part_stat_read(hd, ios[READ]),
                           part_stat_read(hd, merges[READ]),
 -                         (unsigned long long)part_stat_read(hd, sectors[READ]),
 +                         part_stat_read(hd, sectors[READ]),
                           jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
                           part_stat_read(hd, ios[WRITE]),
                           part_stat_read(hd, merges[WRITE]),
 -                         (unsigned long long)part_stat_read(hd, sectors[WRITE]),
 +                         part_stat_read(hd, sectors[WRITE]),
                           jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
                           part_in_flight(hd),
                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
diff --combined drivers/block/loop.c
index c77983ea86c8798a35605206e35266087f764809,9b2f5d3c19abc96c1c594dd6d247140c64728086..3d806820280e3bc4aaa5e81d6bea411f6597e400
  #include <linux/kthread.h>
  #include <linux/splice.h>
  #include <linux/sysfs.h>
 +#include <linux/miscdevice.h>
+ #include <linux/falloc.h>
  #include <asm/uaccess.h>
  
 -static LIST_HEAD(loop_devices);
 -static DEFINE_MUTEX(loop_devices_mutex);
 +static DEFINE_IDR(loop_index_idr);
 +static DEFINE_MUTEX(loop_index_mutex);
  
  static int max_part;
  static int part_shift;
@@@ -202,6 -203,74 +204,6 @@@ lo_do_transfer(struct loop_device *lo, 
        return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
  }
  
 -/**
 - * do_lo_send_aops - helper for writing data to a loop device
 - *
 - * This is the fast version for backing filesystems which implement the address
 - * space operations write_begin and write_end.
 - */
 -static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 -              loff_t pos, struct page *unused)
 -{
 -      struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
 -      struct address_space *mapping = file->f_mapping;
 -      pgoff_t index;
 -      unsigned offset, bv_offs;
 -      int len, ret;
 -
 -      mutex_lock(&mapping->host->i_mutex);
 -      index = pos >> PAGE_CACHE_SHIFT;
 -      offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
 -      bv_offs = bvec->bv_offset;
 -      len = bvec->bv_len;
 -      while (len > 0) {
 -              sector_t IV;
 -              unsigned size, copied;
 -              int transfer_result;
 -              struct page *page;
 -              void *fsdata;
 -
 -              IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
 -              size = PAGE_CACHE_SIZE - offset;
 -              if (size > len)
 -                      size = len;
 -
 -              ret = pagecache_write_begin(file, mapping, pos, size, 0,
 -                                                      &page, &fsdata);
 -              if (ret)
 -                      goto fail;
 -
 -              file_update_time(file);
 -
 -              transfer_result = lo_do_transfer(lo, WRITE, page, offset,
 -                              bvec->bv_page, bv_offs, size, IV);
 -              copied = size;
 -              if (unlikely(transfer_result))
 -                      copied = 0;
 -
 -              ret = pagecache_write_end(file, mapping, pos, size, copied,
 -                                                      page, fsdata);
 -              if (ret < 0 || ret != copied)
 -                      goto fail;
 -
 -              if (unlikely(transfer_result))
 -                      goto fail;
 -
 -              bv_offs += copied;
 -              len -= copied;
 -              offset = 0;
 -              index++;
 -              pos += copied;
 -      }
 -      ret = 0;
 -out:
 -      mutex_unlock(&mapping->host->i_mutex);
 -      return ret;
 -fail:
 -      ret = -1;
 -      goto out;
 -}
 -
  /**
   * __do_lo_send_write - helper for writing data to a loop device
   *
@@@ -229,8 -298,10 +231,8 @@@ static int __do_lo_send_write(struct fi
  /**
   * do_lo_send_direct_write - helper for writing data to a loop device
   *
 - * This is the fast, non-transforming version for backing filesystems which do
 - * not implement the address space operations write_begin and write_end.
 - * It uses the write file operation which should be present on all writeable
 - * filesystems.
 + * This is the fast, non-transforming version that does not need double
 + * buffering.
   */
  static int do_lo_send_direct_write(struct loop_device *lo,
                struct bio_vec *bvec, loff_t pos, struct page *page)
  /**
   * do_lo_send_write - helper for writing data to a loop device
   *
 - * This is the slow, transforming version for filesystems which do not
 - * implement the address space operations write_begin and write_end.  It
 - * uses the write file operation which should be present on all writeable
 - * filesystems.
 - *
 - * Using fops->write is slower than using aops->{prepare,commit}_write in the
 - * transforming case because we need to double buffer the data as we cannot do
 - * the transformations in place as we do not have direct access to the
 - * destination pages of the backing file.
 + * This is the slow, transforming version that needs to double buffer the
 + * data as it cannot do the transformations in place without having direct
 + * access to the destination pages of the backing file.
   */
  static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
                loff_t pos, struct page *page)
@@@ -274,16 -351,17 +276,16 @@@ static int lo_send(struct loop_device *
        struct page *page = NULL;
        int i, ret = 0;
  
 -      do_lo_send = do_lo_send_aops;
 -      if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
 +      if (lo->transfer != transfer_none) {
 +              page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
 +              if (unlikely(!page))
 +                      goto fail;
 +              kmap(page);
 +              do_lo_send = do_lo_send_write;
 +      } else {
                do_lo_send = do_lo_send_direct_write;
 -              if (lo->transfer != transfer_none) {
 -                      page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
 -                      if (unlikely(!page))
 -                              goto fail;
 -                      kmap(page);
 -                      do_lo_send = do_lo_send_write;
 -              }
        }
 +
        bio_for_each_segment(bvec, bio, i) {
                ret = do_lo_send(lo, bvec, pos, page);
                if (ret < 0)
@@@ -407,6 -485,29 +409,29 @@@ static int do_bio_filebacked(struct loo
                        }
                }
  
+               /*
+                * We use punch hole to reclaim the free space used by the
+                * image a.k.a. discard. However we do not support discard if
+                * encryption is enabled, because it may give an attacker
+                * useful information.
+                */
+               if (bio->bi_rw & REQ_DISCARD) {
+                       struct file *file = lo->lo_backing_file;
+                       int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+                       if ((!file->f_op->fallocate) ||
+                           lo->lo_encrypt_key_size) {
+                               ret = -EOPNOTSUPP;
+                               goto out;
+                       }
+                       ret = file->f_op->fallocate(file, mode, pos,
+                                                   bio->bi_size);
+                       if (unlikely(ret && ret != -EINVAL &&
+                                    ret != -EOPNOTSUPP))
+                               ret = -EIO;
+                       goto out;
+               }
                ret = lo_send(lo, bio, pos);
  
                if ((bio->bi_rw & REQ_FUA) && !ret) {
@@@ -437,7 -538,7 +462,7 @@@ static struct bio *loop_get_bio(struct 
        return bio_list_pop(&lo->lo_bio_list);
  }
  
 -static int loop_make_request(struct request_queue *q, struct bio *old_bio)
 +static void loop_make_request(struct request_queue *q, struct bio *old_bio)
  {
        struct loop_device *lo = q->queuedata;
        int rw = bio_rw(old_bio);
        loop_add_bio(lo, old_bio);
        wake_up(&lo->lo_event);
        spin_unlock_irq(&lo->lo_lock);
 -      return 0;
 +      return;
  
  out:
        spin_unlock_irq(&lo->lo_lock);
        bio_io_error(old_bio);
 -      return 0;
  }
  
  struct switch_request {
@@@ -622,7 -724,7 +647,7 @@@ static int loop_change_fd(struct loop_d
                goto out_putf;
  
        fput(old_file);
-       if (max_part > 0)
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN)
                ioctl_by_bdev(bdev, BLKRRPART, 0);
        return 0;
  
@@@ -644,10 -746,17 +669,10 @@@ static inline int is_loop_device(struc
  static ssize_t loop_attr_show(struct device *dev, char *page,
                              ssize_t (*callback)(struct loop_device *, char *))
  {
 -      struct loop_device *l, *lo = NULL;
 -
 -      mutex_lock(&loop_devices_mutex);
 -      list_for_each_entry(l, &loop_devices, lo_list)
 -              if (disk_to_dev(l->lo_disk) == dev) {
 -                      lo = l;
 -                      break;
 -              }
 -      mutex_unlock(&loop_devices_mutex);
 +      struct gendisk *disk = dev_to_disk(dev);
 +      struct loop_device *lo = disk->private_data;
  
 -      return lo ? callback(lo, page) : -EIO;
 +      return callback(lo, page);
  }
  
  #define LOOP_ATTR_RO(_name)                                           \
@@@ -665,10 -774,10 +690,10 @@@ static ssize_t loop_attr_backing_file_s
        ssize_t ret;
        char *p = NULL;
  
 -      mutex_lock(&lo->lo_ctl_mutex);
 +      spin_lock_irq(&lo->lo_lock);
        if (lo->lo_backing_file)
                p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
 -      mutex_unlock(&lo->lo_ctl_mutex);
 +      spin_unlock_irq(&lo->lo_lock);
  
        if (IS_ERR_OR_NULL(p))
                ret = PTR_ERR(p);
@@@ -699,16 -808,25 +724,25 @@@ static ssize_t loop_attr_autoclear_show
        return sprintf(buf, "%s\n", autoclear ? "1" : "0");
  }
  
+ static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
+ {
+       int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
+       return sprintf(buf, "%s\n", partscan ? "1" : "0");
+ }
  LOOP_ATTR_RO(backing_file);
  LOOP_ATTR_RO(offset);
  LOOP_ATTR_RO(sizelimit);
  LOOP_ATTR_RO(autoclear);
+ LOOP_ATTR_RO(partscan);
  
  static struct attribute *loop_attrs[] = {
        &loop_attr_backing_file.attr,
        &loop_attr_offset.attr,
        &loop_attr_sizelimit.attr,
        &loop_attr_autoclear.attr,
+       &loop_attr_partscan.attr,
        NULL,
  };
  
@@@ -729,6 -847,35 +763,35 @@@ static void loop_sysfs_exit(struct loop
                           &loop_attribute_group);
  }
  
+ static void loop_config_discard(struct loop_device *lo)
+ {
+       struct file *file = lo->lo_backing_file;
+       struct inode *inode = file->f_mapping->host;
+       struct request_queue *q = lo->lo_queue;
+       /*
+        * We use punch hole to reclaim the free space used by the
+        * image a.k.a. discard. However we do not support discard if
+        * encryption is enabled, because it may give an attacker
+        * useful information.
+        */
+       if ((!file->f_op->fallocate) ||
+           lo->lo_encrypt_key_size) {
+               q->limits.discard_granularity = 0;
+               q->limits.discard_alignment = 0;
+               q->limits.max_discard_sectors = 0;
+               q->limits.discard_zeroes_data = 0;
+               queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+               return;
+       }
+       q->limits.discard_granularity = inode->i_sb->s_blocksize;
+       q->limits.discard_alignment = inode->i_sb->s_blocksize;
+       q->limits.max_discard_sectors = UINT_MAX >> 9;
+       q->limits.discard_zeroes_data = 1;
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ }
  static int loop_set_fd(struct loop_device *lo, fmode_t mode,
                       struct block_device *bdev, unsigned int arg)
  {
        mapping = file->f_mapping;
        inode = mapping->host;
  
 -      if (!(file->f_mode & FMODE_WRITE))
 -              lo_flags |= LO_FLAGS_READ_ONLY;
 -
        error = -EINVAL;
 -      if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 -              const struct address_space_operations *aops = mapping->a_ops;
 -
 -              if (aops->write_begin)
 -                      lo_flags |= LO_FLAGS_USE_AOPS;
 -              if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
 -                      lo_flags |= LO_FLAGS_READ_ONLY;
 +      if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 +              goto out_putf;
  
 -              lo_blocksize = S_ISBLK(inode->i_mode) ?
 -                      inode->i_bdev->bd_block_size : PAGE_SIZE;
 +      if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
 +          !file->f_op->write)
 +              lo_flags |= LO_FLAGS_READ_ONLY;
  
 -              error = 0;
 -      } else {
 -              goto out_putf;
 -      }
 +      lo_blocksize = S_ISBLK(inode->i_mode) ?
 +              inode->i_bdev->bd_block_size : PAGE_SIZE;
  
 +      error = -EFBIG;
        size = get_loop_size(lo, file);
 -
 -      if ((loff_t)(sector_t)size != size) {
 -              error = -EFBIG;
 +      if ((loff_t)(sector_t)size != size)
                goto out_putf;
 -      }
  
 -      if (!(mode & FMODE_WRITE))
 -              lo_flags |= LO_FLAGS_READ_ONLY;
 +      error = 0;
  
        set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
  
        }
        lo->lo_state = Lo_bound;
        wake_up_process(lo->lo_thread);
-       if (max_part > 0)
+       if (part_shift)
+               lo->lo_flags |= LO_FLAGS_PARTSCAN;
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN)
                ioctl_by_bdev(bdev, BLKRRPART, 0);
        return 0;
  
@@@ -890,10 -1051,11 +955,11 @@@ loop_init_xfer(struct loop_device *lo, 
        return err;
  }
  
- static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
+ static int loop_clr_fd(struct loop_device *lo)
  {
        struct file *filp = lo->lo_backing_file;
        gfp_t gfp = lo->old_gfp_mask;
+       struct block_device *bdev = lo->lo_device;
  
        if (lo->lo_state != Lo_bound)
                return -ENXIO;
  
        kthread_stop(lo->lo_thread);
  
 +      spin_lock_irq(&lo->lo_lock);
        lo->lo_backing_file = NULL;
 +      spin_unlock_irq(&lo->lo_lock);
  
        loop_release_xfer(lo);
        lo->transfer = NULL;
        lo->lo_offset = 0;
        lo->lo_sizelimit = 0;
        lo->lo_encrypt_key_size = 0;
-       lo->lo_flags = 0;
        lo->lo_thread = NULL;
        memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
        memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
        lo->lo_state = Lo_unbound;
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
-       if (max_part > 0 && bdev)
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
                ioctl_by_bdev(bdev, BLKRRPART, 0);
+       lo->lo_flags = 0;
+       if (!part_shift)
+               lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
        mutex_unlock(&lo->lo_ctl_mutex);
        /*
         * Need not hold lo_ctl_mutex to fput backing file.
@@@ -995,6 -1157,7 +1063,7 @@@ loop_set_status(struct loop_device *lo
                if (figure_loop_size(lo))
                        return -EFBIG;
        }
+       loop_config_discard(lo);
  
        memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
        memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
             (info->lo_flags & LO_FLAGS_AUTOCLEAR))
                lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
  
+       if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
+            !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
+               lo->lo_flags |= LO_FLAGS_PARTSCAN;
+               lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+               ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+       }
        lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
        lo->lo_init[0] = info->lo_init[0];
        lo->lo_init[1] = info->lo_init[1];
@@@ -1203,7 -1373,7 +1279,7 @@@ static int lo_ioctl(struct block_devic
                break;
        case LOOP_CLR_FD:
                /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
-               err = loop_clr_fd(lo, bdev);
+               err = loop_clr_fd(lo);
                if (!err)
                        goto out_unlocked;
                break;
@@@ -1390,22 -1560,13 +1466,22 @@@ static int lo_compat_ioctl(struct block
  
  static int lo_open(struct block_device *bdev, fmode_t mode)
  {
 -      struct loop_device *lo = bdev->bd_disk->private_data;
 +      struct loop_device *lo;
 +      int err = 0;
 +
 +      mutex_lock(&loop_index_mutex);
 +      lo = bdev->bd_disk->private_data;
 +      if (!lo) {
 +              err = -ENXIO;
 +              goto out;
 +      }
  
        mutex_lock(&lo->lo_ctl_mutex);
        lo->lo_refcnt++;
        mutex_unlock(&lo->lo_ctl_mutex);
 -
 -      return 0;
 +out:
 +      mutex_unlock(&loop_index_mutex);
 +      return err;
  }
  
  static int lo_release(struct gendisk *disk, fmode_t mode)
                 * In autoclear mode, stop the loop thread
                 * and remove configuration after last close.
                 */
-               err = loop_clr_fd(lo, NULL);
+               err = loop_clr_fd(lo);
                if (!err)
                        goto out_unlocked;
        } else {
@@@ -1471,71 -1632,40 +1547,71 @@@ int loop_register_transfer(struct loop_
        return 0;
  }
  
 +static int unregister_transfer_cb(int id, void *ptr, void *data)
 +{
 +      struct loop_device *lo = ptr;
 +      struct loop_func_table *xfer = data;
 +
 +      mutex_lock(&lo->lo_ctl_mutex);
 +      if (lo->lo_encryption == xfer)
 +              loop_release_xfer(lo);
 +      mutex_unlock(&lo->lo_ctl_mutex);
 +      return 0;
 +}
 +
  int loop_unregister_transfer(int number)
  {
        unsigned int n = number;
 -      struct loop_device *lo;
        struct loop_func_table *xfer;
  
        if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
                return -EINVAL;
  
        xfer_funcs[n] = NULL;
 -
 -      list_for_each_entry(lo, &loop_devices, lo_list) {
 -              mutex_lock(&lo->lo_ctl_mutex);
 -
 -              if (lo->lo_encryption == xfer)
 -                      loop_release_xfer(lo);
 -
 -              mutex_unlock(&lo->lo_ctl_mutex);
 -      }
 -
 +      idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
        return 0;
  }
  
  EXPORT_SYMBOL(loop_register_transfer);
  EXPORT_SYMBOL(loop_unregister_transfer);
  
 -static struct loop_device *loop_alloc(int i)
 +static int loop_add(struct loop_device **l, int i)
  {
        struct loop_device *lo;
        struct gendisk *disk;
 +      int err;
  
        lo = kzalloc(sizeof(*lo), GFP_KERNEL);
 -      if (!lo)
 +      if (!lo) {
 +              err = -ENOMEM;
                goto out;
 +      }
 +
 +      err = idr_pre_get(&loop_index_idr, GFP_KERNEL);
 +      if (err < 0)
 +              goto out_free_dev;
 +
 +      if (i >= 0) {
 +              int m;
 +
 +              /* create specific i in the index */
 +              err = idr_get_new_above(&loop_index_idr, lo, i, &m);
 +              if (err >= 0 && i != m) {
 +                      idr_remove(&loop_index_idr, m);
 +                      err = -EEXIST;
 +              }
 +      } else if (i == -1) {
 +              int m;
 +
 +              /* get next free nr */
 +              err = idr_get_new(&loop_index_idr, lo, &m);
 +              if (err >= 0)
 +                      i = m;
 +      } else {
 +              err = -EINVAL;
 +      }
 +      if (err < 0)
 +              goto out_free_dev;
  
        lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
        if (!lo->lo_queue)
        if (!disk)
                goto out_free_queue;
  
+       /*
+        * Disable partition scanning by default. The in-kernel partition
+        * scanning can be requested individually per-device during its
+        * setup. Userspace can always add and remove partitions from all
+        * devices. The needed partition minors are allocated from the
+        * extended minor space, the main loop device numbers will continue
+        * to match the loop minors, regardless of the number of partitions
+        * used.
+        *
+        * If max_part is given, partition scanning is globally enabled for
+        * all loop devices. The minors for the main loop devices will be
+        * multiples of max_part.
+        *
+        * Note: Global-for-all-devices, set-only-at-init, read-only module
+        * parameters like 'max_loop' and 'max_part' make things needlessly
+        * complicated, are too static, inflexible and may surprise
+        * userspace tools. Parameters like this in general should be avoided.
+        */
+       if (!part_shift)
+               disk->flags |= GENHD_FL_NO_PART_SCAN;
+       disk->flags |= GENHD_FL_EXT_DEVT;
        mutex_init(&lo->lo_ctl_mutex);
        lo->lo_number           = i;
        lo->lo_thread           = NULL;
        disk->private_data      = lo;
        disk->queue             = lo->lo_queue;
        sprintf(disk->disk_name, "loop%d", i);
 -      return lo;
 +      add_disk(disk);
 +      *l = lo;
 +      return lo->lo_number;
  
  out_free_queue:
        blk_cleanup_queue(lo->lo_queue);
  out_free_dev:
        kfree(lo);
  out:
 -      return NULL;
 +      return err;
  }
  
 -static void loop_free(struct loop_device *lo)
 +static void loop_remove(struct loop_device *lo)
  {
 +      del_gendisk(lo->lo_disk);
        blk_cleanup_queue(lo->lo_queue);
        put_disk(lo->lo_disk);
 -      list_del(&lo->lo_list);
        kfree(lo);
  }
  
 -static struct loop_device *loop_init_one(int i)
 +static int find_free_cb(int id, void *ptr, void *data)
 +{
 +      struct loop_device *lo = ptr;
 +      struct loop_device **l = data;
 +
 +      if (lo->lo_state == Lo_unbound) {
 +              *l = lo;
 +              return 1;
 +      }
 +      return 0;
 +}
 +
 +static int loop_lookup(struct loop_device **l, int i)
  {
        struct loop_device *lo;
 +      int ret = -ENODEV;
  
 -      list_for_each_entry(lo, &loop_devices, lo_list) {
 -              if (lo->lo_number == i)
 -                      return lo;
 +      if (i < 0) {
 +              int err;
 +
 +              err = idr_for_each(&loop_index_idr, &find_free_cb, &lo);
 +              if (err == 1) {
 +                      *l = lo;
 +                      ret = lo->lo_number;
 +              }
 +              goto out;
        }
  
 -      lo = loop_alloc(i);
 +      /* lookup and return a specific i */
 +      lo = idr_find(&loop_index_idr, i);
        if (lo) {
 -              add_disk(lo->lo_disk);
 -              list_add_tail(&lo->lo_list, &loop_devices);
 +              *l = lo;
 +              ret = lo->lo_number;
        }
 -      return lo;
 -}
 -
 -static void loop_del_one(struct loop_device *lo)
 -{
 -      del_gendisk(lo->lo_disk);
 -      loop_free(lo);
 +out:
 +      return ret;
  }
  
  static struct kobject *loop_probe(dev_t dev, int *part, void *data)
  {
        struct loop_device *lo;
        struct kobject *kobj;
 +      int err;
  
 -      mutex_lock(&loop_devices_mutex);
 -      lo = loop_init_one(MINOR(dev) >> part_shift);
 -      kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
 -      mutex_unlock(&loop_devices_mutex);
 +      mutex_lock(&loop_index_mutex);
 +      err = loop_lookup(&lo, MINOR(dev) >> part_shift);
 +      if (err < 0)
 +              err = loop_add(&lo, MINOR(dev) >> part_shift);
 +      if (err < 0)
 +              kobj = ERR_PTR(err);
 +      else
 +              kobj = get_disk(lo->lo_disk);
 +      mutex_unlock(&loop_index_mutex);
  
        *part = 0;
        return kobj;
  }
  
 +static long loop_control_ioctl(struct file *file, unsigned int cmd,
 +                             unsigned long parm)
 +{
 +      struct loop_device *lo;
 +      int ret = -ENOSYS;
 +
 +      mutex_lock(&loop_index_mutex);
 +      switch (cmd) {
 +      case LOOP_CTL_ADD:
 +              ret = loop_lookup(&lo, parm);
 +              if (ret >= 0) {
 +                      ret = -EEXIST;
 +                      break;
 +              }
 +              ret = loop_add(&lo, parm);
 +              break;
 +      case LOOP_CTL_REMOVE:
 +              ret = loop_lookup(&lo, parm);
 +              if (ret < 0)
 +                      break;
 +              mutex_lock(&lo->lo_ctl_mutex);
 +              if (lo->lo_state != Lo_unbound) {
 +                      ret = -EBUSY;
 +                      mutex_unlock(&lo->lo_ctl_mutex);
 +                      break;
 +              }
 +              if (lo->lo_refcnt > 0) {
 +                      ret = -EBUSY;
 +                      mutex_unlock(&lo->lo_ctl_mutex);
 +                      break;
 +              }
 +              lo->lo_disk->private_data = NULL;
 +              mutex_unlock(&lo->lo_ctl_mutex);
 +              idr_remove(&loop_index_idr, lo->lo_number);
 +              loop_remove(lo);
 +              break;
 +      case LOOP_CTL_GET_FREE:
 +              ret = loop_lookup(&lo, -1);
 +              if (ret >= 0)
 +                      break;
 +              ret = loop_add(&lo, -1);
 +      }
 +      mutex_unlock(&loop_index_mutex);
 +
 +      return ret;
 +}
 +
 +static const struct file_operations loop_ctl_fops = {
 +      .open           = nonseekable_open,
 +      .unlocked_ioctl = loop_control_ioctl,
 +      .compat_ioctl   = loop_control_ioctl,
 +      .owner          = THIS_MODULE,
 +      .llseek         = noop_llseek,
 +};
 +
 +static struct miscdevice loop_misc = {
 +      .minor          = LOOP_CTRL_MINOR,
 +      .name           = "loop-control",
 +      .fops           = &loop_ctl_fops,
 +};
 +
 +MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR);
 +MODULE_ALIAS("devname:loop-control");
 +
  static int __init loop_init(void)
  {
        int i, nr;
        unsigned long range;
 -      struct loop_device *lo, *next;
 +      struct loop_device *lo;
 +      int err;
  
 -      /*
 -       * loop module now has a feature to instantiate underlying device
 -       * structure on-demand, provided that there is an access dev node.
 -       * However, this will not work well with user space tool that doesn't
 -       * know about such "feature".  In order to not break any existing
 -       * tool, we do the following:
 -       *
 -       * (1) if max_loop is specified, create that many upfront, and this
 -       *     also becomes a hard limit.
 -       * (2) if max_loop is not specified, create 8 loop device on module
 -       *     load, user can further extend loop device by create dev node
 -       *     themselves and have kernel automatically instantiate actual
 -       *     device on-demand.
 -       */
 +      err = misc_register(&loop_misc);
 +      if (err < 0)
 +              return err;
  
        part_shift = 0;
        if (max_part > 0) {
        if (max_loop > 1UL << (MINORBITS - part_shift))
                return -EINVAL;
  
 +      /*
 +       * If max_loop is specified, create that many devices upfront.
 +       * This also becomes a hard limit. If max_loop is not specified,
 +       * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
 +       * init time. Loop devices can be requested on-demand with the
 +       * /dev/loop-control interface, or be instantiated by accessing
 +       * a 'dead' device node.
 +       */
        if (max_loop) {
                nr = max_loop;
                range = max_loop << part_shift;
        } else {
 -              nr = 8;
 +              nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
                range = 1UL << MINORBITS;
        }
  
        if (register_blkdev(LOOP_MAJOR, "loop"))
                return -EIO;
  
 -      for (i = 0; i < nr; i++) {
 -              lo = loop_alloc(i);
 -              if (!lo)
 -                      goto Enomem;
 -              list_add_tail(&lo->lo_list, &loop_devices);
 -      }
 -
 -      /* point of no return */
 -
 -      list_for_each_entry(lo, &loop_devices, lo_list)
 -              add_disk(lo->lo_disk);
 -
        blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
                                  THIS_MODULE, loop_probe, NULL, NULL);
  
 +      /* pre-create number of devices given by config or max_loop */
 +      mutex_lock(&loop_index_mutex);
 +      for (i = 0; i < nr; i++)
 +              loop_add(&lo, i);
 +      mutex_unlock(&loop_index_mutex);
 +
        printk(KERN_INFO "loop: module loaded\n");
        return 0;
 +}
  
 -Enomem:
 -      printk(KERN_INFO "loop: out of memory\n");
 -
 -      list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
 -              loop_free(lo);
 +static int loop_exit_cb(int id, void *ptr, void *data)
 +{
 +      struct loop_device *lo = ptr;
  
 -      unregister_blkdev(LOOP_MAJOR, "loop");
 -      return -ENOMEM;
 +      loop_remove(lo);
 +      return 0;
  }
  
  static void __exit loop_exit(void)
  {
        unsigned long range;
 -      struct loop_device *lo, *next;
  
        range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
  
 -      list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
 -              loop_del_one(lo);
 +      idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
 +      idr_remove_all(&loop_index_idr);
 +      idr_destroy(&loop_index_idr);
  
        blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
        unregister_blkdev(LOOP_MAJOR, "loop");
 +
 +      misc_deregister(&loop_misc);
  }
  
  module_init(loop_init);
index 1540792b1e547ee882fcbc168bb5d4156d5d1678,79efec24569bb9558414c16e14a07b3a66153205..15ec4db194d1bb793e4409dfed54563e4b71b080
@@@ -39,6 -39,9 +39,9 @@@
  #include <linux/list.h>
  #include <linux/delay.h>
  #include <linux/freezer.h>
+ #include <linux/loop.h>
+ #include <linux/falloc.h>
+ #include <linux/fs.h>
  
  #include <xen/events.h>
  #include <xen/page.h>
@@@ -258,13 -261,16 +261,16 @@@ irqreturn_t xen_blkif_be_int(int irq, v
  
  static void print_stats(struct xen_blkif *blkif)
  {
-       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
+       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
+                "  |  ds %4d\n",
                 current->comm, blkif->st_oo_req,
-                blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+                blkif->st_rd_req, blkif->st_wr_req,
+                blkif->st_f_req, blkif->st_ds_req);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
        blkif->st_oo_req = 0;
+       blkif->st_ds_req = 0;
  }
  
  int xen_blkif_schedule(void *arg)
@@@ -396,7 -402,7 +402,7 @@@ static int xen_blkbk_map(struct blkif_r
                        continue;
  
                ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
 -                      blkbk->pending_page(pending_req, i), false);
 +                      blkbk->pending_page(pending_req, i), NULL);
                if (ret) {
                        pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
                                 (unsigned long)map[i].dev_bus_addr, ret);
        return ret;
  }
  
+ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+ {
+       int err = 0;
+       int status = BLKIF_RSP_OKAY;
+       struct block_device *bdev = blkif->vbd.bdev;
+       if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+               /* just forward the discard request */
+               err = blkdev_issue_discard(bdev,
+                               req->u.discard.sector_number,
+                               req->u.discard.nr_sectors,
+                               GFP_KERNEL, 0);
+       else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+               /* punch a hole in the backing file */
+               struct loop_device *lo = bdev->bd_disk->private_data;
+               struct file *file = lo->lo_backing_file;
+               if (file->f_op->fallocate)
+                       err = file->f_op->fallocate(file,
+                               FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+                               req->u.discard.sector_number << 9,
+                               req->u.discard.nr_sectors << 9);
+               else
+                       err = -EOPNOTSUPP;
+       } else
+               err = -EOPNOTSUPP;
+       if (err == -EOPNOTSUPP) {
+               pr_debug(DRV_PFX "discard op failed, not supported\n");
+               status = BLKIF_RSP_EOPNOTSUPP;
+       } else if (err)
+               status = BLKIF_RSP_ERROR;
+       make_response(blkif, req->id, req->operation, status);
+ }
+ static void xen_blk_drain_io(struct xen_blkif *blkif)
+ {
+       atomic_set(&blkif->drain, 1);
+       do {
+               /* The initial value is one, and one refcnt taken at the
+                * start of the xen_blkif_schedule thread. */
+               if (atomic_read(&blkif->refcnt) <= 2)
+                       break;
+               wait_for_completion_interruptible_timeout(
+                               &blkif->drain_complete, HZ);
+               if (!atomic_read(&blkif->drain))
+                       break;
+       } while (!kthread_should_stop());
+       atomic_set(&blkif->drain, 0);
+ }
  /*
   * Completion callback on the bio's. Called as bh->b_end_io()
   */
@@@ -422,6 -481,11 +481,11 @@@ static void __end_block_io_op(struct pe
                pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
                xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+       } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+                   (error == -EOPNOTSUPP)) {
+               pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+               xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+               pending_req->status = BLKIF_RSP_EOPNOTSUPP;
        } else if (error) {
                pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
                         " error=%d\n", error);
                make_response(pending_req->blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
                xen_blkif_put(pending_req->blkif);
+               if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
+                       if (atomic_read(&pending_req->blkif->drain))
+                               complete(&pending_req->blkif->drain_complete);
+               }
                free_req(pending_req);
        }
  }
@@@ -532,7 -600,6 +600,6 @@@ do_block_io_op(struct xen_blkif *blkif
  
        return more_to_do;
  }
  /*
   * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
   * and call the 'submit_bio' to pass it to the underlying storage.
@@@ -549,6 -616,7 +616,7 @@@ static int dispatch_rw_block_io(struct 
        int i, nbio = 0;
        int operation;
        struct blk_plug plug;
+       bool drain = false;
  
        switch (req->operation) {
        case BLKIF_OP_READ:
                blkif->st_wr_req++;
                operation = WRITE_ODIRECT;
                break;
+       case BLKIF_OP_WRITE_BARRIER:
+               drain = true;
        case BLKIF_OP_FLUSH_DISKCACHE:
                blkif->st_f_req++;
                operation = WRITE_FLUSH;
                break;
-       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_DISCARD:
+               blkif->st_ds_req++;
+               operation = REQ_DISCARD;
+               break;
        default:
                operation = 0; /* make gcc happy */
                goto fail_response;
  
        /* Check that the number of segments is sane. */
        nseg = req->nr_segments;
-       if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+       if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+                               operation != REQ_DISCARD) ||
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                         nseg);
                }
        }
  
+       /* Wait on all outstanding I/O's and once that has been completed
+        * issue the WRITE_FLUSH.
+        */
+       if (drain)
+               xen_blk_drain_io(pending_req->blkif);
        /*
         * If we have failed at this point, we need to undo the M2P override,
         * set gnttab_set_unmap_op on all of the grant references and perform
         * the hypercall to unmap the grants - that is all done in
         * xen_blkbk_unmap.
         */
-       if (xen_blkbk_map(req, pending_req, seg))
+       if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
                goto fail_flush;
  
-       /* This corresponding xen_blkif_put is done in __end_block_io_op */
+       /*
+        * This corresponding xen_blkif_put is done in __end_block_io_op, or
+        * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
+        */
        xen_blkif_get(blkif);
  
        for (i = 0; i < nseg; i++) {
                preq.sector_number += seg[i].nsec;
        }
  
-       /* This will be hit if the operation was a flush. */
+       /* This will be hit if the operation was a flush or discard. */
        if (!bio) {
-               BUG_ON(operation != WRITE_FLUSH);
+               BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
  
-               bio = bio_alloc(GFP_KERNEL, 0);
-               if (unlikely(bio == NULL))
-                       goto fail_put_bio;
+               if (operation == WRITE_FLUSH) {
+                       bio = bio_alloc(GFP_KERNEL, 0);
+                       if (unlikely(bio == NULL))
+                               goto fail_put_bio;
  
-               biolist[nbio++] = bio;
-               bio->bi_bdev    = preq.bdev;
-               bio->bi_private = pending_req;
-               bio->bi_end_io  = end_block_io_op;
+                       biolist[nbio++] = bio;
+                       bio->bi_bdev    = preq.bdev;
+                       bio->bi_private = pending_req;
+                       bio->bi_end_io  = end_block_io_op;
+               } else if (operation == REQ_DISCARD) {
+                       xen_blk_discard(blkif, req);
+                       xen_blkif_put(blkif);
+                       free_req(pending_req);
+                       return 0;
+               }
        }
  
        /*
  
        if (operation == READ)
                blkif->st_rd_sect += preq.nr_sects;
-       else if (operation == WRITE || operation == WRITE_FLUSH)
+       else if (operation & WRITE)
                blkif->st_wr_sect += preq.nr_sects;
  
        return 0;
@@@ -765,9 -855,9 +855,9 @@@ static int __init xen_blkif_init(void
  
        mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
  
-       blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+       blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
                                        xen_blkif_reqs, GFP_KERNEL);
-       blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+       blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
                                        mmap_pages, GFP_KERNEL);
        blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
                                        mmap_pages, GFP_KERNEL);
        if (rc)
                goto failed_init;
  
-       memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
        INIT_LIST_HEAD(&blkbk->pending_free);
        spin_lock_init(&blkbk->pending_free_lock);
        init_waitqueue_head(&blkbk->pending_free_wq);
index c4bd34063ecc8f01303932d4ec35d239e2f0d4a0,e638457d9de44d3305011207b333cf85cacb2757..de09f525d6c174509af3a1e25ce6609fbb2ef1fc
@@@ -27,6 -27,7 +27,6 @@@
  #ifndef __XEN_BLKIF__BACKEND__COMMON_H__
  #define __XEN_BLKIF__BACKEND__COMMON_H__
  
 -#include <linux/version.h>
  #include <linux/module.h>
  #include <linux/interrupt.h>
  #include <linux/slab.h>
@@@ -62,13 -63,26 +62,26 @@@ struct blkif_common_response 
  
  /* i386 protocol version */
  #pragma pack(push, 4)
+ struct blkif_x86_32_request_rw {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ };
+ struct blkif_x86_32_request_discard {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       uint64_t nr_sectors;
+ };
  struct blkif_x86_32_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       id;           /* private guest value, echoed in resp  */
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       union {
+               struct blkif_x86_32_request_rw rw;
+               struct blkif_x86_32_request_discard discard;
+       } u;
  };
  struct blkif_x86_32_response {
        uint64_t        id;              /* copied from request */
  #pragma pack(pop)
  
  /* x86_64 protocol version */
+ struct blkif_x86_64_request_rw {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ };
+ struct blkif_x86_64_request_discard {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       uint64_t nr_sectors;
+ };
  struct blkif_x86_64_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       __attribute__((__aligned__(8))) id;
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       union {
+               struct blkif_x86_64_request_rw rw;
+               struct blkif_x86_64_request_discard discard;
+       } u;
  };
  struct blkif_x86_64_response {
        uint64_t       __attribute__((__aligned__(8))) id;
@@@ -112,6 -139,11 +138,11 @@@ enum blkif_protocol 
        BLKIF_PROTOCOL_X86_64 = 3,
  };
  
+ enum blkif_backend_type {
+       BLKIF_BACKEND_PHY  = 1,
+       BLKIF_BACKEND_FILE = 2,
+ };
  struct xen_vbd {
        /* What the domain refers to this vbd as. */
        blkif_vdev_t            handle;
@@@ -137,6 -169,7 +168,7 @@@ struct xen_blkif 
        unsigned int            irq;
        /* Comms information. */
        enum blkif_protocol     blk_protocol;
+       enum blkif_backend_type blk_backend_type;
        union blkif_back_rings  blk_rings;
        struct vm_struct        *blk_ring_area;
        /* The VBD attached to this interface. */
        atomic_t                refcnt;
  
        wait_queue_head_t       wq;
+       /* for barrier (drain) requests */
+       struct completion       drain_complete;
+       atomic_t                drain;
        /* One thread per one blkif. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;
        int                     st_wr_req;
        int                     st_oo_req;
        int                     st_f_req;
+       int                     st_ds_req;
        int                     st_rd_sect;
        int                     st_wr_sect;
  
  
  struct phys_req {
        unsigned short          dev;
-       unsigned short          nr_sects;
+       blkif_sector_t          nr_sects;
        struct block_device     *bdev;
        blkif_sector_t          sector_number;
  };
@@@ -195,6 -232,8 +231,8 @@@ int xen_blkif_schedule(void *arg)
  int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);
  
+ int xen_blkbk_barrier(struct xenbus_transaction xbt,
+                     struct backend_info *be, int state);
  struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
  
  static inline void blkif_get_x86_32_req(struct blkif_request *dst,
        dst->nr_segments = src->nr_segments;
        dst->handle = src->handle;
        dst->id = src->id;
-       dst->u.rw.sector_number = src->sector_number;
-       barrier();
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->u.rw.seg[i] = src->seg[i];
+       switch (src->operation) {
+       case BLKIF_OP_READ:
+       case BLKIF_OP_WRITE:
+       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_FLUSH_DISKCACHE:
+               dst->u.rw.sector_number = src->u.rw.sector_number;
+               barrier();
+               if (n > dst->nr_segments)
+                       n = dst->nr_segments;
+               for (i = 0; i < n; i++)
+                       dst->u.rw.seg[i] = src->u.rw.seg[i];
+               break;
+       case BLKIF_OP_DISCARD:
+               dst->u.discard.sector_number = src->u.discard.sector_number;
+               dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+               break;
+       default:
+               break;
+       }
  }
  
  static inline void blkif_get_x86_64_req(struct blkif_request *dst,
        dst->nr_segments = src->nr_segments;
        dst->handle = src->handle;
        dst->id = src->id;
-       dst->u.rw.sector_number = src->sector_number;
-       barrier();
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->u.rw.seg[i] = src->seg[i];
+       switch (src->operation) {
+       case BLKIF_OP_READ:
+       case BLKIF_OP_WRITE:
+       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_FLUSH_DISKCACHE:
+               dst->u.rw.sector_number = src->u.rw.sector_number;
+               barrier();
+               if (n > dst->nr_segments)
+                       n = dst->nr_segments;
+               for (i = 0; i < n; i++)
+                       dst->u.rw.seg[i] = src->u.rw.seg[i];
+               break;
+       case BLKIF_OP_DISCARD:
+               dst->u.discard.sector_number = src->u.discard.sector_number;
+               dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+               break;
+       default:
+               break;
+       }
  }
  
  #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
index 5fd2010f7d2bd96e1dc7b4c290fa71299d480d14,a6d43030b1078e027e4c0ccb41cea7070518e428..2c008afe63d9dbb5499712f9abd3333904a35a3c
@@@ -114,6 -114,8 +114,8 @@@ static struct xen_blkif *xen_blkif_allo
        spin_lock_init(&blkif->blk_ring_lock);
        atomic_set(&blkif->refcnt, 1);
        init_waitqueue_head(&blkif->wq);
+       init_completion(&blkif->drain_complete);
+       atomic_set(&blkif->drain, 0);
        blkif->st_print = jiffies;
        init_waitqueue_head(&blkif->waiting_to_free);
  
@@@ -272,6 -274,7 +274,7 @@@ VBD_SHOW(oo_req,  "%d\n", be->blkif->st
  VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
  VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
  VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
+ VBD_SHOW(ds_req,  "%d\n", be->blkif->st_ds_req);
  VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
  VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
  
@@@ -280,6 -283,7 +283,7 @@@ static struct attribute *xen_vbdstat_at
        &dev_attr_rd_req.attr,
        &dev_attr_wr_req.attr,
        &dev_attr_f_req.attr,
+       &dev_attr_ds_req.attr,
        &dev_attr_rd_sect.attr,
        &dev_attr_wr_sect.attr,
        NULL
@@@ -419,6 -423,73 +423,73 @@@ int xen_blkbk_flush_diskcache(struct xe
        return err;
  }
  
+ int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+ {
+       struct xenbus_device *dev = be->dev;
+       struct xen_blkif *blkif = be->blkif;
+       char *type;
+       int err;
+       int state = 0;
+       type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+       if (!IS_ERR(type)) {
+               if (strncmp(type, "file", 4) == 0) {
+                       state = 1;
+                       blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+               }
+               if (strncmp(type, "phy", 3) == 0) {
+                       struct block_device *bdev = be->blkif->vbd.bdev;
+                       struct request_queue *q = bdev_get_queue(bdev);
+                       if (blk_queue_discard(q)) {
+                               err = xenbus_printf(xbt, dev->nodename,
+                                       "discard-granularity", "%u",
+                                       q->limits.discard_granularity);
+                               if (err) {
+                                       xenbus_dev_fatal(dev, err,
+                                               "writing discard-granularity");
+                                       goto kfree;
+                               }
+                               err = xenbus_printf(xbt, dev->nodename,
+                                       "discard-alignment", "%u",
+                                       q->limits.discard_alignment);
+                               if (err) {
+                                       xenbus_dev_fatal(dev, err,
+                                               "writing discard-alignment");
+                                       goto kfree;
+                               }
+                               state = 1;
+                               blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+                       }
+               }
+       } else {
+               err = PTR_ERR(type);
+               xenbus_dev_fatal(dev, err, "reading type");
+               goto out;
+       }
+       err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+                           "%d", state);
+       if (err)
+               xenbus_dev_fatal(dev, err, "writing feature-discard");
+ kfree:
+       kfree(type);
+ out:
+       return err;
+ }
+ int xen_blkbk_barrier(struct xenbus_transaction xbt,
+                     struct backend_info *be, int state)
+ {
+       struct xenbus_device *dev = be->dev;
+       int err;
+       err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+                           "%d", state);
+       if (err)
+               xenbus_dev_fatal(dev, err, "writing feature-barrier");
+       return err;
+ }
  /*
   * Entry point to this code when a new device is created.  Allocate the basic
   * structures, and watch the store waiting for the hotplug scripts to tell us
@@@ -601,11 -672,11 +672,11 @@@ static void frontend_changed(struct xen
                break;
  
        case XenbusStateClosing:
 -              xen_blkif_disconnect(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
  
        case XenbusStateClosed:
 +              xen_blkif_disconnect(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosed);
                if (xenbus_dev_is_online(dev))
                        break;
@@@ -650,6 -721,11 +721,11 @@@ again
        if (err)
                goto abort;
  
+       err = xen_blkbk_discard(xbt, be);
+       /* If we can't advertise it is OK. */
+       err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                            (unsigned long long)vbd_sz(&be->blkif->vbd));
        if (err) {
index 9ea8c2576c70e768f22ad8ea2cc423337611fb4f,773da7d6491e882ab30e980cb0bb7699c8d72ee2..7b2ec5908413da7b989044828879afa173613b87
@@@ -98,6 -98,9 +98,9 @@@ struct blkfront_inf
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
+       unsigned int feature_discard;
+       unsigned int discard_granularity;
+       unsigned int discard_alignment;
        int is_ready;
  };
  
@@@ -123,8 -126,8 +126,8 @@@ static DEFINE_SPINLOCK(minor_lock)
  #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
  #define EMULATED_HD_DISK_MINOR_OFFSET (0)
  #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
 -#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
 -#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
 +#define EMULATED_SD_DISK_MINOR_OFFSET (0)
 +#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
  
  #define DEV_NAME      "xvd"   /* name in /dev */
  
@@@ -302,29 -305,36 +305,36 @@@ static int blkif_queue_request(struct r
                ring_req->operation = info->flush_op;
        }
  
-       ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
-       BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+               /* id, sector_number and handle are set above. */
+               ring_req->operation = BLKIF_OP_DISCARD;
+               ring_req->nr_segments = 0;
+               ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+       } else {
+               ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
+               BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
  
-       for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
-               buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
-               fsect = sg->offset >> 9;
-               lsect = fsect + (sg->length >> 9) - 1;
-               /* install a grant reference. */
-               ref = gnttab_claim_grant_reference(&gref_head);
-               BUG_ON(ref == -ENOSPC);
+               for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+                       buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
+                       fsect = sg->offset >> 9;
+                       lsect = fsect + (sg->length >> 9) - 1;
+                       /* install a grant reference. */
+                       ref = gnttab_claim_grant_reference(&gref_head);
+                       BUG_ON(ref == -ENOSPC);
  
-               gnttab_grant_foreign_access_ref(
-                               ref,
-                               info->xbdev->otherend_id,
-                               buffer_mfn,
-                               rq_data_dir(req) );
-               info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
-               ring_req->u.rw.seg[i] =
-                               (struct blkif_request_segment) {
-                                       .gref       = ref,
-                                       .first_sect = fsect,
-                                       .last_sect  = lsect };
+                       gnttab_grant_foreign_access_ref(
+                                       ref,
+                                       info->xbdev->otherend_id,
+                                       buffer_mfn,
+                                       rq_data_dir(req));
+                       info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+                       ring_req->u.rw.seg[i] =
+                                       (struct blkif_request_segment) {
+                                               .gref       = ref,
+                                               .first_sect = fsect,
+                                               .last_sect  = lsect };
+               }
        }
  
        info->ring.req_prod_pvt++;
@@@ -370,7 -380,9 +380,9 @@@ static void do_blkif_request(struct req
  
                blk_start_request(req);
  
-               if (req->cmd_type != REQ_TYPE_FS) {
+               if ((req->cmd_type != REQ_TYPE_FS) ||
+                   ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
+                   !info->flush_op)) {
                        __blk_end_request_all(req, -EIO);
                        continue;
                }
@@@ -399,6 -411,7 +411,7 @@@ wait
  static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
  {
        struct request_queue *rq;
+       struct blkfront_info *info = gd->private_data;
  
        rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
        if (rq == NULL)
  
        queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
  
+       if (info->feature_discard) {
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+               blk_queue_max_discard_sectors(rq, get_capacity(gd));
+               rq->limits.discard_granularity = info->discard_granularity;
+               rq->limits.discard_alignment = info->discard_alignment;
+       }
        /* Hard sector size and max sectors impersonate the equiv. hardware. */
        blk_queue_logical_block_size(rq, sector_size);
        blk_queue_max_hw_sectors(rq, 512);
@@@ -529,7 -549,7 +549,7 @@@ static int xlvbd_alloc_gendisk(blkif_se
                minor = BLKIF_MINOR_EXT(info->vdevice);
                nr_parts = PARTS_PER_EXT_DISK;
                offset = minor / nr_parts;
 -              if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
 +              if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
                        printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
                                        "emulated IDE disks,\n\t choose an xvd device name"
                                        "from xvde on\n", info->vdevice);
@@@ -722,6 -742,17 +742,17 @@@ static irqreturn_t blkif_interrupt(int 
  
                error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
                switch (bret->operation) {
+               case BLKIF_OP_DISCARD:
+                       if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+                               struct request_queue *rq = info->rq;
+                               printk(KERN_WARNING "blkfront: %s: discard op failed\n",
+                                          info->gd->disk_name);
+                               error = -EOPNOTSUPP;
+                               info->feature_discard = 0;
+                               queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+                       }
+                       __blk_end_request_all(req, error);
+                       break;
                case BLKIF_OP_FLUSH_DISKCACHE:
                case BLKIF_OP_WRITE_BARRIER:
                        if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@@ -1098,6 -1129,33 +1129,33 @@@ blkfront_closing(struct blkfront_info *
        bdput(bdev);
  }
  
+ static void blkfront_setup_discard(struct blkfront_info *info)
+ {
+       int err;
+       char *type;
+       unsigned int discard_granularity;
+       unsigned int discard_alignment;
+       type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
+       if (IS_ERR(type))
+               return;
+       if (strncmp(type, "phy", 3) == 0) {
+               err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                       "discard-granularity", "%u", &discard_granularity,
+                       "discard-alignment", "%u", &discard_alignment,
+                       NULL);
+               if (!err) {
+                       info->feature_discard = 1;
+                       info->discard_granularity = discard_granularity;
+                       info->discard_alignment = discard_alignment;
+               }
+       } else if (strncmp(type, "file", 4) == 0)
+               info->feature_discard = 1;
+       kfree(type);
+ }
  /*
   * Invoked when the backend is finally 'ready' (and has told produced
   * the details about the physical device - #sectors, size, etc).
@@@ -1108,7 -1166,7 +1166,7 @@@ static void blkfront_connect(struct blk
        unsigned long sector_size;
        unsigned int binfo;
        int err;
-       int barrier, flush;
+       int barrier, flush, discard;
  
        switch (info->connected) {
        case BLKIF_STATE_CONNECTED:
                info->feature_flush = REQ_FLUSH;
                info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
        }
-               
+       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                           "feature-discard", "%d", &discard,
+                           NULL);
+       if (!err && discard)
+               blkfront_setup_discard(info);
        err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@@ -1385,6 -1450,8 +1450,8 @@@ static struct xenbus_driver blkfront = 
  
  static int __init xlblk_init(void)
  {
+       int ret;
        if (!xen_domain())
                return -ENODEV;
  
                return -ENODEV;
        }
  
-       return xenbus_register_frontend(&blkfront);
+       ret = xenbus_register_frontend(&blkfront);
+       if (ret) {
+               unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
+               return ret;
+       }
+       return 0;
  }
  module_init(xlblk_init);
  
diff --combined drivers/scsi/hpsa.c
index 9825ecf3495793cc5a6c1b166337d9ab85a0cf58,381929813cbdc284233877ed6d33a1eab855d1af..bbdc9f960a66fd7105a314f89e900e5b17492af9
@@@ -676,16 -676,6 +676,16 @@@ static void hpsa_scsi_replace_entry(str
        BUG_ON(entry < 0 || entry >= HPSA_MAX_SCSI_DEVS_PER_HBA);
        removed[*nremoved] = h->dev[entry];
        (*nremoved)++;
 +
 +      /*
 +       * New physical devices won't have target/lun assigned yet
 +       * so we need to preserve the values in the slot we are replacing.
 +       */
 +      if (new_entry->target == -1) {
 +              new_entry->target = h->dev[entry]->target;
 +              new_entry->lun = h->dev[entry]->lun;
 +      }
 +
        h->dev[entry] = new_entry;
        added[*nadded] = new_entry;
        (*nadded)++;
@@@ -1558,17 -1548,10 +1558,17 @@@ static inline void hpsa_set_bus_target_
  }
  
  static int hpsa_update_device_info(struct ctlr_info *h,
 -      unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device)
 +      unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device,
 +      unsigned char *is_OBDR_device)
  {
 -#define OBDR_TAPE_INQ_SIZE 49
 +
 +#define OBDR_SIG_OFFSET 43
 +#define OBDR_TAPE_SIG "$DR-10"
 +#define OBDR_SIG_LEN (sizeof(OBDR_TAPE_SIG) - 1)
 +#define OBDR_TAPE_INQ_SIZE (OBDR_SIG_OFFSET + OBDR_SIG_LEN)
 +
        unsigned char *inq_buff;
 +      unsigned char *obdr_sig;
  
        inq_buff = kzalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
        if (!inq_buff)
        else
                this_device->raid_level = RAID_UNKNOWN;
  
 +      if (is_OBDR_device) {
 +              /* See if this is a One-Button-Disaster-Recovery device
 +               * by looking for "$DR-10" at offset 43 in inquiry data.
 +               */
 +              obdr_sig = &inq_buff[OBDR_SIG_OFFSET];
 +              *is_OBDR_device = (this_device->devtype == TYPE_ROM &&
 +                                      strncmp(obdr_sig, OBDR_TAPE_SIG,
 +                                              OBDR_SIG_LEN) == 0);
 +      }
 +
        kfree(inq_buff);
        return 0;
  
@@@ -1743,7 -1716,7 +1743,7 @@@ static int add_msa2xxx_enclosure_device
                return 0;
        }
  
 -      if (hpsa_update_device_info(h, scsi3addr, this_device))
 +      if (hpsa_update_device_info(h, scsi3addr, this_device, NULL))
                return 0;
        (*nmsa2xxx_enclosures)++;
        hpsa_set_bus_target_lun(this_device, bus, target, 0);
@@@ -1835,6 -1808,7 +1835,6 @@@ static void hpsa_update_scsi_devices(st
         */
        struct ReportLUNdata *physdev_list = NULL;
        struct ReportLUNdata *logdev_list = NULL;
 -      unsigned char *inq_buff = NULL;
        u32 nphysicals = 0;
        u32 nlogicals = 0;
        u32 ndev_allocated = 0;
                GFP_KERNEL);
        physdev_list = kzalloc(reportlunsize, GFP_KERNEL);
        logdev_list = kzalloc(reportlunsize, GFP_KERNEL);
 -      inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
        tmpdevice = kzalloc(sizeof(*tmpdevice), GFP_KERNEL);
  
 -      if (!currentsd || !physdev_list || !logdev_list ||
 -              !inq_buff || !tmpdevice) {
 +      if (!currentsd || !physdev_list || !logdev_list || !tmpdevice) {
                dev_err(&h->pdev->dev, "out of memory\n");
                goto out;
        }
        /* adjust our table of devices */
        nmsa2xxx_enclosures = 0;
        for (i = 0; i < nphysicals + nlogicals + 1; i++) {
 -              u8 *lunaddrbytes;
 +              u8 *lunaddrbytes, is_OBDR = 0;
  
                /* Figure out where the LUN ID info is coming from */
                lunaddrbytes = figure_lunaddrbytes(h, raid_ctlr_position,
                        continue;
  
                /* Get device type, vendor, model, device id */
 -              if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice))
 +              if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice,
 +                                                      &is_OBDR))
                        continue; /* skip it if we can't talk to it. */
                figure_bus_target_lun(h, lunaddrbytes, &bus, &target, &lun,
                        tmpdevice);
                hpsa_set_bus_target_lun(this_device, bus, target, lun);
  
                switch (this_device->devtype) {
 -              case TYPE_ROM: {
 +              case TYPE_ROM:
                        /* We don't *really* support actual CD-ROM devices,
                         * just "One Button Disaster Recovery" tape drive
                         * which temporarily pretends to be a CD-ROM drive.
                         * device by checking for "$DR-10" in bytes 43-48 of
                         * the inquiry data.
                         */
 -                              char obdr_sig[7];
 -#define OBDR_TAPE_SIG "$DR-10"
 -                              strncpy(obdr_sig, &inq_buff[43], 6);
 -                              obdr_sig[6] = '\0';
 -                              if (strncmp(obdr_sig, OBDR_TAPE_SIG, 6) != 0)
 -                                      /* Not OBDR device, ignore it. */
 -                                      break;
 -                      }
 -                      ncurrent++;
 +                      if (is_OBDR)
 +                              ncurrent++;
                        break;
                case TYPE_DISK:
                        if (i < nphysicals)
@@@ -1965,6 -1947,7 +1965,6 @@@ out
        for (i = 0; i < ndev_allocated; i++)
                kfree(currentsd[i]);
        kfree(currentsd);
 -      kfree(inq_buff);
        kfree(physdev_list);
        kfree(logdev_list);
  }
@@@ -3300,6 -3283,13 +3300,13 @@@ static int hpsa_controller_hard_reset(s
                pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
                pmcsr |= PCI_D0;
                pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+               /*
+                * The P600 requires a small delay when changing states.
+                * Otherwise we may think the board did not reset and we bail.
+                * This for kdump only and is particular to the P600.
+                */
+               msleep(500);
        }
        return 0;
  }
@@@ -3438,8 -3428,10 +3445,8 @@@ static __devinit int hpsa_kdump_hard_re
        } else {
                use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
                if (use_doorbell) {
 -                      dev_warn(&pdev->dev, "Controller claims that "
 -                              "'Bit 2 doorbell reset' is "
 -                              "supported, but not 'bit 5 doorbell reset'.  "
 -                              "Firmware update is recommended.\n");
 +                      dev_warn(&pdev->dev, "Soft reset not supported. "
 +                              "Firmware update is required.\n");
                        rc = -ENOTSUPP; /* try soft reset */
                        goto unmap_cfgtable;
                }
diff --combined fs/block_dev.c
index 1c44b8d54504e1c633774fd6befe966f0c1e7dc1,0bed0d4588dd87f103c5239d66f6058461f89253..b07f1da1de4e34470fd64af913c9366e0d6c8513
@@@ -971,7 -971,7 +971,7 @@@ static void flush_disk(struct block_dev
  
        if (!bdev->bd_disk)
                return;
-       if (disk_partitionable(bdev->bd_disk))
+       if (disk_part_scan_enabled(bdev->bd_disk))
                bdev->bd_invalidated = 1;
  }
  
@@@ -1085,7 -1085,6 +1085,7 @@@ static int __blkdev_put(struct block_de
  static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  {
        struct gendisk *disk;
 +      struct module *owner;
        int ret;
        int partno;
        int perm = 0;
        disk = get_gendisk(bdev->bd_dev, &partno);
        if (!disk)
                goto out;
 +      owner = disk->fops->owner;
  
        disk_block_events(disk);
        mutex_lock_nested(&bdev->bd_mutex, for_part);
                                        bdev->bd_disk = NULL;
                                        mutex_unlock(&bdev->bd_mutex);
                                        disk_unblock_events(disk);
 -                                      module_put(disk->fops->owner);
                                        put_disk(disk);
 +                                      module_put(owner);
                                        goto restart;
                                }
                        }
                                goto out_unlock_bdev;
                }
                /* only one opener holds refs to the module and disk */
 -              module_put(disk->fops->owner);
                put_disk(disk);
 +              module_put(owner);
        }
        bdev->bd_openers++;
        if (for_part)
   out_unlock_bdev:
        mutex_unlock(&bdev->bd_mutex);
        disk_unblock_events(disk);
 -      module_put(disk->fops->owner);
        put_disk(disk);
 +      module_put(owner);
   out:
        bdput(bdev);
  
@@@ -1431,11 -1429,6 +1431,11 @@@ static int __blkdev_put(struct block_de
                WARN_ON_ONCE(bdev->bd_holders);
                sync_blockdev(bdev);
                kill_bdev(bdev);
 +              /* ->release can cause the old bdi to disappear,
 +               * so must switch it out first
 +               */
 +              bdev_inode_switch_bdi(bdev->bd_inode,
 +                                      &default_backing_dev_info);
        }
        if (bdev->bd_contains == bdev) {
                if (disk->fops->release)
        if (!bdev->bd_openers) {
                struct module *owner = disk->fops->owner;
  
 -              put_disk(disk);
 -              module_put(owner);
                disk_put_part(bdev->bd_part);
                bdev->bd_part = NULL;
                bdev->bd_disk = NULL;
 -              bdev_inode_switch_bdi(bdev->bd_inode,
 -                                      &default_backing_dev_info);
                if (bdev != bdev->bd_contains)
                        victim = bdev->bd_contains;
                bdev->bd_contains = NULL;
 +
 +              put_disk(disk);
 +              module_put(owner);
        }
        mutex_unlock(&bdev->bd_mutex);
        bdput(bdev);
diff --combined include/linux/genhd.h
index 6957350e122f2444173ff94e0d650e0dc01ef8c6,6d18f3531f180f401d35e8028b3395b339182c2e..9de31bc98c8803bc96bac1f0751da3ca695bf8df
@@@ -21,8 -21,6 +21,8 @@@
  #define dev_to_part(device)   container_of((device), struct hd_struct, __dev)
  #define disk_to_dev(disk)     (&(disk)->part0.__dev)
  #define part_to_dev(part)     (&((part)->__dev))
 +#define alias_name(disk)      ((disk)->alias ? (disk)->alias : \
 +                                               (disk)->disk_name)
  
  extern struct device_type part_type;
  extern struct kobject *block_depr;
@@@ -60,7 -58,6 +60,7 @@@ enum 
  
  #define DISK_MAX_PARTS                        256
  #define DISK_NAME_LEN                 32
 +#define ALIAS_LEN                     256
  
  #include <linux/major.h>
  #include <linux/device.h>
@@@ -131,6 -128,7 +131,7 @@@ struct hd_struct 
  #define GENHD_FL_EXT_DEVT                     64 /* allow extended devt */
  #define GENHD_FL_NATIVE_CAPACITY              128
  #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE   256
+ #define GENHD_FL_NO_PART_SCAN                 512
  
  enum {
        DISK_EVENT_MEDIA_CHANGE                 = 1 << 0, /* media changed */
@@@ -165,7 -163,6 +166,7 @@@ struct gendisk 
                                           * disks that can't be partitioned. */
  
        char disk_name[DISK_NAME_LEN];  /* name of major driver */
 +      char *alias;                    /* alias name of disk */
        char *(*devnode)(struct gendisk *gd, mode_t *mode);
  
        unsigned int events;            /* supported events */
@@@ -238,9 -235,10 +239,10 @@@ static inline int disk_max_parts(struc
        return disk->minors;
  }
  
- static inline bool disk_partitionable(struct gendisk *disk)
+ static inline bool disk_part_scan_enabled(struct gendisk *disk)
  {
-       return disk_max_parts(disk) > 1;
+       return disk_max_parts(disk) > 1 &&
+               !(disk->flags & GENHD_FL_NO_PART_SCAN);
  }
  
  static inline dev_t disk_devt(struct gendisk *disk)
diff --combined include/linux/loop.h
index a06880689115ded34b73c39443004638635f2d3b,4367fc507fe916e1559c7050e24601778a18422c..11a41a8f08eb9e98cb1105982d093c3d075d5206
@@@ -64,6 -64,7 +64,6 @@@ struct loop_device 
  
        struct request_queue    *lo_queue;
        struct gendisk          *lo_disk;
 -      struct list_head        lo_list;
  };
  
  #endif /* __KERNEL__ */
@@@ -73,7 -74,9 +73,8 @@@
   */
  enum {
        LO_FLAGS_READ_ONLY      = 1,
 -      LO_FLAGS_USE_AOPS       = 2,
        LO_FLAGS_AUTOCLEAR      = 4,
+       LO_FLAGS_PARTSCAN       = 8,
  };
  
  #include <asm/posix_types.h>  /* for __kernel_old_dev_t */
@@@ -159,8 -162,4 +160,8 @@@ int loop_unregister_transfer(int number
  #define LOOP_CHANGE_FD                0x4C06
  #define LOOP_SET_CAPACITY     0x4C07
  
 +/* /dev/loop-control interface */
 +#define LOOP_CTL_ADD          0x4C80
 +#define LOOP_CTL_REMOVE               0x4C81
 +#define LOOP_CTL_GET_FREE     0x4C82
  #endif