aoeblk.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
  2. /*
  3. * aoeblk.c
  4. * block device routines
  5. */
  6. #include <linux/kernel.h>
  7. #include <linux/hdreg.h>
  8. #include <linux/blk-mq.h>
  9. #include <linux/backing-dev.h>
  10. #include <linux/fs.h>
  11. #include <linux/ioctl.h>
  12. #include <linux/slab.h>
  13. #include <linux/ratelimit.h>
  14. #include <linux/netdevice.h>
  15. #include <linux/mutex.h>
  16. #include <linux/export.h>
  17. #include <linux/moduleparam.h>
  18. #include <linux/debugfs.h>
  19. #include <scsi/sg.h>
  20. #include "aoe.h"
  21. static DEFINE_MUTEX(aoeblk_mutex);
  22. static struct kmem_cache *buf_pool_cache;
  23. static struct dentry *aoe_debugfs_dir;
  24. /* GPFS needs a larger value than the default. */
  25. static int aoe_maxsectors;
  26. module_param(aoe_maxsectors, int, 0644);
  27. MODULE_PARM_DESC(aoe_maxsectors,
  28. "When nonzero, set the maximum number of sectors per I/O request");
  29. static ssize_t aoedisk_show_state(struct device *dev,
  30. struct device_attribute *attr, char *page)
  31. {
  32. struct gendisk *disk = dev_to_disk(dev);
  33. struct aoedev *d = disk->private_data;
  34. return sysfs_emit(page, "%s%s\n",
  35. (d->flags & DEVFL_UP) ? "up" : "down",
  36. (d->flags & DEVFL_KICKME) ? ",kickme" :
  37. (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
  38. /* I'd rather see nopen exported so we can ditch closewait */
  39. }
  40. static ssize_t aoedisk_show_mac(struct device *dev,
  41. struct device_attribute *attr, char *page)
  42. {
  43. struct gendisk *disk = dev_to_disk(dev);
  44. struct aoedev *d = disk->private_data;
  45. struct aoetgt *t = d->targets[0];
  46. if (t == NULL)
  47. return sysfs_emit(page, "none\n");
  48. return sysfs_emit(page, "%pm\n", t->addr);
  49. }
  50. static ssize_t aoedisk_show_netif(struct device *dev,
  51. struct device_attribute *attr, char *page)
  52. {
  53. struct gendisk *disk = dev_to_disk(dev);
  54. struct aoedev *d = disk->private_data;
  55. struct net_device *nds[8], **nd, **nnd, **ne;
  56. struct aoetgt **t, **te;
  57. struct aoeif *ifp, *e;
  58. char *p;
  59. memset(nds, 0, sizeof nds);
  60. nd = nds;
  61. ne = nd + ARRAY_SIZE(nds);
  62. t = d->targets;
  63. te = t + d->ntargets;
  64. for (; t < te && *t; t++) {
  65. ifp = (*t)->ifs;
  66. e = ifp + NAOEIFS;
  67. for (; ifp < e && ifp->nd; ifp++) {
  68. for (nnd = nds; nnd < nd; nnd++)
  69. if (*nnd == ifp->nd)
  70. break;
  71. if (nnd == nd && nd != ne)
  72. *nd++ = ifp->nd;
  73. }
  74. }
  75. ne = nd;
  76. nd = nds;
  77. if (*nd == NULL)
  78. return sysfs_emit(page, "none\n");
  79. for (p = page; nd < ne; nd++)
  80. p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s",
  81. p == page ? "" : ",", (*nd)->name);
  82. p += scnprintf(p, PAGE_SIZE - (p-page), "\n");
  83. return p-page;
  84. }
  85. /* firmware version */
  86. static ssize_t aoedisk_show_fwver(struct device *dev,
  87. struct device_attribute *attr, char *page)
  88. {
  89. struct gendisk *disk = dev_to_disk(dev);
  90. struct aoedev *d = disk->private_data;
  91. return sysfs_emit(page, "0x%04x\n", (unsigned int) d->fw_ver);
  92. }
  93. static ssize_t aoedisk_show_payload(struct device *dev,
  94. struct device_attribute *attr, char *page)
  95. {
  96. struct gendisk *disk = dev_to_disk(dev);
  97. struct aoedev *d = disk->private_data;
  98. return sysfs_emit(page, "%lu\n", d->maxbcnt);
  99. }
  100. static int aoe_debugfs_show(struct seq_file *s, void *ignored)
  101. {
  102. struct aoedev *d;
  103. struct aoetgt **t, **te;
  104. struct aoeif *ifp, *ife;
  105. unsigned long flags;
  106. char c;
  107. d = s->private;
  108. seq_printf(s, "rttavg: %d rttdev: %d\n",
  109. d->rttavg >> RTTSCALE,
  110. d->rttdev >> RTTDSCALE);
  111. seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool));
  112. seq_printf(s, "kicked: %ld\n", d->kicked);
  113. seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt);
  114. seq_printf(s, "ref: %ld\n", d->ref);
  115. spin_lock_irqsave(&d->lock, flags);
  116. t = d->targets;
  117. te = t + d->ntargets;
  118. for (; t < te && *t; t++) {
  119. c = '\t';
  120. seq_printf(s, "falloc: %ld\n", (*t)->falloc);
  121. seq_printf(s, "ffree: %p\n",
  122. list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next);
  123. seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout,
  124. (*t)->maxout, (*t)->nframes);
  125. seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh);
  126. seq_printf(s, "\ttaint:%d\n", (*t)->taint);
  127. seq_printf(s, "\tr:%d\n", (*t)->rpkts);
  128. seq_printf(s, "\tw:%d\n", (*t)->wpkts);
  129. ifp = (*t)->ifs;
  130. ife = ifp + ARRAY_SIZE((*t)->ifs);
  131. for (; ifp->nd && ifp < ife; ifp++) {
  132. seq_printf(s, "%c%s", c, ifp->nd->name);
  133. c = ',';
  134. }
  135. seq_puts(s, "\n");
  136. }
  137. spin_unlock_irqrestore(&d->lock, flags);
  138. return 0;
  139. }
  140. DEFINE_SHOW_ATTRIBUTE(aoe_debugfs);
  141. static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
  142. static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
  143. static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
  144. static struct device_attribute dev_attr_firmware_version = {
  145. .attr = { .name = "firmware-version", .mode = 0444 },
  146. .show = aoedisk_show_fwver,
  147. };
  148. static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);
  149. static struct attribute *aoe_attrs[] = {
  150. &dev_attr_state.attr,
  151. &dev_attr_mac.attr,
  152. &dev_attr_netif.attr,
  153. &dev_attr_firmware_version.attr,
  154. &dev_attr_payload.attr,
  155. NULL,
  156. };
  157. static const struct attribute_group aoe_attr_group = {
  158. .attrs = aoe_attrs,
  159. };
  160. static const struct attribute_group *aoe_attr_groups[] = {
  161. &aoe_attr_group,
  162. NULL,
  163. };
  164. static void
  165. aoedisk_add_debugfs(struct aoedev *d)
  166. {
  167. char *p;
  168. if (aoe_debugfs_dir == NULL)
  169. return;
  170. p = strchr(d->gd->disk_name, '/');
  171. if (p == NULL)
  172. p = d->gd->disk_name;
  173. else
  174. p++;
  175. BUG_ON(*p == '\0');
  176. d->debugfs = debugfs_create_file(p, 0444, aoe_debugfs_dir, d,
  177. &aoe_debugfs_fops);
  178. }
  179. void
  180. aoedisk_rm_debugfs(struct aoedev *d)
  181. {
  182. debugfs_remove(d->debugfs);
  183. d->debugfs = NULL;
  184. }
  185. static int
  186. aoeblk_open(struct block_device *bdev, fmode_t mode)
  187. {
  188. struct aoedev *d = bdev->bd_disk->private_data;
  189. ulong flags;
  190. if (!virt_addr_valid(d)) {
  191. pr_crit("aoe: invalid device pointer in %s\n",
  192. __func__);
  193. WARN_ON(1);
  194. return -ENODEV;
  195. }
  196. if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
  197. return -ENODEV;
  198. mutex_lock(&aoeblk_mutex);
  199. spin_lock_irqsave(&d->lock, flags);
  200. if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
  201. d->nopen++;
  202. spin_unlock_irqrestore(&d->lock, flags);
  203. mutex_unlock(&aoeblk_mutex);
  204. return 0;
  205. }
  206. spin_unlock_irqrestore(&d->lock, flags);
  207. mutex_unlock(&aoeblk_mutex);
  208. return -ENODEV;
  209. }
  210. static void
  211. aoeblk_release(struct gendisk *disk, fmode_t mode)
  212. {
  213. struct aoedev *d = disk->private_data;
  214. ulong flags;
  215. spin_lock_irqsave(&d->lock, flags);
  216. if (--d->nopen == 0) {
  217. spin_unlock_irqrestore(&d->lock, flags);
  218. aoecmd_cfg(d->aoemajor, d->aoeminor);
  219. return;
  220. }
  221. spin_unlock_irqrestore(&d->lock, flags);
  222. }
  223. static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx,
  224. const struct blk_mq_queue_data *bd)
  225. {
  226. struct aoedev *d = hctx->queue->queuedata;
  227. spin_lock_irq(&d->lock);
  228. if ((d->flags & DEVFL_UP) == 0) {
  229. pr_info_ratelimited("aoe: device %ld.%d is not up\n",
  230. d->aoemajor, d->aoeminor);
  231. spin_unlock_irq(&d->lock);
  232. blk_mq_start_request(bd->rq);
  233. return BLK_STS_IOERR;
  234. }
  235. list_add_tail(&bd->rq->queuelist, &d->rq_list);
  236. aoecmd_work(d);
  237. spin_unlock_irq(&d->lock);
  238. return BLK_STS_OK;
  239. }
  240. static int
  241. aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
  242. {
  243. struct aoedev *d = bdev->bd_disk->private_data;
  244. if ((d->flags & DEVFL_UP) == 0) {
  245. printk(KERN_ERR "aoe: disk not up\n");
  246. return -ENODEV;
  247. }
  248. geo->cylinders = d->geo.cylinders;
  249. geo->heads = d->geo.heads;
  250. geo->sectors = d->geo.sectors;
  251. return 0;
  252. }
  253. static int
  254. aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
  255. {
  256. struct aoedev *d;
  257. if (!arg)
  258. return -EINVAL;
  259. d = bdev->bd_disk->private_data;
  260. if ((d->flags & DEVFL_UP) == 0) {
  261. pr_err("aoe: disk not up\n");
  262. return -ENODEV;
  263. }
  264. if (cmd == HDIO_GET_IDENTITY) {
  265. if (!copy_to_user((void __user *) arg, &d->ident,
  266. sizeof(d->ident)))
  267. return 0;
  268. return -EFAULT;
  269. }
  270. /* udev calls scsi_id, which uses SG_IO, resulting in noise */
  271. if (cmd != SG_IO)
  272. pr_info("aoe: unknown ioctl 0x%x\n", cmd);
  273. return -ENOTTY;
  274. }
  275. static const struct block_device_operations aoe_bdops = {
  276. .open = aoeblk_open,
  277. .release = aoeblk_release,
  278. .ioctl = aoeblk_ioctl,
  279. .compat_ioctl = blkdev_compat_ptr_ioctl,
  280. .getgeo = aoeblk_getgeo,
  281. .owner = THIS_MODULE,
  282. };
  283. static const struct blk_mq_ops aoeblk_mq_ops = {
  284. .queue_rq = aoeblk_queue_rq,
  285. };
  286. /* blk_mq_alloc_disk and add_disk can sleep */
  287. void
  288. aoeblk_gdalloc(void *vp)
  289. {
  290. struct aoedev *d = vp;
  291. struct gendisk *gd;
  292. mempool_t *mp;
  293. struct blk_mq_tag_set *set;
  294. ulong flags;
  295. int late = 0;
  296. int err;
  297. spin_lock_irqsave(&d->lock, flags);
  298. if (d->flags & DEVFL_GDALLOC
  299. && !(d->flags & DEVFL_TKILL)
  300. && !(d->flags & DEVFL_GD_NOW))
  301. d->flags |= DEVFL_GD_NOW;
  302. else
  303. late = 1;
  304. spin_unlock_irqrestore(&d->lock, flags);
  305. if (late)
  306. return;
  307. mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
  308. buf_pool_cache);
  309. if (mp == NULL) {
  310. printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
  311. d->aoemajor, d->aoeminor);
  312. goto err;
  313. }
  314. set = &d->tag_set;
  315. set->ops = &aoeblk_mq_ops;
  316. set->cmd_size = sizeof(struct aoe_req);
  317. set->nr_hw_queues = 1;
  318. set->queue_depth = 128;
  319. set->numa_node = NUMA_NO_NODE;
  320. set->flags = BLK_MQ_F_SHOULD_MERGE;
  321. err = blk_mq_alloc_tag_set(set);
  322. if (err) {
  323. pr_err("aoe: cannot allocate tag set for %ld.%d\n",
  324. d->aoemajor, d->aoeminor);
  325. goto err_mempool;
  326. }
  327. gd = blk_mq_alloc_disk(set, d);
  328. if (IS_ERR(gd)) {
  329. pr_err("aoe: cannot allocate block queue for %ld.%d\n",
  330. d->aoemajor, d->aoeminor);
  331. goto err_tagset;
  332. }
  333. spin_lock_irqsave(&d->lock, flags);
  334. WARN_ON(!(d->flags & DEVFL_GD_NOW));
  335. WARN_ON(!(d->flags & DEVFL_GDALLOC));
  336. WARN_ON(d->flags & DEVFL_TKILL);
  337. WARN_ON(d->gd);
  338. WARN_ON(d->flags & DEVFL_UP);
  339. blk_queue_max_hw_sectors(gd->queue, BLK_DEF_MAX_SECTORS);
  340. blk_queue_io_opt(gd->queue, SZ_2M);
  341. d->bufpool = mp;
  342. d->blkq = gd->queue;
  343. d->gd = gd;
  344. if (aoe_maxsectors)
  345. blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors);
  346. gd->major = AOE_MAJOR;
  347. gd->first_minor = d->sysminor;
  348. gd->minors = AOE_PARTITIONS;
  349. gd->fops = &aoe_bdops;
  350. gd->private_data = d;
  351. set_capacity(gd, d->ssize);
  352. snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
  353. d->aoemajor, d->aoeminor);
  354. d->flags &= ~DEVFL_GDALLOC;
  355. d->flags |= DEVFL_UP;
  356. spin_unlock_irqrestore(&d->lock, flags);
  357. err = device_add_disk(NULL, gd, aoe_attr_groups);
  358. if (err)
  359. goto out_disk_cleanup;
  360. aoedisk_add_debugfs(d);
  361. spin_lock_irqsave(&d->lock, flags);
  362. WARN_ON(!(d->flags & DEVFL_GD_NOW));
  363. d->flags &= ~DEVFL_GD_NOW;
  364. spin_unlock_irqrestore(&d->lock, flags);
  365. return;
  366. out_disk_cleanup:
  367. put_disk(gd);
  368. err_tagset:
  369. blk_mq_free_tag_set(set);
  370. err_mempool:
  371. mempool_destroy(mp);
  372. err:
  373. spin_lock_irqsave(&d->lock, flags);
  374. d->flags &= ~DEVFL_GD_NOW;
  375. queue_work(aoe_wq, &d->work);
  376. spin_unlock_irqrestore(&d->lock, flags);
  377. }
  378. void
  379. aoeblk_exit(void)
  380. {
  381. debugfs_remove_recursive(aoe_debugfs_dir);
  382. aoe_debugfs_dir = NULL;
  383. kmem_cache_destroy(buf_pool_cache);
  384. }
  385. int __init
  386. aoeblk_init(void)
  387. {
  388. buf_pool_cache = kmem_cache_create("aoe_bufs",
  389. sizeof(struct buf),
  390. 0, 0, NULL);
  391. if (buf_pool_cache == NULL)
  392. return -ENOMEM;
  393. aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);
  394. return 0;
  395. }