1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (c) 2006, Intel Corporation.
5 * Copyright (C) 2006-2008 Intel Corporation
6 * Author: Ashok Raj <ashok.raj@intel.com>
7 * Author: Shaohua Li <shaohua.li@intel.com>
8 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
10 * This file implements early detection/parsing of Remapping Devices
11 * reported to the OS through the BIOS via the DMA remapping reporting (DMAR) ACPI
14 * These routines are used by both DMA-remapping and Interrupt-remapping
17 #define pr_fmt(fmt) "DMAR: " fmt
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <asm/irq_remapping.h>
32 #include <asm/iommu_table.h>
34 #include "irq_remapping.h"
36 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
37 struct dmar_res_callback {
38 dmar_res_handler_t cb[ACPI_DMAR_TYPE_RESERVED];
39 void *arg[ACPI_DMAR_TYPE_RESERVED];
40 bool ignore_unhandled;
46 * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
47 * before IO devices managed by that unit.
48 * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
49 * after IO devices managed by that unit.
50 * 3) Hotplug events are rare.
52 * Locking rules for DMA and interrupt remapping related global data structures:
53 * 1) Use dmar_global_lock in process context
54 * 2) Use RCU in interrupt context
56 DECLARE_RWSEM(dmar_global_lock);
57 LIST_HEAD(dmar_drhd_units);
59 struct acpi_table_header * __initdata dmar_tbl;
60 static int dmar_dev_scope_status = 1;
61 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
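/*
 * Illustrative sketch (not part of the driver): how the locking rules
 * stated above play out for a reader of dmar_drhd_units. Process
 * context would take dmar_global_lock; interrupt context walks the
 * list under RCU instead. The helper name below is hypothetical.
 */
#if 0	/* example only */
static struct dmar_drhd_unit *example_find_unit_irq_ctx(u16 segment)
{
	struct dmar_drhd_unit *dmaru, *found = NULL;

	rcu_read_lock();	/* interrupt context: RCU, not the rwsem */
	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list) {
		if (dmaru->segment == segment) {
			found = dmaru;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}
#endif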
63 static int alloc_iommu(struct dmar_drhd_unit *drhd);
64 static void free_iommu(struct intel_iommu *iommu);
66 extern const struct iommu_ops intel_iommu_ops;
68 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
71 * add INCLUDE_ALL at the tail, so scanning the list will find it at
74 if (drhd->include_all)
75 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77 list_add_rcu(&drhd->list, &dmar_drhd_units);
80 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 struct acpi_dmar_device_scope *scope;
87 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
88 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
89 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
92 scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
93 pr_warn("Unsupported device scope\n");
95 start += scope->length;
100 return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
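/*
 * Note: the loop above is a counting pass; the kcalloc()'ed array is
 * filled later, e.g. by dmar_insert_dev_scope(), in a second pass.
 */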
103 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
106 struct device *tmp_dev;
108 if (*devices && *cnt) {
109 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
118 /* Optimize out kzalloc()/kfree() for normal cases */
119 static char dmar_pci_notify_info_buf[64];
121 static struct dmar_pci_notify_info *
122 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
127 struct dmar_pci_notify_info *info;
129 BUG_ON(dev->is_virtfn);
131 /* Only generate path[] for device addition event */
132 if (event == BUS_NOTIFY_ADD_DEVICE)
133 for (tmp = dev; tmp; tmp = tmp->bus->self)
136 size = struct_size(info, path, level);
137 if (size <= sizeof(dmar_pci_notify_info_buf)) {
138 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
140 info = kzalloc(size, GFP_KERNEL);
142 pr_warn("Out of memory when allocating notify_info "
143 "for %s.\n", pci_name(dev));
144 if (dmar_dev_scope_status == 0)
145 dmar_dev_scope_status = -ENOMEM;
152 info->seg = pci_domain_nr(dev->bus);
154 if (event == BUS_NOTIFY_ADD_DEVICE) {
155 for (tmp = dev; tmp; tmp = tmp->bus->self) {
157 info->path[level].bus = tmp->bus->number;
158 info->path[level].device = PCI_SLOT(tmp->devfn);
159 info->path[level].function = PCI_FUNC(tmp->devfn);
160 if (pci_is_root_bus(tmp->bus))
161 info->bus = tmp->bus->number;
168 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
170 if ((void *)info != dmar_pci_notify_info_buf)
174 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
175 struct acpi_dmar_pci_path *path, int count)
179 if (info->bus != bus)
181 if (info->level != count)
184 for (i = 0; i < count; i++) {
185 if (path[i].device != info->path[i].device ||
186 path[i].function != info->path[i].function)
198 if (bus == info->path[i].bus &&
199 path[0].device == info->path[i].device &&
200 path[0].function == info->path[i].function) {
201 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
202 bus, path[0].device, path[0].function);
209 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
210 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
211 void *start, void *end, u16 segment,
212 struct dmar_dev_scope *devices,
216 struct device *tmp, *dev = &info->dev->dev;
217 struct acpi_dmar_device_scope *scope;
218 struct acpi_dmar_pci_path *path;
220 if (segment != info->seg)
223 for (; start < end; start += scope->length) {
225 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
226 scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
229 path = (struct acpi_dmar_pci_path *)(scope + 1);
230 level = (scope->length - sizeof(*scope)) / sizeof(*path);
231 if (!dmar_match_pci_path(info, scope->bus, path, level))
235 * We expect devices with endpoint scope to have normal PCI
236 * headers, and devices with bridge scope to have bridge PCI
237 * headers. However PCI NTB devices may be listed in the
238 * DMAR table with bridge scope, even though they have a
239 * normal PCI header. NTB devices are identified by class
240 * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
241 * for this special case.
243 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
244 info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
245 (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
246 (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
247 info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
248 pr_warn("Device scope type does not match for %s\n",
249 pci_name(info->dev));
253 for_each_dev_scope(devices, devices_cnt, i, tmp)
255 devices[i].bus = info->dev->bus->number;
256 devices[i].devfn = info->dev->devfn;
257 rcu_assign_pointer(devices[i].dev,
261 BUG_ON(i >= devices_cnt);
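/*
 * The rcu_assign_pointer() above publishes the struct device to
 * lockless readers that traverse the scope arrays under
 * rcu_read_lock(), matching the locking rules at the top of the file.
 */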
267 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
268 struct dmar_dev_scope *devices, int count)
273 if (info->seg != segment)
276 for_each_active_dev_scope(devices, count, index, tmp)
277 if (tmp == &info->dev->dev) {
278 RCU_INIT_POINTER(devices[index].dev, NULL);
287 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
290 struct dmar_drhd_unit *dmaru;
291 struct acpi_dmar_hardware_unit *drhd;
293 for_each_drhd_unit(dmaru) {
294 if (dmaru->include_all)
297 drhd = container_of(dmaru->hdr,
298 struct acpi_dmar_hardware_unit, header);
299 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
300 ((void *)drhd) + drhd->header.length,
302 dmaru->devices, dmaru->devices_cnt);
307 ret = dmar_iommu_notify_scope_dev(info);
308 if (ret < 0 && dmar_dev_scope_status == 0)
309 dmar_dev_scope_status = ret;
314 static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
316 struct dmar_drhd_unit *dmaru;
318 for_each_drhd_unit(dmaru)
319 if (dmar_remove_dev_scope(info, dmaru->segment,
320 dmaru->devices, dmaru->devices_cnt))
322 dmar_iommu_notify_scope_dev(info);
325 static int dmar_pci_bus_notifier(struct notifier_block *nb,
326 unsigned long action, void *data)
328 struct pci_dev *pdev = to_pci_dev(data);
329 struct dmar_pci_notify_info *info;
331 /* Only care about add/remove events for physical functions.
332 * For VFs we actually do the lookup based on the corresponding
333 * PF in device_to_iommu() anyway. */
336 if (action != BUS_NOTIFY_ADD_DEVICE &&
337 action != BUS_NOTIFY_REMOVED_DEVICE)
340 info = dmar_alloc_pci_notify_info(pdev, action);
344 down_write(&dmar_global_lock);
345 if (action == BUS_NOTIFY_ADD_DEVICE)
346 dmar_pci_bus_add_dev(info);
347 else if (action == BUS_NOTIFY_REMOVED_DEVICE)
348 dmar_pci_bus_del_dev(info);
349 up_write(&dmar_global_lock);
351 dmar_free_pci_notify_info(info);
356 static struct notifier_block dmar_pci_bus_nb = {
357 .notifier_call = dmar_pci_bus_notifier,
361 static struct dmar_drhd_unit *
362 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
364 struct dmar_drhd_unit *dmaru;
366 list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
368 if (dmaru->segment == drhd->segment &&
369 dmaru->reg_base_addr == drhd->address)
376 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
377 * structure, which uniquely represents one DMA remapping hardware unit
378 * present in the platform
380 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
382 struct acpi_dmar_hardware_unit *drhd;
383 struct dmar_drhd_unit *dmaru;
386 drhd = (struct acpi_dmar_hardware_unit *)header;
387 dmaru = dmar_find_dmaru(drhd);
391 dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
396 * If header is allocated from slab by ACPI _DSM method, we need to
397 * copy the content because the memory buffer will be freed on return.
399 dmaru->hdr = (void *)(dmaru + 1);
400 memcpy(dmaru->hdr, header, header->length);
401 dmaru->reg_base_addr = drhd->address;
402 dmaru->segment = drhd->segment;
403 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
404 dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
405 ((void *)drhd) + drhd->header.length,
406 &dmaru->devices_cnt);
407 if (dmaru->devices_cnt && dmaru->devices == NULL) {
412 ret = alloc_iommu(dmaru);
414 dmar_free_dev_scope(&dmaru->devices,
415 &dmaru->devices_cnt);
419 dmar_register_drhd_unit(dmaru);
428 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
430 if (dmaru->devices && dmaru->devices_cnt)
431 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
433 free_iommu(dmaru->iommu);
437 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
440 struct acpi_dmar_andd *andd = (void *)header;
442 /* Check for NUL termination within the designated length; 8 is the offset of device_name in struct acpi_dmar_andd */
443 if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
445 "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
446 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
447 dmi_get_system_info(DMI_BIOS_VENDOR),
448 dmi_get_system_info(DMI_BIOS_VERSION),
449 dmi_get_system_info(DMI_PRODUCT_VERSION));
450 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
453 pr_info("ANDD device: %x name: %s\n", andd->device_number,
459 #ifdef CONFIG_ACPI_NUMA
460 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
462 struct acpi_dmar_rhsa *rhsa;
463 struct dmar_drhd_unit *drhd;
465 rhsa = (struct acpi_dmar_rhsa *)header;
466 for_each_drhd_unit(drhd) {
467 if (drhd->reg_base_addr == rhsa->base_address) {
468 int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
470 if (!node_online(node))
472 drhd->iommu->node = node;
477 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
478 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
480 dmi_get_system_info(DMI_BIOS_VENDOR),
481 dmi_get_system_info(DMI_BIOS_VERSION),
482 dmi_get_system_info(DMI_PRODUCT_VERSION));
483 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
488 #define dmar_parse_one_rhsa dmar_res_noop
492 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
494 struct acpi_dmar_hardware_unit *drhd;
495 struct acpi_dmar_reserved_memory *rmrr;
496 struct acpi_dmar_atsr *atsr;
497 struct acpi_dmar_rhsa *rhsa;
499 switch (header->type) {
500 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
501 drhd = container_of(header, struct acpi_dmar_hardware_unit,
503 pr_info("DRHD base: %#016Lx flags: %#x\n",
504 (unsigned long long)drhd->address, drhd->flags);
506 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
507 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
509 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
510 (unsigned long long)rmrr->base_address,
511 (unsigned long long)rmrr->end_address);
513 case ACPI_DMAR_TYPE_ROOT_ATS:
514 atsr = container_of(header, struct acpi_dmar_atsr, header);
515 pr_info("ATSR flags: %#x\n", atsr->flags);
517 case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
518 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
519 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
520 (unsigned long long)rhsa->base_address,
521 rhsa->proximity_domain);
523 case ACPI_DMAR_TYPE_NAMESPACE:
524 /* We don't print this here because we need to sanity-check
525 it first. So print it in dmar_parse_one_andd() instead. */
531 * dmar_table_detect - checks to see if the platform supports DMAR devices
533 static int __init dmar_table_detect(void)
535 acpi_status status = AE_OK;
537 /* if we can find the DMAR table, then there are DMAR devices */
538 status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
540 if (ACPI_SUCCESS(status) && !dmar_tbl) {
541 pr_warn("Unable to map DMAR\n");
542 status = AE_NOT_FOUND;
545 return ACPI_SUCCESS(status) ? 0 : -ENOENT;
548 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
549 size_t len, struct dmar_res_callback *cb)
551 struct acpi_dmar_header *iter, *next;
552 struct acpi_dmar_header *end = ((void *)start) + len;
554 for (iter = start; iter < end; iter = next) {
555 next = (void *)iter + iter->length;
556 if (iter->length == 0) {
557 /* Avoid looping forever on bad ACPI tables */
558 pr_debug(FW_BUG "Invalid 0-length structure\n");
560 } else if (next > end) {
561 /* Avoid passing table end */
562 pr_warn(FW_BUG "Record passes table end\n");
567 dmar_table_print_dmar_entry(iter);
569 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
570 /* continue for forward compatibility */
571 pr_debug("Unknown DMAR structure type %d\n",
573 } else if (cb->cb[iter->type]) {
576 ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
579 } else if (!cb->ignore_unhandled) {
580 pr_warn("No handler for DMAR structure type %d\n",
589 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
590 struct dmar_res_callback *cb)
592 return dmar_walk_remapping_entries((void *)(dmar + 1),
593 dmar->header.length - sizeof(*dmar), cb);
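/*
 * Illustrative sketch (example only): a minimal dmar_res_callback user
 * that counts DRHD entries, mirroring how parse_dmar_table() below
 * wires up its callback table. Function names here are hypothetical.
 */
#if 0
static int example_count_drhd(struct acpi_dmar_header *header, void *arg)
{
	(*(int *)arg)++;
	return 0;
}

static int example_count_all_drhds(struct acpi_table_dmar *dmar)
{
	int drhd_count = 0;
	struct dmar_res_callback cb = {
		.ignore_unhandled = true,
		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &example_count_drhd,
		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
	};

	/* walk all entries; only DRHD records hit the counter */
	return dmar_walk_dmar_table(dmar, &cb) ? : drhd_count;
}
#endif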
597 * parse_dmar_table - parses the DMA reporting table
600 parse_dmar_table(void)
602 struct acpi_table_dmar *dmar;
605 struct dmar_res_callback cb = {
607 .ignore_unhandled = true,
608 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
609 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
610 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
611 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
612 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
613 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
617 * Do it again, earlier dmar_tbl mapping could be mapped with
623 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
624 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
626 dmar_tbl = tboot_get_dmar_table(dmar_tbl);
628 dmar = (struct acpi_table_dmar *)dmar_tbl;
632 if (dmar->width < PAGE_SHIFT - 1) {
633 pr_warn("Invalid DMAR haw\n");
637 pr_info("Host address width %d\n", dmar->width + 1);
638 ret = dmar_walk_dmar_table(dmar, &cb);
639 if (ret == 0 && drhd_count == 0)
640 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
645 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
646 int cnt, struct pci_dev *dev)
652 for_each_active_dev_scope(devices, cnt, index, tmp)
653 if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
656 /* Check our parent */
657 dev = dev->bus->self;
663 struct dmar_drhd_unit *
664 dmar_find_matched_drhd_unit(struct pci_dev *dev)
666 struct dmar_drhd_unit *dmaru;
667 struct acpi_dmar_hardware_unit *drhd;
669 dev = pci_physfn(dev);
672 for_each_drhd_unit(dmaru) {
673 drhd = container_of(dmaru->hdr,
674 struct acpi_dmar_hardware_unit,
677 if (dmaru->include_all &&
678 drhd->segment == pci_domain_nr(dev->bus))
681 if (dmar_pci_device_match(dmaru->devices,
682 dmaru->devices_cnt, dev))
692 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
693 struct acpi_device *adev)
695 struct dmar_drhd_unit *dmaru;
696 struct acpi_dmar_hardware_unit *drhd;
697 struct acpi_dmar_device_scope *scope;
700 struct acpi_dmar_pci_path *path;
702 for_each_drhd_unit(dmaru) {
703 drhd = container_of(dmaru->hdr,
704 struct acpi_dmar_hardware_unit,
707 for (scope = (void *)(drhd + 1);
708 (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
709 scope = ((void *)scope) + scope->length) {
710 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
712 if (scope->enumeration_id != device_number)
715 path = (void *)(scope + 1);
716 pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
717 dev_name(&adev->dev), dmaru->reg_base_addr,
718 scope->bus, path->device, path->function);
719 for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
721 dmaru->devices[i].bus = scope->bus;
722 dmaru->devices[i].devfn = PCI_DEVFN(path->device,
724 rcu_assign_pointer(dmaru->devices[i].dev,
725 get_device(&adev->dev));
728 BUG_ON(i >= dmaru->devices_cnt);
731 pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
732 device_number, dev_name(&adev->dev));
735 static int __init dmar_acpi_dev_scope_init(void)
737 struct acpi_dmar_andd *andd;
739 if (dmar_tbl == NULL)
742 for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
743 ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
744 andd = ((void *)andd) + andd->header.length) {
745 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
747 struct acpi_device *adev;
749 if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
752 pr_err("Failed to find handle for ACPI object %s\n",
756 if (acpi_bus_get_device(h, &adev)) {
757 pr_err("Failed to get device for ACPI object %s\n",
761 dmar_acpi_insert_dev_scope(andd->device_number, adev);
767 int __init dmar_dev_scope_init(void)
769 struct pci_dev *dev = NULL;
770 struct dmar_pci_notify_info *info;
772 if (dmar_dev_scope_status != 1)
773 return dmar_dev_scope_status;
775 if (list_empty(&dmar_drhd_units)) {
776 dmar_dev_scope_status = -ENODEV;
778 dmar_dev_scope_status = 0;
780 dmar_acpi_dev_scope_init();
782 for_each_pci_dev(dev) {
786 info = dmar_alloc_pci_notify_info(dev,
787 BUS_NOTIFY_ADD_DEVICE);
789 return dmar_dev_scope_status;
791 dmar_pci_bus_add_dev(info);
792 dmar_free_pci_notify_info(info);
797 return dmar_dev_scope_status;
800 void __init dmar_register_bus_notifier(void)
802 bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
806 int __init dmar_table_init(void)
808 static int dmar_table_initialized;
811 if (dmar_table_initialized == 0) {
812 ret = parse_dmar_table();
815 pr_info("Parse DMAR table failure.\n");
816 } else if (list_empty(&dmar_drhd_units)) {
817 pr_info("No DMAR devices found\n");
822 dmar_table_initialized = ret;
824 dmar_table_initialized = 1;
827 return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
830 static void warn_invalid_dmar(u64 addr, const char *message)
833 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
834 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
836 dmi_get_system_info(DMI_BIOS_VENDOR),
837 dmi_get_system_info(DMI_BIOS_VERSION),
838 dmi_get_system_info(DMI_PRODUCT_VERSION));
839 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
843 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
845 struct acpi_dmar_hardware_unit *drhd;
849 drhd = (void *)entry;
850 if (!drhd->address) {
851 warn_invalid_dmar(0, "");
856 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
858 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
860 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
864 cap = dmar_readq(addr + DMAR_CAP_REG);
865 ecap = dmar_readq(addr + DMAR_ECAP_REG);
870 early_iounmap(addr, VTD_PAGE_SIZE);
872 if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
873 warn_invalid_dmar(drhd->address, " returns all ones");
880 int __init detect_intel_iommu(void)
883 struct dmar_res_callback validate_drhd_cb = {
884 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
885 .ignore_unhandled = true,
888 down_write(&dmar_global_lock);
889 ret = dmar_table_detect();
891 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
893 if (!ret && !no_iommu && !iommu_detected && !dmar_disabled) {
895 /* Make sure ACS will be enabled */
901 x86_init.iommu.iommu_init = intel_iommu_init;
902 x86_platform.iommu_shutdown = intel_iommu_shutdown;
908 acpi_put_table(dmar_tbl);
911 up_write(&dmar_global_lock);
913 return ret ? ret : 1;
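/*
 * Note: detection deliberately returns 1 (not 0) on success; the
 * IOMMU init table machinery treats a positive return as "IOMMU
 * present", while negative values propagate detection errors.
 */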
916 static void unmap_iommu(struct intel_iommu *iommu)
919 release_mem_region(iommu->reg_phys, iommu->reg_size);
923 * map_iommu: map the iommu's registers
924 * @iommu: the iommu to map
925 * @phys_addr: the physical address of the base register
927 * Memory map the iommu's registers. Start w/ a single page, and
928 * possibly expand if that turns out to be insufficient.
930 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
934 iommu->reg_phys = phys_addr;
935 iommu->reg_size = VTD_PAGE_SIZE;
937 if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
938 pr_err("Can't reserve memory\n");
943 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
945 pr_err("Can't map the region\n");
950 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
951 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
953 if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
955 warn_invalid_dmar(phys_addr, " returns all ones");
959 /* the registers might be more than one page */
960 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
961 cap_max_fault_reg_offset(iommu->cap));
962 map_size = VTD_PAGE_ALIGN(map_size);
963 if (map_size > iommu->reg_size) {
965 release_mem_region(iommu->reg_phys, iommu->reg_size);
966 iommu->reg_size = map_size;
967 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
969 pr_err("Can't reserve memory\n");
973 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
975 pr_err("Can't map the region\n");
986 release_mem_region(iommu->reg_phys, iommu->reg_size);
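/*
 * Flow recap (illustrative): map a single VTD_PAGE_SIZE page, read
 * CAP/ECAP, derive the real register span from the IOTLB and fault
 * register offsets, then release and re-map if that span exceeds the
 * initial page.
 */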
991 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
993 iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
994 DMAR_UNITS_SUPPORTED);
995 if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
998 set_bit(iommu->seq_id, dmar_seq_ids);
999 sprintf(iommu->name, "dmar%d", iommu->seq_id);
1002 return iommu->seq_id;
1005 static void dmar_free_seq_id(struct intel_iommu *iommu)
1007 if (iommu->seq_id >= 0) {
1008 clear_bit(iommu->seq_id, dmar_seq_ids);
1013 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1015 struct intel_iommu *iommu;
1021 if (!drhd->reg_base_addr) {
1022 warn_invalid_dmar(0, "");
1026 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1030 if (dmar_alloc_seq_id(iommu) < 0) {
1031 pr_err("Failed to allocate seq_id\n");
1036 err = map_iommu(iommu, drhd->reg_base_addr);
1038 pr_err("Failed to map %s\n", iommu->name);
1039 goto error_free_seq_id;
1043 agaw = iommu_calculate_agaw(iommu);
1045 pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1049 msagaw = iommu_calculate_max_sagaw(iommu);
1051 pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1056 iommu->msagaw = msagaw;
1057 iommu->segment = drhd->segment;
1059 iommu->node = NUMA_NO_NODE;
1061 ver = readl(iommu->reg + DMAR_VER_REG);
1062 pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1064 (unsigned long long)drhd->reg_base_addr,
1065 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1066 (unsigned long long)iommu->cap,
1067 (unsigned long long)iommu->ecap);
1069 /* Reflect status in gcmd */
1070 sts = readl(iommu->reg + DMAR_GSTS_REG);
1071 if (sts & DMA_GSTS_IRES)
1072 iommu->gcmd |= DMA_GCMD_IRE;
1073 if (sts & DMA_GSTS_TES)
1074 iommu->gcmd |= DMA_GCMD_TE;
1075 if (sts & DMA_GSTS_QIES)
1076 iommu->gcmd |= DMA_GCMD_QIE;
1078 raw_spin_lock_init(&iommu->register_lock);
1080 if (intel_iommu_enabled) {
1081 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1087 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1089 err = iommu_device_register(&iommu->iommu);
1094 drhd->iommu = iommu;
1101 dmar_free_seq_id(iommu);
1107 static void free_iommu(struct intel_iommu *iommu)
1109 if (intel_iommu_enabled) {
1110 iommu_device_unregister(&iommu->iommu);
1111 iommu_device_sysfs_remove(&iommu->iommu);
1115 if (iommu->pr_irq) {
1116 free_irq(iommu->pr_irq, iommu);
1117 dmar_free_hwirq(iommu->pr_irq);
1120 free_irq(iommu->irq, iommu);
1121 dmar_free_hwirq(iommu->irq);
1126 free_page((unsigned long)iommu->qi->desc);
1127 kfree(iommu->qi->desc_status);
1134 dmar_free_seq_id(iommu);
1139 * Reclaim all the submitted descriptors which have completed their work.
1141 static inline void reclaim_free_desc(struct q_inval *qi)
1143 while (qi->desc_status[qi->free_tail] == QI_DONE ||
1144 qi->desc_status[qi->free_tail] == QI_ABORT) {
1145 qi->desc_status[qi->free_tail] = QI_FREE;
1146 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1151 static int qi_check_fault(struct intel_iommu *iommu, int index)
1155 struct q_inval *qi = iommu->qi;
1156 int wait_index = (index + 1) % QI_LENGTH;
1157 int shift = qi_shift(iommu);
1159 if (qi->desc_status[wait_index] == QI_ABORT)
1162 fault = readl(iommu->reg + DMAR_FSTS_REG);
1165 * If IQE happens, the head points to the descriptor associated
1166 * with the error. No new descriptors are fetched until the IQE
1169 if (fault & DMA_FSTS_IQE) {
1170 head = readl(iommu->reg + DMAR_IQH_REG);
1171 if ((head >> shift) == index) {
1172 struct qi_desc *desc = qi->desc + head;
1175 * desc->qw2 and desc->qw3 are either reserved or
1176 * used by software as private data. We won't print
1177 * out these two qw's for security considerations.
1179 pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1180 (unsigned long long)desc->qw0,
1181 (unsigned long long)desc->qw1);
1182 memcpy(desc, qi->desc + (wait_index << shift),
1184 writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1190 * If ITE happens, all pending wait_desc commands are aborted.
1191 * No new descriptors are fetched until the ITE is cleared.
1193 if (fault & DMA_FSTS_ITE) {
1194 head = readl(iommu->reg + DMAR_IQH_REG);
1195 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1197 tail = readl(iommu->reg + DMAR_IQT_REG);
1198 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1200 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1203 if (qi->desc_status[head] == QI_IN_USE)
1204 qi->desc_status[head] = QI_ABORT;
1205 head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1206 } while (head != tail);
1208 if (qi->desc_status[wait_index] == QI_ABORT)
1212 if (fault & DMA_FSTS_ICE)
1213 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1219 * Submit the queued invalidation descriptor to the remapping
1220 * hardware unit and wait for its completion.
1222 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
1225 struct q_inval *qi = iommu->qi;
1226 int offset, shift, length;
1227 struct qi_desc wait_desc;
1228 int wait_index, index;
1229 unsigned long flags;
1237 raw_spin_lock_irqsave(&qi->q_lock, flags);
1238 while (qi->free_cnt < 3) {
1239 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1241 raw_spin_lock_irqsave(&qi->q_lock, flags);
1244 index = qi->free_head;
1245 wait_index = (index + 1) % QI_LENGTH;
1246 shift = qi_shift(iommu);
1247 length = 1 << shift;
1249 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
1251 offset = index << shift;
1252 memcpy(qi->desc + offset, desc, length);
1253 wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1254 QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1255 wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1259 offset = wait_index << shift;
1260 memcpy(qi->desc + offset, &wait_desc, length);
1262 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
1266 * update the HW tail register indicating the presence of
1269 writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1271 while (qi->desc_status[wait_index] != QI_DONE) {
1273 * We leave the interrupts disabled to prevent the interrupt
1274 * context from queueing another cmd while a cmd is already submitted
1275 * and waiting for completion on this cpu. This is to avoid
1276 * a deadlock where the interrupt context could wait indefinitely
1277 * for free slots in the queue.
1279 rc = qi_check_fault(iommu, index);
1283 raw_spin_unlock(&qi->q_lock);
1285 raw_spin_lock(&qi->q_lock);
1288 qi->desc_status[index] = QI_DONE;
1290 reclaim_free_desc(qi);
1291 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1300 * Flush the global interrupt entry cache.
1302 void qi_global_iec(struct intel_iommu *iommu)
1304 struct qi_desc desc;
1306 desc.qw0 = QI_IEC_TYPE;
1311 /* should never fail */
1312 qi_submit_sync(&desc, iommu);
1315 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1318 struct qi_desc desc;
1320 desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1321 | QI_CC_GRAN(type) | QI_CC_TYPE;
1326 qi_submit_sync(&desc, iommu);
1329 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1330 unsigned int size_order, u64 type)
1334 struct qi_desc desc;
1337 if (cap_write_drain(iommu->cap))
1340 if (cap_read_drain(iommu->cap))
1343 desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1344 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1345 desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1346 | QI_IOTLB_AM(size_order);
1350 qi_submit_sync(&desc, iommu);
1353 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1354 u16 qdep, u64 addr, unsigned mask)
1356 struct qi_desc desc;
1359 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1360 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1362 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1364 if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1367 desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1368 QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1372 qi_submit_sync(&desc, iommu);
1375 /* PASID-based IOTLB invalidation */
1376 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1377 unsigned long npages, bool ih)
1379 struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1382 * npages == -1 means a PASID-selective invalidation; otherwise,
1383 * a positive value requests a Page-selective-within-PASID invalidation.
1384 * 0 is not a valid input.
1386 if (WARN_ON(!npages)) {
1387 pr_err("Invalid input npages = %ld\n", npages);
1392 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1393 QI_EIOTLB_DID(did) |
1394 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1398 int mask = ilog2(__roundup_pow_of_two(npages));
1399 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1401 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1402 addr &= ~(align - 1);
1404 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1405 QI_EIOTLB_DID(did) |
1406 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1408 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1413 qi_submit_sync(&desc, iommu);
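/*
 * Worked example (illustrative): for npages = 9,
 * __roundup_pow_of_two(9) = 16, so mask = ilog2(16) = 4 and the
 * invalidation covers 2^4 pages = 64KiB, with addr aligned down to a
 * 64KiB boundary.
 */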
1417 * Disable Queued Invalidation interface.
1419 void dmar_disable_qi(struct intel_iommu *iommu)
1421 unsigned long flags;
1423 cycles_t start_time = get_cycles();
1425 if (!ecap_qis(iommu->ecap))
1428 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1430 sts = readl(iommu->reg + DMAR_GSTS_REG);
1431 if (!(sts & DMA_GSTS_QIES))
1435 * Give the HW a chance to complete the pending invalidation requests.
1437 while ((readl(iommu->reg + DMAR_IQT_REG) !=
1438 readl(iommu->reg + DMAR_IQH_REG)) &&
1439 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1442 iommu->gcmd &= ~DMA_GCMD_QIE;
1443 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1445 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1446 !(sts & DMA_GSTS_QIES), sts);
1448 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1452 * Enable queued invalidation.
1454 static void __dmar_enable_qi(struct intel_iommu *iommu)
1457 unsigned long flags;
1458 struct q_inval *qi = iommu->qi;
1459 u64 val = virt_to_phys(qi->desc);
1461 qi->free_head = qi->free_tail = 0;
1462 qi->free_cnt = QI_LENGTH;
1465 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1468 if (ecap_smts(iommu->ecap))
1469 val |= (1 << 11) | 1;
1471 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1473 /* write zero to the tail reg */
1474 writel(0, iommu->reg + DMAR_IQT_REG);
1476 dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1478 iommu->gcmd |= DMA_GCMD_QIE;
1479 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1481 /* Make sure hardware complete it */
1482 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1484 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1488 * Enable Queued Invalidation interface. This is required to support
1489 * interrupt remapping. It is also used by DMA remapping, where it
1490 * replaces register-based IOTLB invalidation.
1492 int dmar_enable_qi(struct intel_iommu *iommu)
1495 struct page *desc_page;
1497 if (!ecap_qis(iommu->ecap))
1501 * queued invalidation is already setup and enabled.
1506 iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1513 * Need two pages to accommodate 256 descriptors of 256 bits each
1514 * if the remapping hardware supports scalable mode translation.
1516 desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1517 !!ecap_smts(iommu->ecap));
1524 qi->desc = page_address(desc_page);
1526 qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1527 if (!qi->desc_status) {
1528 free_page((unsigned long) qi->desc);
1534 raw_spin_lock_init(&qi->q_lock);
1536 __dmar_enable_qi(iommu);
1541 /* iommu interrupt handling. Most of it is MSI-like. */
1549 static const char *dma_remap_fault_reasons[] =
1552 "Present bit in root entry is clear",
1553 "Present bit in context entry is clear",
1554 "Invalid context entry",
1555 "Access beyond MGAW",
1556 "PTE Write access is not set",
1557 "PTE Read access is not set",
1558 "Next page table ptr is invalid",
1559 "Root table address invalid",
1560 "Context table ptr is invalid",
1561 "non-zero reserved fields in RTP",
1562 "non-zero reserved fields in CTP",
1563 "non-zero reserved fields in PTE",
1564 "PCE for translation request specifies blocking",
1567 static const char * const dma_remap_sm_fault_reasons[] = {
1568 "SM: Invalid Root Table Address",
1569 "SM: TTM 0 for request with PASID",
1570 "SM: TTM 0 for page group request",
1571 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1572 "SM: Error attempting to access Root Entry",
1573 "SM: Present bit in Root Entry is clear",
1574 "SM: Non-zero reserved field set in Root Entry",
1575 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1576 "SM: Error attempting to access Context Entry",
1577 "SM: Present bit in Context Entry is clear",
1578 "SM: Non-zero reserved field set in the Context Entry",
1579 "SM: Invalid Context Entry",
1580 "SM: DTE field in Context Entry is clear",
1581 "SM: PASID Enable field in Context Entry is clear",
1582 "SM: PASID is larger than the max in Context Entry",
1583 "SM: PRE field in Context-Entry is clear",
1584 "SM: RID_PASID field error in Context-Entry",
1585 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1586 "SM: Error attempting to access the PASID Directory Entry",
1587 "SM: Present bit in Directory Entry is clear",
1588 "SM: Non-zero reserved field set in PASID Directory Entry",
1589 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1590 "SM: Error attempting to access PASID Table Entry",
1591 "SM: Present bit in PASID Table Entry is clear",
1592 "SM: Non-zero reserved field set in PASID Table Entry",
1593 "SM: Invalid Scalable-Mode PASID Table Entry",
1594 "SM: ERE field is clear in PASID Table Entry",
1595 "SM: SRE field is clear in PASID Table Entry",
1596 "Unknown", "Unknown",/* 0x5E-0x5F */
1597 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1598 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1599 "SM: Error attempting to access first-level paging entry",
1600 "SM: Present bit in first-level paging entry is clear",
1601 "SM: Non-zero reserved field set in first-level paging entry",
1602 "SM: Error attempting to access FL-PML4 entry",
1603 "SM: First-level entry address beyond MGAW in Nested translation",
1604 "SM: Read permission error in FL-PML4 entry in Nested translation",
1605 "SM: Read permission error in first-level paging entry in Nested translation",
1606 "SM: Write permission error in first-level paging entry in Nested translation",
1607 "SM: Error attempting to access second-level paging entry",
1608 "SM: Read/Write permission error in second-level paging entry",
1609 "SM: Non-zero reserved field set in second-level paging entry",
1610 "SM: Invalid second-level page table pointer",
1611 "SM: A/D bit update needed in second-level entry when set up in no snoop",
1612 "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1613 "SM: Address in first-level translation is not canonical",
1614 "SM: U/S set 0 for first-level translation with user privilege",
1615 "SM: No execute permission for request with PASID and ER=1",
1616 "SM: Address beyond the DMA hardware max",
1617 "SM: Second-level entry address beyond the max",
1618 "SM: No write permission for Write/AtomicOp request",
1619 "SM: No read permission for Read/AtomicOp request",
1620 "SM: Invalid address-interrupt address",
1621 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1622 "SM: A/D bit update needed in first-level entry when set up in no snoop",
1625 static const char *irq_remap_fault_reasons[] =
1627 "Detected reserved fields in the decoded interrupt-remapped request",
1628 "Interrupt index exceeded the interrupt-remapping table size",
1629 "Present field in the IRTE entry is clear",
1630 "Error accessing interrupt-remapping table pointed by IRTA_REG",
1631 "Detected reserved fields in the IRTE entry",
1632 "Blocked a compatibility format interrupt request",
1633 "Blocked an interrupt request due to source-id verification failure",
1636 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1638 if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1639 ARRAY_SIZE(irq_remap_fault_reasons))) {
1640 *fault_type = INTR_REMAP;
1641 return irq_remap_fault_reasons[fault_reason - 0x20];
1642 } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1643 ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1644 *fault_type = DMA_REMAP;
1645 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1646 } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1647 *fault_type = DMA_REMAP;
1648 return dma_remap_fault_reasons[fault_reason];
1650 *fault_type = UNKNOWN;
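/*
 * Summary of the ranges decoded above (illustrative):
 *   0x00+        -> dma_remap_fault_reasons[]    (legacy DMA remap)
 *   0x20 - 0x26  -> irq_remap_fault_reasons[]    (interrupt remap)
 *   0x30+        -> dma_remap_sm_fault_reasons[] (scalable mode)
 * anything else reports UNKNOWN.
 */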
1656 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1658 if (iommu->irq == irq)
1659 return DMAR_FECTL_REG;
1660 else if (iommu->pr_irq == irq)
1661 return DMAR_PECTL_REG;
1666 void dmar_msi_unmask(struct irq_data *data)
1668 struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1669 int reg = dmar_msi_reg(iommu, data->irq);
1673 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1674 writel(0, iommu->reg + reg);
1675 /* Read a reg to force flush the posted write */
1676 readl(iommu->reg + reg);
1677 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1680 void dmar_msi_mask(struct irq_data *data)
1682 struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1683 int reg = dmar_msi_reg(iommu, data->irq);
1687 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1688 writel(DMA_FECTL_IM, iommu->reg + reg);
1689 /* Read a reg to force flush the posted write */
1690 readl(iommu->reg + reg);
1691 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1694 void dmar_msi_write(int irq, struct msi_msg *msg)
1696 struct intel_iommu *iommu = irq_get_handler_data(irq);
1697 int reg = dmar_msi_reg(iommu, irq);
1700 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1701 writel(msg->data, iommu->reg + reg + 4);
1702 writel(msg->address_lo, iommu->reg + reg + 8);
1703 writel(msg->address_hi, iommu->reg + reg + 12);
1704 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1707 void dmar_msi_read(int irq, struct msi_msg *msg)
1709 struct intel_iommu *iommu = irq_get_handler_data(irq);
1710 int reg = dmar_msi_reg(iommu, irq);
1713 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1714 msg->data = readl(iommu->reg + reg + 4);
1715 msg->address_lo = readl(iommu->reg + reg + 8);
1716 msg->address_hi = readl(iommu->reg + reg + 12);
1717 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
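/*
 * Register layout assumed by the MSI helpers above, as offsets from
 * the per-source control register: +0 control, +4 data, +8 address
 * low, +12 address high - hence the reg + 4/8/12 accesses.
 */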
1720 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1721 u8 fault_reason, int pasid, u16 source_id,
1722 unsigned long long addr)
1727 reason = dmar_get_fault_reason(fault_reason, &fault_type);
1729 if (fault_type == INTR_REMAP)
1730 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1731 source_id >> 8, PCI_SLOT(source_id & 0xFF),
1732 PCI_FUNC(source_id & 0xFF), addr >> 48,
1733 fault_reason, reason);
1735 pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1736 type ? "DMA Read" : "DMA Write",
1737 source_id >> 8, PCI_SLOT(source_id & 0xFF),
1738 PCI_FUNC(source_id & 0xFF), pasid, addr,
1739 fault_reason, reason);
1743 #define PRIMARY_FAULT_REG_LEN (16)
1744 irqreturn_t dmar_fault(int irq, void *dev_id)
1746 struct intel_iommu *iommu = dev_id;
1747 int reg, fault_index;
1750 static DEFINE_RATELIMIT_STATE(rs,
1751 DEFAULT_RATELIMIT_INTERVAL,
1752 DEFAULT_RATELIMIT_BURST);
1754 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1755 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1756 if (fault_status && __ratelimit(&rs))
1757 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1759 /* TBD: ignore advanced fault log currently */
1760 if (!(fault_status & DMA_FSTS_PPF))
1763 fault_index = dma_fsts_fault_record_index(fault_status);
1764 reg = cap_fault_reg_offset(iommu->cap);
1766 /* Disable printing, simply clear the fault when ratelimited */
1767 bool ratelimited = !__ratelimit(&rs);
1775 /* highest 32 bits */
1776 data = readl(iommu->reg + reg +
1777 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1778 if (!(data & DMA_FRCD_F))
1782 fault_reason = dma_frcd_fault_reason(data);
1783 type = dma_frcd_type(data);
1785 pasid = dma_frcd_pasid_value(data);
1786 data = readl(iommu->reg + reg +
1787 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1788 source_id = dma_frcd_source_id(data);
1790 pasid_present = dma_frcd_pasid_present(data);
1791 guest_addr = dmar_readq(iommu->reg + reg +
1792 fault_index * PRIMARY_FAULT_REG_LEN);
1793 guest_addr = dma_frcd_page_addr(guest_addr);
1796 /* clear the fault */
1797 writel(DMA_FRCD_F, iommu->reg + reg +
1798 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1800 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1803 /* Use pasid -1 if pasid is not present */
1804 dmar_fault_do_one(iommu, type, fault_reason,
1805 pasid_present ? pasid : -1,
1806 source_id, guest_addr);
1809 if (fault_index >= cap_num_fault_regs(iommu->cap))
1811 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1814 writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1815 iommu->reg + DMAR_FSTS_REG);
1818 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1822 int dmar_set_interrupt(struct intel_iommu *iommu)
1827 * Check if the fault interrupt is already initialized.
1832 irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1836 pr_err("No free IRQ vectors\n");
1840 ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1842 pr_err("Can't request irq\n");
1846 int __init enable_drhd_fault_handling(void)
1848 struct dmar_drhd_unit *drhd;
1849 struct intel_iommu *iommu;
1852 * Enable fault control interrupt.
1854 for_each_iommu(iommu, drhd) {
1856 int ret = dmar_set_interrupt(iommu);
1859 pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1860 (unsigned long long)drhd->reg_base_addr, ret);
1865 * Clear any previous faults.
1867 dmar_fault(iommu->irq, iommu);
1868 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1869 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1876 * Re-enable Queued Invalidation interface.
1878 int dmar_reenable_qi(struct intel_iommu *iommu)
1880 if (!ecap_qis(iommu->ecap))
1887 * First disable queued invalidation.
1889 dmar_disable_qi(iommu);
1891 * Then enable queued invalidation again. Since there is no pending
1892 * invalidation requests now, it's safe to re-enable queued
1895 __dmar_enable_qi(iommu);
1901 * Check interrupt remapping support in DMAR table description.
1903 int __init dmar_ir_support(void)
1905 struct acpi_table_dmar *dmar;
1906 dmar = (struct acpi_table_dmar *)dmar_tbl;
1909 return dmar->flags & 0x1;
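/* Bit 0 of the DMAR table flags is INTR_REMAP: set when the platform
 * supports interrupt remapping. */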
1912 /* Check whether DMAR units are in use */
1913 static inline bool dmar_in_use(void)
1915 return irq_remapping_enabled || intel_iommu_enabled;
1918 static int __init dmar_free_unused_resources(void)
1920 struct dmar_drhd_unit *dmaru, *dmaru_n;
1925 if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
1926 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
1928 down_write(&dmar_global_lock);
1929 list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
1930 list_del(&dmaru->list);
1931 dmar_free_drhd(dmaru);
1933 up_write(&dmar_global_lock);
1938 late_initcall(dmar_free_unused_resources);
1939 IOMMU_INIT_POST(detect_intel_iommu);
1942 * DMAR Hotplug Support
1943 * For more details, please refer to Intel(R) Virtualization Technology
1944 * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
1945 * "Remapping Hardware Unit Hot Plug".
1947 static guid_t dmar_hp_guid =
1948 GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
1949 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
1952 * Currently there's only one revision and the BIOS will not check the revision id,
1953 * so use 0 for safety.
1955 #define DMAR_DSM_REV_ID 0
1956 #define DMAR_DSM_FUNC_DRHD 1
1957 #define DMAR_DSM_FUNC_ATSR 2
1958 #define DMAR_DSM_FUNC_RHSA 3
1960 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
1962 return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
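/*
 * Note: acpi_check_dsm() takes a bitmask of _DSM function indices, so
 * "1 << func" probes for exactly one of the DMAR_DSM_FUNC_* values
 * defined above.
 */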
1965 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
1966 dmar_res_handler_t handler, void *arg)
1969 union acpi_object *obj;
1970 struct acpi_dmar_header *start;
1971 struct dmar_res_callback callback;
1972 static int res_type[] = {
1973 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
1974 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
1975 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
1978 if (!dmar_detect_dsm(handle, func))
1981 obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
1982 func, NULL, ACPI_TYPE_BUFFER);
1986 memset(&callback, 0, sizeof(callback));
1987 callback.cb[res_type[func]] = handler;
1988 callback.arg[res_type[func]] = arg;
1989 start = (struct acpi_dmar_header *)obj->buffer.pointer;
1990 ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
1997 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2000 struct dmar_drhd_unit *dmaru;
2002 dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2006 ret = dmar_ir_hotplug(dmaru, true);
2008 ret = dmar_iommu_hotplug(dmaru, true);
2013 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2017 struct dmar_drhd_unit *dmaru;
2019 dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2024 * All PCI devices managed by this unit should have been destroyed.
2026 if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2027 for_each_active_dev_scope(dmaru->devices,
2028 dmaru->devices_cnt, i, dev)
2032 ret = dmar_ir_hotplug(dmaru, false);
2034 ret = dmar_iommu_hotplug(dmaru, false);
2039 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2041 struct dmar_drhd_unit *dmaru;
2043 dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2045 list_del_rcu(&dmaru->list);
2047 dmar_free_drhd(dmaru);
2053 static int dmar_hotplug_insert(acpi_handle handle)
2058 ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2059 &dmar_validate_one_drhd, (void *)1);
2063 ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2064 &dmar_parse_one_drhd, (void *)&drhd_count);
2065 if (ret == 0 && drhd_count == 0) {
2066 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2072 ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2073 &dmar_parse_one_rhsa, NULL);
2077 ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2078 &dmar_parse_one_atsr, NULL);
2082 ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2083 &dmar_hp_add_drhd, NULL);
2087 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2088 &dmar_hp_remove_drhd, NULL);
2090 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2091 &dmar_release_one_atsr, NULL);
2093 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2094 &dmar_hp_release_drhd, NULL);
2099 static int dmar_hotplug_remove(acpi_handle handle)
2103 ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2104 &dmar_check_one_atsr, NULL);
2108 ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2109 &dmar_hp_remove_drhd, NULL);
2111 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2112 &dmar_release_one_atsr, NULL));
2113 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2114 &dmar_hp_release_drhd, NULL));
2116 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2117 &dmar_hp_add_drhd, NULL);
2123 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2124 void *context, void **retval)
2126 acpi_handle *phdl = retval;
2128 if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2130 return AE_CTRL_TERMINATE;
2136 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2139 acpi_handle tmp = NULL;
2145 if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2148 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2150 dmar_get_dsm_handle,
2152 if (ACPI_FAILURE(status)) {
2153 pr_warn("Failed to locate _DSM method.\n");
2160 down_write(&dmar_global_lock);
2162 ret = dmar_hotplug_insert(tmp);
2164 ret = dmar_hotplug_remove(tmp);
2165 up_write(&dmar_global_lock);
2170 int dmar_device_add(acpi_handle handle)
2172 return dmar_device_hotplug(handle, true);
2175 int dmar_device_remove(acpi_handle handle)
2177 return dmar_device_hotplug(handle, false);
2181 * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2183 * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2184 * the ACPI DMAR table. This means that the platform boot firmware has made
2185 * sure no device can issue DMA outside of RMRR regions.
2187 bool dmar_platform_optin(void)
2189 struct acpi_table_dmar *dmar;
2193 status = acpi_get_table(ACPI_SIG_DMAR, 0,
2194 (struct acpi_table_header **)&dmar);
2195 if (ACPI_FAILURE(status))
2198 ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2199 acpi_put_table((struct acpi_table_header *)dmar);
2203 EXPORT_SYMBOL_GPL(dmar_platform_optin);