Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit f654706

Browse files
uweigand and borntraeger
authored and committed
KVM: s390/interrupt: do not pin adapter interrupt pages
The adapter interrupt page containing the indicator bits is currently pinned. That means that a guest with many devices can pin a lot of memory pages in the host. This also complicates the reference tracking which is needed for memory management handling of protected virtual machines. It might also have some strange side effects for madvise MADV_DONTNEED and other things. We can simply try to get the userspace page, set the bits and free the page. By storing the userspace address in the irq routing entry instead of the guest address we can actually avoid many lookups and list walks so that this variant is very likely not slower. If userspace messes around with the memory slots the worst thing that can happen is that we write to some other memory within that process. As we get the page with FOLL_WRITE, this also cannot be used to write to shared read-only pages. Signed-off-by: Ulrich Weigand <[email protected]> Acked-by: David Hildenbrand <[email protected]> Reviewed-by: Cornelia Huck <[email protected]> [[email protected]: patch simplification] Signed-off-by: Christian Borntraeger <[email protected]>
1 parent f15587c commit f654706

File tree

3 files changed

+51
-133
lines changed

3 files changed

+51
-133
lines changed

Documentation/virt/kvm/devices/s390_flic.rst

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,16 +108,9 @@ Groups:
108108
mask or unmask the adapter, as specified in mask
109109

110110
KVM_S390_IO_ADAPTER_MAP
111-
perform a gmap translation for the guest address provided in addr,
112-
pin a userspace page for the translated address and add it to the
113-
list of mappings
114-
115-
.. note:: A new mapping will be created unconditionally; therefore,
116-
the calling code should avoid making duplicate mappings.
117-
111+
This is now a no-op. The mapping is purely done by the irq route.
118112
KVM_S390_IO_ADAPTER_UNMAP
119-
release a userspace page for the translated address specified in addr
120-
from the list of mappings
113+
This is now a no-op. The mapping is purely done by the irq route.
121114

122115
KVM_DEV_FLIC_AISM
123116
modify the adapter-interruption-suppression mode for a given isc if the

arch/s390/include/asm/kvm_host.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -701,9 +701,6 @@ struct s390_io_adapter {
701701
bool masked;
702702
bool swap;
703703
bool suppressible;
704-
struct rw_semaphore maps_lock;
705-
struct list_head maps;
706-
atomic_t nr_maps;
707704
};
708705

709706
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)

arch/s390/kvm/interrupt.c

Lines changed: 49 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
/*
33
* handling kvm guest interrupts
44
*
5-
* Copyright IBM Corp. 2008, 2015
5+
* Copyright IBM Corp. 2008, 2020
66
*
77
* Author(s): Carsten Otte <[email protected]>
88
*/
@@ -2327,9 +2327,6 @@ static int register_io_adapter(struct kvm_device *dev,
23272327
if (!adapter)
23282328
return -ENOMEM;
23292329

2330-
INIT_LIST_HEAD(&adapter->maps);
2331-
init_rwsem(&adapter->maps_lock);
2332-
atomic_set(&adapter->nr_maps, 0);
23332330
adapter->id = adapter_info.id;
23342331
adapter->isc = adapter_info.isc;
23352332
adapter->maskable = adapter_info.maskable;
@@ -2354,87 +2351,12 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
23542351
return ret;
23552352
}
23562353

2357-
static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
2358-
{
2359-
struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
2360-
struct s390_map_info *map;
2361-
int ret;
2362-
2363-
if (!adapter || !addr)
2364-
return -EINVAL;
2365-
2366-
map = kzalloc(sizeof(*map), GFP_KERNEL);
2367-
if (!map) {
2368-
ret = -ENOMEM;
2369-
goto out;
2370-
}
2371-
INIT_LIST_HEAD(&map->list);
2372-
map->guest_addr = addr;
2373-
map->addr = gmap_translate(kvm->arch.gmap, addr);
2374-
if (map->addr == -EFAULT) {
2375-
ret = -EFAULT;
2376-
goto out;
2377-
}
2378-
ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
2379-
if (ret < 0)
2380-
goto out;
2381-
BUG_ON(ret != 1);
2382-
down_write(&adapter->maps_lock);
2383-
if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
2384-
list_add_tail(&map->list, &adapter->maps);
2385-
ret = 0;
2386-
} else {
2387-
put_page(map->page);
2388-
ret = -EINVAL;
2389-
}
2390-
up_write(&adapter->maps_lock);
2391-
out:
2392-
if (ret)
2393-
kfree(map);
2394-
return ret;
2395-
}
2396-
2397-
static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
2398-
{
2399-
struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
2400-
struct s390_map_info *map, *tmp;
2401-
int found = 0;
2402-
2403-
if (!adapter || !addr)
2404-
return -EINVAL;
2405-
2406-
down_write(&adapter->maps_lock);
2407-
list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
2408-
if (map->guest_addr == addr) {
2409-
found = 1;
2410-
atomic_dec(&adapter->nr_maps);
2411-
list_del(&map->list);
2412-
put_page(map->page);
2413-
kfree(map);
2414-
break;
2415-
}
2416-
}
2417-
up_write(&adapter->maps_lock);
2418-
2419-
return found ? 0 : -EINVAL;
2420-
}
2421-
24222354
void kvm_s390_destroy_adapters(struct kvm *kvm)
24232355
{
24242356
int i;
2425-
struct s390_map_info *map, *tmp;
24262357

2427-
for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
2428-
if (!kvm->arch.adapters[i])
2429-
continue;
2430-
list_for_each_entry_safe(map, tmp,
2431-
&kvm->arch.adapters[i]->maps, list) {
2432-
list_del(&map->list);
2433-
put_page(map->page);
2434-
kfree(map);
2435-
}
2358+
for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
24362359
kfree(kvm->arch.adapters[i]);
2437-
}
24382360
}
24392361

24402362
static int modify_io_adapter(struct kvm_device *dev,
@@ -2456,11 +2378,14 @@ static int modify_io_adapter(struct kvm_device *dev,
24562378
if (ret > 0)
24572379
ret = 0;
24582380
break;
2381+
/*
2382+
* The following operations are no longer needed and therefore no-ops.
2383+
* The gpa to hva translation is done when an IRQ route is set up. The
2384+
* set_irq code uses get_user_pages_remote() to do the actual write.
2385+
*/
24592386
case KVM_S390_IO_ADAPTER_MAP:
2460-
ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
2461-
break;
24622387
case KVM_S390_IO_ADAPTER_UNMAP:
2463-
ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
2388+
ret = 0;
24642389
break;
24652390
default:
24662391
ret = -EINVAL;
@@ -2699,19 +2624,15 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
26992624
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
27002625
}
27012626

2702-
static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
2703-
u64 addr)
2627+
static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
27042628
{
2705-
struct s390_map_info *map;
2629+
struct page *page = NULL;
27062630

2707-
if (!adapter)
2708-
return NULL;
2709-
2710-
list_for_each_entry(map, &adapter->maps, list) {
2711-
if (map->guest_addr == addr)
2712-
return map;
2713-
}
2714-
return NULL;
2631+
down_read(&kvm->mm->mmap_sem);
2632+
get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
2633+
&page, NULL, NULL);
2634+
up_read(&kvm->mm->mmap_sem);
2635+
return page;
27152636
}
27162637

27172638
static int adapter_indicators_set(struct kvm *kvm,
@@ -2720,30 +2641,35 @@ static int adapter_indicators_set(struct kvm *kvm,
27202641
{
27212642
unsigned long bit;
27222643
int summary_set, idx;
2723-
struct s390_map_info *info;
2644+
struct page *ind_page, *summary_page;
27242645
void *map;
27252646

2726-
info = get_map_info(adapter, adapter_int->ind_addr);
2727-
if (!info)
2647+
ind_page = get_map_page(kvm, adapter_int->ind_addr);
2648+
if (!ind_page)
27282649
return -1;
2729-
map = page_address(info->page);
2730-
bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
2731-
set_bit(bit, map);
2732-
idx = srcu_read_lock(&kvm->srcu);
2733-
mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
2734-
set_page_dirty_lock(info->page);
2735-
info = get_map_info(adapter, adapter_int->summary_addr);
2736-
if (!info) {
2737-
srcu_read_unlock(&kvm->srcu, idx);
2650+
summary_page = get_map_page(kvm, adapter_int->summary_addr);
2651+
if (!summary_page) {
2652+
put_page(ind_page);
27382653
return -1;
27392654
}
2740-
map = page_address(info->page);
2741-
bit = get_ind_bit(info->addr, adapter_int->summary_offset,
2742-
adapter->swap);
2655+
2656+
idx = srcu_read_lock(&kvm->srcu);
2657+
map = page_address(ind_page);
2658+
bit = get_ind_bit(adapter_int->ind_addr,
2659+
adapter_int->ind_offset, adapter->swap);
2660+
set_bit(bit, map);
2661+
mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
2662+
set_page_dirty_lock(ind_page);
2663+
map = page_address(summary_page);
2664+
bit = get_ind_bit(adapter_int->summary_addr,
2665+
adapter_int->summary_offset, adapter->swap);
27432666
summary_set = test_and_set_bit(bit, map);
2744-
mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
2745-
set_page_dirty_lock(info->page);
2667+
mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
2668+
set_page_dirty_lock(summary_page);
27462669
srcu_read_unlock(&kvm->srcu, idx);
2670+
2671+
put_page(ind_page);
2672+
put_page(summary_page);
27472673
return summary_set ? 0 : 1;
27482674
}
27492675

@@ -2765,9 +2691,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
27652691
adapter = get_io_adapter(kvm, e->adapter.adapter_id);
27662692
if (!adapter)
27672693
return -1;
2768-
down_read(&adapter->maps_lock);
27692694
ret = adapter_indicators_set(kvm, adapter, &e->adapter);
2770-
up_read(&adapter->maps_lock);
27712695
if ((ret > 0) && !adapter->masked) {
27722696
ret = kvm_s390_inject_airq(kvm, adapter);
27732697
if (ret == 0)
@@ -2818,23 +2742,27 @@ int kvm_set_routing_entry(struct kvm *kvm,
28182742
struct kvm_kernel_irq_routing_entry *e,
28192743
const struct kvm_irq_routing_entry *ue)
28202744
{
2821-
int ret;
2745+
u64 uaddr;
28222746

28232747
switch (ue->type) {
2748+
/* we store the userspace addresses instead of the guest addresses */
28242749
case KVM_IRQ_ROUTING_S390_ADAPTER:
28252750
e->set = set_adapter_int;
2826-
e->adapter.summary_addr = ue->u.adapter.summary_addr;
2827-
e->adapter.ind_addr = ue->u.adapter.ind_addr;
2751+
uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
2752+
if (uaddr == -EFAULT)
2753+
return -EFAULT;
2754+
e->adapter.summary_addr = uaddr;
2755+
uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
2756+
if (uaddr == -EFAULT)
2757+
return -EFAULT;
2758+
e->adapter.ind_addr = uaddr;
28282759
e->adapter.summary_offset = ue->u.adapter.summary_offset;
28292760
e->adapter.ind_offset = ue->u.adapter.ind_offset;
28302761
e->adapter.adapter_id = ue->u.adapter.adapter_id;
2831-
ret = 0;
2832-
break;
2762+
return 0;
28332763
default:
2834-
ret = -EINVAL;
2764+
return -EINVAL;
28352765
}
2836-
2837-
return ret;
28382766
}
28392767

28402768
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,

0 commit comments

Comments
 (0)